diff --git a/include/cuco/detail/open_addressing/constraints.cuh b/include/cuco/detail/open_addressing/constraints.cuh index b9409c70d..64d0d0486 100644 --- a/include/cuco/detail/open_addressing/constraints.cuh +++ b/include/cuco/detail/open_addressing/constraints.cuh @@ -48,16 +48,16 @@ struct open_addressing_compatible { [] { if constexpr (has_payload) { constexpr auto payload_size = sizeof(typename Value::second_type); - return payload_size == 4 or payload_size == 8 #if defined(CUCO_HAS_128BIT_ATOMICS) - or payload_size == 16 + return payload_size <= 16; +#else + return payload_size <= 8; #endif - ; } else { return true; } }(), - "Payload size must be 4 or 8 bytes (or 16 with sm_90+)."); + "Payload size exceeds the maximum supported size (8 bytes, or 16 with sm_90+)."); static_assert( cuco::is_bitwise_comparable_v, diff --git a/include/cuco/detail/pair/helpers.cuh b/include/cuco/detail/pair/helpers.cuh index c21cbdb86..b78191096 100644 --- a/include/cuco/detail/pair/helpers.cuh +++ b/include/cuco/detail/pair/helpers.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2025, NVIDIA CORPORATION. + * Copyright (c) 2021-2026, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -47,6 +47,22 @@ struct packed { using type = void; ///< `void` type by default }; +/** + * @brief Denotes the packed type when the size of the object is 1. + */ +template <> +struct packed { + using type = uint8_t; ///< Packed type as `uint8_t` if the size of the object is 1 +}; + +/** + * @brief Denotes the packed type when the size of the object is 2. + */ +template <> +struct packed { + using type = uint16_t; ///< Packed type as `uint16_t` if the size of the object is 2 +}; + /** * @brief Denotes the packed type when the size of the object is 8. */ diff --git a/tests/static_map/find_test.cu b/tests/static_map/find_test.cu index 7e1289bb6..5d9376309 100644 --- a/tests/static_map/find_test.cu +++ b/tests/static_map/find_test.cu @@ -138,6 +138,11 @@ TEMPLATE_TEST_CASE_SIG( Value, Probe, CGSize), + (int8_t, int8_t, cuco::test::probe_sequence::double_hashing, 1), + (int8_t, int8_t, cuco::test::probe_sequence::double_hashing, 2), + (int8_t, int16_t, cuco::test::probe_sequence::double_hashing, 2), + (int16_t, int16_t, cuco::test::probe_sequence::double_hashing, 1), + (int16_t, int16_t, cuco::test::probe_sequence::double_hashing, 2), (int32_t, int32_t, cuco::test::probe_sequence::double_hashing, 1), (int32_t, int64_t, cuco::test::probe_sequence::double_hashing, 1), (int32_t, int32_t, cuco::test::probe_sequence::double_hashing, 2), @@ -146,6 +151,11 @@ TEMPLATE_TEST_CASE_SIG( (int64_t, int64_t, cuco::test::probe_sequence::double_hashing, 1), (int64_t, int32_t, cuco::test::probe_sequence::double_hashing, 2), (int64_t, int64_t, cuco::test::probe_sequence::double_hashing, 2), + (int8_t, int8_t, cuco::test::probe_sequence::linear_probing, 1), + (int8_t, int8_t, cuco::test::probe_sequence::linear_probing, 2), + (int8_t, int16_t, cuco::test::probe_sequence::linear_probing, 2), + (int16_t, int16_t, cuco::test::probe_sequence::linear_probing, 1), + (int16_t, int16_t, cuco::test::probe_sequence::linear_probing, 2), (int32_t, int32_t, cuco::test::probe_sequence::linear_probing, 1), (int32_t, int64_t, cuco::test::probe_sequence::linear_probing, 1), (int32_t, int32_t, cuco::test::probe_sequence::linear_probing, 2), @@ -162,7 +172,7 @@ TEMPLATE_TEST_CASE_SIG( #endif ) { - constexpr size_type num_keys{301}; + constexpr size_type num_keys = (sizeof(Key) == 1) ? 100 : 301; // XXX: testing static extent is intended, DO NOT CHANGE using extent_type = cuco::extent; @@ -180,17 +190,19 @@ TEMPLATE_TEST_CASE_SIG( } }(); - auto map = cuco::static_map, - probe, - cuco::cuda_allocator, - cuco::storage<2>>{ - extent_type{}, cuco::empty_key{SENTINEL}, cuco::empty_value{SENTINEL}}; - - REQUIRE(map.capacity() == gold_capacity); + auto map = + cuco::static_map, + probe, + cuco::cuda_allocator, + cuco::storage<2>>{extent_type{}, + cuco::empty_key{static_cast(SENTINEL)}, + cuco::empty_value{static_cast(SENTINEL)}}; + + if constexpr (sizeof(Key) > 1) { REQUIRE(map.capacity() == gold_capacity); } test_unique_sequence(map, num_keys); } diff --git a/tests/static_set/for_each_test.cu b/tests/static_set/for_each_test.cu index 5374f8fac..b8d34fe72 100644 --- a/tests/static_set/for_each_test.cu +++ b/tests/static_set/for_each_test.cu @@ -26,10 +26,12 @@ #include +#include + using size_type = std::size_t; template -void test_for_each(Set& set, size_type num_keys) +void test_for_each(Set& set, size_type num_keys, size_type expected_evens, size_type expected_odds) { using Key = typename Set::key_type; @@ -40,7 +42,7 @@ void test_for_each(Set& set, size_type num_keys) // Insert keys auto keys_begin = cuda::make_transform_iterator( cuda::counting_iterator{0}, cuda::proclaim_return_type([] __device__(auto i) { - // generates a sequence of 1, 2, 1, 2, ... + // generates a sequence of 0, 1, 2, ... return static_cast(i); })); set.insert(keys_begin, keys_begin + num_keys, stream); @@ -56,7 +58,7 @@ void test_for_each(Set& set, size_type num_keys) if (slot % 2 == 0) { counter->fetch_add(slot, cuda::memory_order_relaxed); } }, stream); - REQUIRE(counter_storage.load_to_host(stream) == 249'500); + REQUIRE(counter_storage.load_to_host(stream) == expected_evens); counter_storage.reset(stream); @@ -68,13 +70,21 @@ void test_for_each(Set& set, size_type num_keys) if (!(slot % 2 == 0)) { counter->fetch_add(slot, cuda::memory_order_relaxed); } }, stream); - REQUIRE(counter_storage.load_to_host(stream) == 250'000); + REQUIRE(counter_storage.load_to_host(stream) == expected_odds); } TEMPLATE_TEST_CASE_SIG( "static_set for_each tests", "", ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), + (int8_t, cuco::test::probe_sequence::double_hashing, 1), + (int8_t, cuco::test::probe_sequence::double_hashing, 2), + (int8_t, cuco::test::probe_sequence::linear_probing, 1), + (int8_t, cuco::test::probe_sequence::linear_probing, 2), + (int16_t, cuco::test::probe_sequence::double_hashing, 1), + (int16_t, cuco::test::probe_sequence::double_hashing, 2), + (int16_t, cuco::test::probe_sequence::linear_probing, 1), + (int16_t, cuco::test::probe_sequence::linear_probing, 2), (int32_t, cuco::test::probe_sequence::double_hashing, 1), (int32_t, cuco::test::probe_sequence::double_hashing, 2), (int64_t, cuco::test::probe_sequence::double_hashing, 1), @@ -92,7 +102,14 @@ TEMPLATE_TEST_CASE_SIG( #endif ) { - constexpr size_type num_keys{1'000}; + // Limit key count for small types: leave room for the -1 sentinel. + // Expected sums are pre-computed per type class: + // int16_t (num_keys=100): sum of evens 0..98 = 2450, sum of odds 1..99 = 2500 + // int16_t+ (num_keys=1000): sum of evens 0..998 = 249'500, sum of odds 1..999 = 250'000 + constexpr size_type num_keys = (sizeof(Key) == 1) ? 100 : 1'000; + constexpr size_type expected_evens = (sizeof(Key) == 1) ? 2'450 : 249'500; + constexpr size_type expected_odds = (sizeof(Key) == 1) ? 2'500 : 250'000; + using probe = std::conditional_t< Probe == cuco::test::probe_sequence::linear_probing, cuco::linear_probing>, @@ -107,5 +124,5 @@ TEMPLATE_TEST_CASE_SIG( cuco::storage<2>>; auto set = set_t{num_keys, cuco::empty_key{static_cast(-1)}}; - test_for_each(set, num_keys); + test_for_each(set, num_keys, expected_evens, expected_odds); } diff --git a/tests/static_set/insert_and_find_test.cu b/tests/static_set/insert_and_find_test.cu index 7eefdfb6c..f235116b1 100644 --- a/tests/static_set/insert_and_find_test.cu +++ b/tests/static_set/insert_and_find_test.cu @@ -25,6 +25,8 @@ #include +#include + template void test_insert_and_find(Set& set, std::size_t num_keys) { @@ -56,6 +58,13 @@ TEMPLATE_TEST_CASE_SIG( "static_set Insert and find", "", ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), + (int8_t, cuco::test::probe_sequence::double_hashing, 1), + (int8_t, cuco::test::probe_sequence::linear_probing, 1), + (int8_t, cuco::test::probe_sequence::linear_probing, 2), + (int16_t, cuco::test::probe_sequence::double_hashing, 1), + (int16_t, cuco::test::probe_sequence::double_hashing, 2), + (int16_t, cuco::test::probe_sequence::linear_probing, 1), + (int16_t, cuco::test::probe_sequence::linear_probing, 2), (int32_t, cuco::test::probe_sequence::double_hashing, 1), (int32_t, cuco::test::probe_sequence::double_hashing, 2), (int64_t, cuco::test::probe_sequence::double_hashing, 1), @@ -73,7 +82,8 @@ TEMPLATE_TEST_CASE_SIG( #endif ) { - constexpr std::size_t num_keys{400}; + // Limit key count for small types: leave room for the -1 sentinel + constexpr std::size_t num_keys = (sizeof(Key) == 1) ? 100 : 400; using probe = std::conditional_t>, diff --git a/tests/static_set/retrieve_all_test.cu b/tests/static_set/retrieve_all_test.cu index ed2e70abb..b626a63f5 100644 --- a/tests/static_set/retrieve_all_test.cu +++ b/tests/static_set/retrieve_all_test.cu @@ -27,6 +27,8 @@ #include +#include + template void test_unique_sequence(Set& set, std::size_t num_keys) { @@ -61,6 +63,13 @@ TEMPLATE_TEST_CASE_SIG( "static_set::retrieve_all tests", "", ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), + (int8_t, cuco::test::probe_sequence::double_hashing, 1), + (int8_t, cuco::test::probe_sequence::linear_probing, 1), + (int8_t, cuco::test::probe_sequence::linear_probing, 2), + (int16_t, cuco::test::probe_sequence::double_hashing, 1), + (int16_t, cuco::test::probe_sequence::double_hashing, 2), + (int16_t, cuco::test::probe_sequence::linear_probing, 1), + (int16_t, cuco::test::probe_sequence::linear_probing, 2), (int32_t, cuco::test::probe_sequence::double_hashing, 1), (int32_t, cuco::test::probe_sequence::double_hashing, 2), (int64_t, cuco::test::probe_sequence::double_hashing, 1), @@ -78,7 +87,8 @@ TEMPLATE_TEST_CASE_SIG( #endif ) { - constexpr std::size_t num_keys{400}; + // Limit key count for small types: leave room for the -1 sentinel + constexpr std::size_t num_keys = (sizeof(Key) == 1) ? 100 : 400; constexpr double desired_load_factor = 1.; using probe = std::conditional_t::value) { - return (CGSize == 1) ? 401 // 401 x 1 x 1 - : 422; // 211 x 2 x 1 + if constexpr (num_keys == 100) { + return (CGSize == 1) ? 101 // 101 x 1 x 1 + : 106; // 53 x 2 x 1 + } else { + return (CGSize == 1) ? 401 // 401 x 1 x 1 + : 422; // 211 x 2 x 1 + } } else { - return 400; + return num_keys; } }(); diff --git a/tests/static_set/retrieve_if_test.cu b/tests/static_set/retrieve_if_test.cu index b82969926..41dcfca1e 100644 --- a/tests/static_set/retrieve_if_test.cu +++ b/tests/static_set/retrieve_if_test.cu @@ -27,6 +27,8 @@ #include +#include + using size_type = std::size_t; template @@ -110,6 +112,8 @@ __global__ void test_retrieve_if_all_true_kernel( TEMPLATE_TEST_CASE_SIG("static_set retrieve_if", "", ((typename Key), Key), + (int8_t), + (int16_t), (int32_t), (int64_t) #if defined(CUCO_HAS_128BIT_ATOMICS) @@ -118,7 +122,8 @@ TEMPLATE_TEST_CASE_SIG("static_set retrieve_if", #endif ) { - constexpr size_type num_keys{400}; + // Limit key count for small types: keys start at 1, sentinel is -1 + constexpr size_type num_keys = (sizeof(Key) == 1) ? 100 : 400; using container_type = cuco::static_set; diff --git a/tests/static_set/retrieve_test.cu b/tests/static_set/retrieve_test.cu index f5b7763dc..0908fc53f 100644 --- a/tests/static_set/retrieve_test.cu +++ b/tests/static_set/retrieve_test.cu @@ -28,6 +28,8 @@ #include +#include + static constexpr int key_sentinel = -1; template @@ -75,6 +77,13 @@ TEMPLATE_TEST_CASE_SIG( "static_set retrieve tests", "", ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), + (int8_t, cuco::test::probe_sequence::double_hashing, 1), + (int8_t, cuco::test::probe_sequence::linear_probing, 1), + (int8_t, cuco::test::probe_sequence::linear_probing, 2), + (int16_t, cuco::test::probe_sequence::double_hashing, 1), + (int16_t, cuco::test::probe_sequence::double_hashing, 2), + (int16_t, cuco::test::probe_sequence::linear_probing, 1), + (int16_t, cuco::test::probe_sequence::linear_probing, 2), (int32_t, cuco::test::probe_sequence::double_hashing, 1), (int32_t, cuco::test::probe_sequence::double_hashing, 2), (int64_t, cuco::test::probe_sequence::double_hashing, 1), @@ -92,7 +101,8 @@ TEMPLATE_TEST_CASE_SIG( #endif ) { - constexpr std::size_t num_keys{400}; + // Limit key count for small types: leave room for the -1 sentinel + constexpr std::size_t num_keys = (sizeof(Key) == 1) ? 100 : 400; constexpr double desired_load_factor = 1.; using probe = std::conditional_t +#include #include template @@ -47,6 +48,8 @@ __global__ void shared_memory_test_kernel(Ref* sets, auto insert_ref = sets[set_id].make_copy(g, sm_buffer, cuco::thread_scope_block); auto find_ref = insert_ref.rebind_operators(cuco::op::find); + g.sync(); + for (int i = g.thread_rank(); i < number_of_elements; i += g.size()) { auto found_it = find_ref.find(insterted_keys[offset + i]); @@ -67,6 +70,8 @@ __global__ void shared_memory_test_kernel(Ref* sets, TEMPLATE_TEST_CASE_SIG("static_set shared memory tests", "", ((typename Key), Key), + (int8_t), + (int16_t), (int32_t), (int64_t) #if defined(CUCO_HAS_128BIT_ATOMICS) @@ -75,8 +80,12 @@ TEMPLATE_TEST_CASE_SIG("static_set shared memory tests", #endif ) { - constexpr std::size_t number_of_sets = 1000; - constexpr std::size_t elements_in_set = 500; + // For int8_t: sentinel = -1, so usable key range is -128...127 (excluding -1). + // For int16_t: sentinel = -1, so usable key range is -32768..32767 (excluding -1). + // thrust::sequence over number_of_sets*elements_in_set keys must not wrap. + // Use smaller set count and element count for smaller types. + constexpr std::size_t number_of_sets = (sizeof(Key) <= 2) ? (sizeof(Key) == 1 ? 2 : 100) : 1000; + constexpr std::size_t elements_in_set = (sizeof(Key) <= 2) ? (sizeof(Key) == 1 ? 100 : 600) : 500; constexpr std::size_t set_capacity = 2 * elements_in_set; using extent_type = cuco::extent; @@ -96,7 +105,8 @@ TEMPLATE_TEST_CASE_SIG("static_set shared memory tests", // operator yet std::vector> sets; for (std::size_t set_id = 0; set_id < number_of_sets; ++set_id) { - sets.push_back(std::make_unique(extent_type{}, cuco::empty_key{-1})); + sets.push_back( + std::make_unique(extent_type{}, cuco::empty_key{static_cast(-1)})); } thrust::device_vector d_keys_exist(number_of_sets * elements_in_set); diff --git a/tests/static_set/stream_test.cu b/tests/static_set/stream_test.cu index f9277ed8f..a8d73bf33 100644 --- a/tests/static_set/stream_test.cu +++ b/tests/static_set/stream_test.cu @@ -30,9 +30,13 @@ #include +#include + TEMPLATE_TEST_CASE_SIG("static_set: operations on different stream than constructor", "", ((typename Key), Key), + (int8_t), + (int16_t), (int32_t), (int64_t) #if defined(CUCO_HAS_128BIT_ATOMICS) @@ -47,14 +51,17 @@ TEMPLATE_TEST_CASE_SIG("static_set: operations on different stream than construc CUCO_CUDA_TRY(cudaStreamCreate(&operation_stream)); { // Scope ensures set is destroyed before streams - constexpr std::size_t num_keys{500'000}; - auto set = cuco::static_set{num_keys * 2, + // Scale num_keys to fit in the key type's value range (sentinel = -1). + constexpr std::size_t num_keys = (sizeof(Key) == 1) ? 100 + : (sizeof(Key) == 2) ? 1'000 + : 500'000; + auto set = cuco::static_set{num_keys * 2, cuco::empty_key{static_cast(-1)}, - {}, + {}, cuco::linear_probing<1, cuco::default_hash_function>{}, - {}, - {}, - {}, + {}, + {}, + {}, constructor_stream}; thrust::device_vector d_keys(num_keys); diff --git a/tests/static_set/unique_sequence_test.cu b/tests/static_set/unique_sequence_test.cu index 2db91c2b6..f32ba94c4 100644 --- a/tests/static_set/unique_sequence_test.cu +++ b/tests/static_set/unique_sequence_test.cu @@ -29,6 +29,8 @@ #include +#include + using size_type = int32_t; int32_t constexpr SENTINEL = -1; @@ -141,6 +143,13 @@ TEMPLATE_TEST_CASE_SIG( "static_set unique sequence tests", "", ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), + (int8_t, cuco::test::probe_sequence::double_hashing, 1), + (int8_t, cuco::test::probe_sequence::linear_probing, 1), + (int8_t, cuco::test::probe_sequence::linear_probing, 2), + (int16_t, cuco::test::probe_sequence::double_hashing, 1), + (int16_t, cuco::test::probe_sequence::double_hashing, 2), + (int16_t, cuco::test::probe_sequence::linear_probing, 1), + (int16_t, cuco::test::probe_sequence::linear_probing, 2), (int32_t, cuco::test::probe_sequence::double_hashing, 1), (int32_t, cuco::test::probe_sequence::double_hashing, 2), (int64_t, cuco::test::probe_sequence::double_hashing, 1), @@ -158,17 +167,23 @@ TEMPLATE_TEST_CASE_SIG( #endif ) { - constexpr size_type num_keys{400}; + // Limit key count for small types: leave room for the -1 sentinel + constexpr size_type num_keys = (sizeof(Key) == 1) ? 100 : 400; using probe = std::conditional_t>, cuco::double_hashing>>; constexpr size_type gold_capacity = [&]() { if constexpr (cuco::is_double_hashing::value) { - return (CGSize == 1) ? 422 // 211 x 1 x 2 - : 404; // 101 x 2 x 2 + if constexpr (num_keys == 100) { + return (CGSize == 1) ? 106 // 53 x 1 x 2 + : 106; + } else { + return (CGSize == 1) ? 422 // 211 x 1 x 2 + : 404; // 101 x 2 x 2 + } } else { - return 400; + return num_keys; } }();