diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index d7fb7bfd3a..f4a2729417 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -89,6 +89,7 @@ add_library( src/neighbors/brute_force.cpp src/neighbors/ivf_flat.cpp src/neighbors/ivf_pq.cpp + src/neighbors/ivf_sq.cpp src/neighbors/cagra.cpp $<$:src/neighbors/hnsw.cpp> $<$:src/neighbors/mg_ivf_pq.cpp> diff --git a/c/include/cuvs/core/all.h b/c/include/cuvs/core/all.h index 6834f1b095..545c7ec6f4 100644 --- a/c/include/cuvs/core/all.h +++ b/c/include/cuvs/core/all.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/c/include/cuvs/neighbors/ivf_sq.h b/c/include/cuvs/neighbors/ivf_sq.h new file mode 100644 index 0000000000..a771bc0d4d --- /dev/null +++ b/c/include/cuvs/neighbors/ivf_sq.h @@ -0,0 +1,346 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @defgroup ivf_sq_c_index_params IVF-SQ index build parameters + * @{ + */ +/** + * @brief Supplemental parameters to build IVF-SQ Index + * + */ +struct cuvsIvfSqIndexParams { + /** Distance type. */ + cuvsDistanceType metric; + /** The argument used by some distance metrics. */ + float metric_arg; + /** + * Whether to add the dataset content to the index, i.e.: + * + * - `true` means the index is filled with the dataset vectors and ready to search after calling + * `build`. + * - `false` means `build` only trains the underlying model (e.g. quantizer or clustering), but + * the index is left empty; you'd need to call `extend` on the index afterwards to populate it. + */ + bool add_data_on_build; + /** The number of inverted lists (clusters) */ + uint32_t n_lists; + /** The number of iterations searching for kmeans centers (index building). 
*/ + uint32_t kmeans_n_iters; + /** + * The number of data vectors per cluster to use during iterative kmeans building. + * The index uses at most `n_lists * max_train_points_per_cluster` rows for training. + */ + uint32_t max_train_points_per_cluster; + /** + * By default, the algorithm allocates more space than necessary for individual clusters + * (`list_data`). This allows to amortize the cost of memory allocation and reduce the number of + * data copies during repeated calls to `extend` (extending the database). + * + * The alternative is the conservative allocation behavior; when enabled, the algorithm always + * allocates the minimum amount of memory required to store the given number of records. Set this + * flag to `true` if you prefer to use as little GPU memory for the database as possible. + */ + bool conservative_memory_allocation; +}; + +typedef struct cuvsIvfSqIndexParams* cuvsIvfSqIndexParams_t; + +/** + * @brief Allocate IVF-SQ Index params, and populate with default values + * + * @param[in] index_params cuvsIvfSqIndexParams_t to allocate + * @return cuvsError_t + */ +CUVS_EXPORT cuvsError_t cuvsIvfSqIndexParamsCreate(cuvsIvfSqIndexParams_t* index_params); + +/** + * @brief De-allocate IVF-SQ Index params + * + * @param[in] index_params + * @return cuvsError_t + */ +CUVS_EXPORT cuvsError_t cuvsIvfSqIndexParamsDestroy(cuvsIvfSqIndexParams_t index_params); +/** + * @} + */ + +/** + * @defgroup ivf_sq_c_search_params IVF-SQ index search parameters + * @{ + */ +/** + * @brief Supplemental parameters to search IVF-SQ index + * + */ +struct cuvsIvfSqSearchParams { + /** The number of clusters to search. 
*/ + uint32_t n_probes; +}; + +typedef struct cuvsIvfSqSearchParams* cuvsIvfSqSearchParams_t; + +/** + * @brief Allocate IVF-SQ search params, and populate with default values + * + * @param[in] params cuvsIvfSqSearchParams_t to allocate + * @return cuvsError_t + */ +CUVS_EXPORT cuvsError_t cuvsIvfSqSearchParamsCreate(cuvsIvfSqSearchParams_t* params); + +/** + * @brief De-allocate IVF-SQ search params + * + * @param[in] params + * @return cuvsError_t + */ +CUVS_EXPORT cuvsError_t cuvsIvfSqSearchParamsDestroy(cuvsIvfSqSearchParams_t params); +/** + * @} + */ + +/** + * @defgroup ivf_sq_c_index IVF-SQ index + * @{ + */ +/** + * @brief Struct to hold address of cuvs::neighbors::ivf_sq::index and its active trained dtype + * + */ +typedef struct { + uintptr_t addr; + DLDataType dtype; +} cuvsIvfSqIndex; + +typedef cuvsIvfSqIndex* cuvsIvfSqIndex_t; + +/** + * @brief Allocate IVF-SQ index + * + * @param[in] index cuvsIvfSqIndex_t to allocate + * @return cuvsError_t + */ +CUVS_EXPORT cuvsError_t cuvsIvfSqIndexCreate(cuvsIvfSqIndex_t* index); + +/** + * @brief De-allocate IVF-SQ index + * + * @param[in] index cuvsIvfSqIndex_t to de-allocate + */ +CUVS_EXPORT cuvsError_t cuvsIvfSqIndexDestroy(cuvsIvfSqIndex_t index); + +/** Get the number of clusters/inverted lists */ +CUVS_EXPORT cuvsError_t cuvsIvfSqIndexGetNLists(cuvsIvfSqIndex_t index, int64_t* n_lists); + +/** Get the dimensionality of the data */ +CUVS_EXPORT cuvsError_t cuvsIvfSqIndexGetDim(cuvsIvfSqIndex_t index, int64_t* dim); + +/** Get the size of the index */ +CUVS_EXPORT cuvsError_t cuvsIvfSqIndexGetSize(cuvsIvfSqIndex_t index, int64_t* size); + +/** + * @brief Get the cluster centers corresponding to the lists [n_lists, dim] + * + * @param[in] index cuvsIvfSqIndex_t Built Ivf-SQ Index + * @param[out] centers Preallocated array on host or device memory to store output, [n_lists, dim] + * @return cuvsError_t + */ +CUVS_EXPORT cuvsError_t cuvsIvfSqIndexGetCenters(cuvsIvfSqIndex_t index, DLManagedTensor* centers); 
+ +/** + * @} + */ + +/** + * @defgroup ivf_sq_c_index_build IVF-SQ index build + * @{ + */ +/** + * @brief Build an IVF-SQ index with a `DLManagedTensor` which has underlying + * `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, + * or `kDLCPU`. Also, acceptable underlying types are: + * 1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` + * 2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16` + * + * @code {.c} + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // Assume a populated `DLManagedTensor` type here + * DLManagedTensor dataset; + * + * // Create default index params + * cuvsIvfSqIndexParams_t index_params; + * cuvsError_t params_create_status = cuvsIvfSqIndexParamsCreate(&index_params); + * + * // Create IVF-SQ index + * cuvsIvfSqIndex_t index; + * cuvsError_t index_create_status = cuvsIvfSqIndexCreate(&index); + * + * // Build the IVF-SQ Index + * cuvsError_t build_status = cuvsIvfSqBuild(res, index_params, &dataset, index); + * + * // de-allocate `index_params`, `index` and `res` + * cuvsError_t params_destroy_status = cuvsIvfSqIndexParamsDestroy(index_params); + * cuvsError_t index_destroy_status = cuvsIvfSqIndexDestroy(index); + * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); + * @endcode + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] index_params cuvsIvfSqIndexParams_t used to build IVF-SQ index + * @param[in] dataset DLManagedTensor* training dataset + * @param[out] index cuvsIvfSqIndex_t Newly built IVF-SQ index + * @return cuvsError_t + */ +CUVS_EXPORT cuvsError_t cuvsIvfSqBuild(cuvsResources_t res, + cuvsIvfSqIndexParams_t index_params, + DLManagedTensor* dataset, + cuvsIvfSqIndex_t index); +/** + * @} + */ + +/** + * @defgroup ivf_sq_c_index_search IVF-SQ index search + * @{ + */ +/** + * @brief Search an IVF-SQ index with a `DLManagedTensor` which has underlying + 
* `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, or `kDLCUDAManaged`. + * Types for input are: + * 1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` or 16 + * 2. `neighbors`: `kDLDataType.code == kDLInt` and `kDLDataType.bits = 64` + * 3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` + * + * @code {.c} + * #include <cuvs/core/c_api.h> + * #include <cuvs/neighbors/ivf_sq.h> + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // Assume a populated `DLManagedTensor` type here + * DLManagedTensor queries; + * DLManagedTensor neighbors; + * DLManagedTensor distances; + * + * // Create default search params + * cuvsIvfSqSearchParams_t search_params; + * cuvsError_t params_create_status = cuvsIvfSqSearchParamsCreate(&search_params); + * + * // Search the `index` built using `cuvsIvfSqBuild` + * cuvsError_t search_status = cuvsIvfSqSearch( + * res, search_params, index, &queries, &neighbors, &distances, (cuvsFilter){.type = NO_FILTER}); + * + * // de-allocate `search_params` and `res` + * cuvsError_t params_destroy_status = cuvsIvfSqSearchParamsDestroy(search_params); + * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); + * @endcode + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] search_params cuvsIvfSqSearchParams_t used to search IVF-SQ index + * @param[in] index cuvsIvfSqIndex_t which has been populated by `cuvsIvfSqBuild` + * @param[in] queries DLManagedTensor* queries dataset to search + * @param[out] neighbors DLManagedTensor* output `k` neighbors for queries + * @param[out] distances DLManagedTensor* output `k` distances for queries + * @param[in] filter cuvsFilter input filter that can be used + * to filter queries and neighbors based on the given bitset.
+ */ +CUVS_EXPORT cuvsError_t cuvsIvfSqSearch(cuvsResources_t res, + cuvsIvfSqSearchParams_t search_params, + cuvsIvfSqIndex_t index, + DLManagedTensor* queries, + DLManagedTensor* neighbors, + DLManagedTensor* distances, + cuvsFilter filter); + +/** + * @} + */ + +/** + * @defgroup ivf_sq_c_index_serialize IVF-SQ C-API serialize functions + * @{ + */ +/** + * Save the index to file. + * + * Experimental, both the API and the serialization format are subject to change. + * + * @code{.c} + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // create an index with `cuvsIvfSqBuild` + * cuvsIvfSqSerialize(res, "/path/to/index", index); + * @endcode + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] filename the file name for saving the index + * @param[in] index IVF-SQ index + */ +CUVS_EXPORT cuvsError_t cuvsIvfSqSerialize(cuvsResources_t res, const char* filename, cuvsIvfSqIndex_t index); + +/** + * Load index from file. + * + * Experimental, both the API and the serialization format are subject to change. + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] filename the name of the file that stores the index + * @param[out] index IVF-SQ index loaded from disk + */ +CUVS_EXPORT cuvsError_t cuvsIvfSqDeserialize(cuvsResources_t res, + const char* filename, + cuvsIvfSqIndex_t index); +/** + * @} + */ + +/** + * @defgroup ivf_sq_c_index_extend IVF-SQ index extend + * @{ + */ +/** + * @brief Extend the index with the new data. + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] new_vectors DLManagedTensor* the new vectors to add to the index + * @param[in] new_indices DLManagedTensor* vector of new indices for the new vectors. If the index + * is empty, this can be NULL to imply a continuous range `[0...n_rows)`. 
+ * @param[inout] index IVF-SQ index to be extended + * @return cuvsError_t + */ +CUVS_EXPORT cuvsError_t cuvsIvfSqExtend(cuvsResources_t res, + DLManagedTensor* new_vectors, + DLManagedTensor* new_indices, + cuvsIvfSqIndex_t index); +/** + * @} + */ +#ifdef __cplusplus +} +#endif diff --git a/c/src/neighbors/ivf_sq.cpp b/c/src/neighbors/ivf_sq.cpp new file mode 100644 index 0000000000..2656338d31 --- /dev/null +++ b/c/src/neighbors/ivf_sq.cpp @@ -0,0 +1,369 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include "../core/exceptions.hpp" +#include "../core/interop.hpp" + +namespace cuvs::neighbors::ivf_sq { +void convert_c_index_params(cuvsIvfSqIndexParams params, + cuvs::neighbors::ivf_sq::index_params* out) +{ + out->metric = static_cast((int)params.metric); + out->metric_arg = params.metric_arg; + out->add_data_on_build = params.add_data_on_build; + out->n_lists = params.n_lists; + out->kmeans_n_iters = params.kmeans_n_iters; + out->max_train_points_per_cluster = params.max_train_points_per_cluster; + out->conservative_memory_allocation = params.conservative_memory_allocation; +} +void convert_c_search_params(cuvsIvfSqSearchParams params, + cuvs::neighbors::ivf_sq::search_params* out) +{ + out->n_probes = params.n_probes; +} +} // namespace cuvs::neighbors::ivf_sq + +namespace { + +using index_type = cuvs::neighbors::ivf_sq::index; + +void _reset_index(cuvsIvfSqIndex_t index) +{ + RAFT_EXPECTS(index != nullptr, "index cannot be null"); + auto index_ptr = reinterpret_cast(index->addr); + index->addr = 0; + index->dtype = DLDataType{}; + delete index_ptr; +} + +template +void* _build(cuvsResources_t res, cuvsIvfSqIndexParams params, DLManagedTensor* dataset_tensor) +{ + auto res_ptr = reinterpret_cast(res); + + auto build_params = cuvs::neighbors::ivf_sq::index_params(); + 
cuvs::neighbors::ivf_sq::convert_c_index_params(params, &build_params); + + auto dataset = dataset_tensor->dl_tensor; + + if (cuvs::core::is_dlpack_device_compatible(dataset)) { + using mdspan_type = raft::device_matrix_view; + auto mds = cuvs::core::from_dlpack(dataset_tensor); + return new index_type(cuvs::neighbors::ivf_sq::build(*res_ptr, build_params, mds)); + } else { + using mdspan_type = raft::host_matrix_view; + auto mds = cuvs::core::from_dlpack(dataset_tensor); + return new index_type(cuvs::neighbors::ivf_sq::build(*res_ptr, build_params, mds)); + } +} + +template +void _search(cuvsResources_t res, + cuvsIvfSqSearchParams params, + cuvsIvfSqIndex index, + DLManagedTensor* queries_tensor, + DLManagedTensor* neighbors_tensor, + DLManagedTensor* distances_tensor, + cuvsFilter* filter) +{ + auto res_ptr = reinterpret_cast(res); + auto index_ptr = reinterpret_cast*>(index.addr); + + auto search_params = cuvs::neighbors::ivf_sq::search_params(); + cuvs::neighbors::ivf_sq::convert_c_search_params(params, &search_params); + + using queries_mdspan_type = raft::device_matrix_view; + using neighbors_mdspan_type = raft::device_matrix_view; + using distances_mdspan_type = raft::device_matrix_view; + auto queries_mds = cuvs::core::from_dlpack(queries_tensor); + auto neighbors_mds = cuvs::core::from_dlpack(neighbors_tensor); + auto distances_mds = cuvs::core::from_dlpack(distances_tensor); + + if (filter == nullptr || filter->type == NO_FILTER) { + cuvs::neighbors::ivf_sq::search( + *res_ptr, search_params, *index_ptr, queries_mds, neighbors_mds, distances_mds); + } else if (filter->type == BITSET) { + using filter_mdspan_type = raft::device_vector_view; + auto removed_indices_tensor = reinterpret_cast(filter->addr); + auto removed_indices = cuvs::core::from_dlpack(removed_indices_tensor); + cuvs::core::bitset_view removed_indices_bitset(removed_indices, + index_ptr->size()); + auto bitset_filter_obj = cuvs::neighbors::filtering::bitset_filter(removed_indices_bitset); 
+ cuvs::neighbors::ivf_sq::search(*res_ptr, + search_params, + *index_ptr, + queries_mds, + neighbors_mds, + distances_mds, + bitset_filter_obj); + } else { + RAFT_FAIL("Unsupported filter type: BITMAP"); + } +} + +void _serialize(cuvsResources_t res, const char* filename, cuvsIvfSqIndex index) +{ + auto res_ptr = reinterpret_cast(res); + auto index_ptr = reinterpret_cast*>(index.addr); + cuvs::neighbors::ivf_sq::serialize(*res_ptr, std::string(filename), *index_ptr); +} + +void* _deserialize(cuvsResources_t res, const char* filename) +{ + auto res_ptr = reinterpret_cast(res); + auto index = new cuvs::neighbors::ivf_sq::index(*res_ptr); + cuvs::neighbors::ivf_sq::deserialize(*res_ptr, std::string(filename), index); + return index; +} + +template +void _extend(cuvsResources_t res, + DLManagedTensor* new_vectors, + DLManagedTensor* new_indices, + cuvsIvfSqIndex index) +{ + auto res_ptr = reinterpret_cast(res); + auto index_ptr = reinterpret_cast*>(index.addr); + + bool on_device = cuvs::core::is_dlpack_device_compatible(new_vectors->dl_tensor); + if (new_indices != nullptr && + on_device != cuvs::core::is_dlpack_device_compatible(new_indices->dl_tensor)) { + RAFT_FAIL("extend inputs must both either be on device memory or host memory"); + } + + if (on_device) { + using vectors_mdspan_type = raft::device_matrix_view; + using indices_mdspan_type = raft::device_vector_view; + auto vectors_mds = cuvs::core::from_dlpack(new_vectors); + std::optional indices_mds; + if (new_indices != nullptr) { + indices_mds.emplace(cuvs::core::from_dlpack(new_indices)); + } + cuvs::neighbors::ivf_sq::extend(*res_ptr, vectors_mds, indices_mds, index_ptr); + } else { + using vectors_mdspan_type = raft::host_matrix_view; + using indices_mdspan_type = raft::host_vector_view; + auto vectors_mds = cuvs::core::from_dlpack(new_vectors); + std::optional indices_mds; + if (new_indices != nullptr) { + indices_mds.emplace(cuvs::core::from_dlpack(new_indices)); + } + 
cuvs::neighbors::ivf_sq::extend(*res_ptr, vectors_mds, indices_mds, index_ptr); + } +} + +void _get_centers(cuvsIvfSqIndex index, DLManagedTensor* centers) +{ + auto index_ptr = reinterpret_cast*>(index.addr); + cuvs::core::to_dlpack(index_ptr->centers(), centers); +} +} // namespace + +extern "C" cuvsError_t cuvsIvfSqIndexCreate(cuvsIvfSqIndex_t* index) +{ + return cuvs::core::translate_exceptions([=] { *index = new cuvsIvfSqIndex{}; }); +} + +extern "C" cuvsError_t cuvsIvfSqIndexDestroy(cuvsIvfSqIndex_t index_c_ptr) +{ + return cuvs::core::translate_exceptions([=] { + _reset_index(index_c_ptr); + delete index_c_ptr; + }); +} + +extern "C" cuvsError_t cuvsIvfSqBuild(cuvsResources_t res, + cuvsIvfSqIndexParams_t params, + DLManagedTensor* dataset_tensor, + cuvsIvfSqIndex_t index) +{ + return cuvs::core::translate_exceptions([=] { + auto dataset = dataset_tensor->dl_tensor; + + if (dataset.dtype.code != kDLFloat || + (dataset.dtype.bits != 32 && dataset.dtype.bits != 16)) { + RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d", + dataset.dtype.code, + dataset.dtype.bits); + } + + _reset_index(index); + + index->dtype.code = dataset.dtype.code; + index->dtype.bits = dataset.dtype.bits; + + if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) { + index->addr = reinterpret_cast(_build(res, *params, dataset_tensor)); + } else if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 16) { + index->addr = reinterpret_cast(_build(res, *params, dataset_tensor)); + } + }); +} + +static cuvsError_t _cuvsIvfSqSearchImpl(cuvsResources_t res, + cuvsIvfSqSearchParams_t params, + cuvsIvfSqIndex_t index_c_ptr, + DLManagedTensor* queries_tensor, + DLManagedTensor* neighbors_tensor, + DLManagedTensor* distances_tensor, + cuvsFilter* filter) +{ + return cuvs::core::translate_exceptions([=] { + auto queries = queries_tensor->dl_tensor; + auto neighbors = neighbors_tensor->dl_tensor; + auto distances = distances_tensor->dl_tensor; + + 
RAFT_EXPECTS(cuvs::core::is_dlpack_device_compatible(queries), + "queries should have device compatible memory"); + RAFT_EXPECTS(cuvs::core::is_dlpack_device_compatible(neighbors), + "neighbors should have device compatible memory"); + RAFT_EXPECTS(cuvs::core::is_dlpack_device_compatible(distances), + "distances should have device compatible memory"); + + RAFT_EXPECTS(neighbors.dtype.code == kDLInt && neighbors.dtype.bits == 64, + "neighbors should be of type int64_t"); + RAFT_EXPECTS(distances.dtype.code == kDLFloat && distances.dtype.bits == 32, + "distances should be of type float32"); + + auto index = *index_c_ptr; + if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) { + _search( + res, *params, index, queries_tensor, neighbors_tensor, distances_tensor, filter); + } else if (queries.dtype.code == kDLFloat && queries.dtype.bits == 16) { + _search( + res, *params, index, queries_tensor, neighbors_tensor, distances_tensor, filter); + } else { + RAFT_FAIL("Unsupported queries DLtensor dtype: %d and bits: %d", + queries.dtype.code, + queries.dtype.bits); + } + }); +} + +extern "C" cuvsError_t cuvsIvfSqSearch(cuvsResources_t res, + cuvsIvfSqSearchParams_t params, + cuvsIvfSqIndex_t index_c_ptr, + DLManagedTensor* queries_tensor, + DLManagedTensor* neighbors_tensor, + DLManagedTensor* distances_tensor, + cuvsFilter filter) +{ + return _cuvsIvfSqSearchImpl( + res, params, index_c_ptr, queries_tensor, neighbors_tensor, distances_tensor, &filter); +} + +extern "C" cuvsError_t cuvsIvfSqIndexParamsCreate(cuvsIvfSqIndexParams_t* params) +{ + return cuvs::core::translate_exceptions([=] { + *params = new cuvsIvfSqIndexParams{.metric = L2Expanded, + .metric_arg = 2.0f, + .add_data_on_build = true, + .n_lists = 1024, + .kmeans_n_iters = 20, + .max_train_points_per_cluster = 256, + .conservative_memory_allocation = false}; + }); +} + +extern "C" cuvsError_t cuvsIvfSqIndexParamsDestroy(cuvsIvfSqIndexParams_t params) +{ + return cuvs::core::translate_exceptions([=] { 
delete params; }); +} + +extern "C" cuvsError_t cuvsIvfSqSearchParamsCreate(cuvsIvfSqSearchParams_t* params) +{ + return cuvs::core::translate_exceptions( + [=] { *params = new cuvsIvfSqSearchParams{.n_probes = 20}; }); +} + +extern "C" cuvsError_t cuvsIvfSqSearchParamsDestroy(cuvsIvfSqSearchParams_t params) +{ + return cuvs::core::translate_exceptions([=] { delete params; }); +} + +extern "C" cuvsError_t cuvsIvfSqDeserialize(cuvsResources_t res, + const char* filename, + cuvsIvfSqIndex_t index) +{ + return cuvs::core::translate_exceptions([=] { + _reset_index(index); + index->addr = reinterpret_cast(_deserialize(res, filename)); + }); +} + +extern "C" cuvsError_t cuvsIvfSqSerialize(cuvsResources_t res, + const char* filename, + cuvsIvfSqIndex_t index) +{ + return cuvs::core::translate_exceptions([=] { _serialize(res, filename, *index); }); +} + +extern "C" cuvsError_t cuvsIvfSqExtend(cuvsResources_t res, + DLManagedTensor* new_vectors, + DLManagedTensor* new_indices, + cuvsIvfSqIndex_t index) +{ + return cuvs::core::translate_exceptions([=] { + auto vectors = new_vectors->dl_tensor; + + if (index->dtype.code == 0 && index->dtype.bits == 0) { + index->dtype.code = vectors.dtype.code; + index->dtype.bits = vectors.dtype.bits; + } + + if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 32) { + _extend(res, new_vectors, new_indices, *index); + } else if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 16) { + _extend(res, new_vectors, new_indices, *index); + } else { + RAFT_FAIL( + "Unsupported vectors DLtensor dtype: %d and bits: %d", vectors.dtype.code, vectors.dtype.bits); + } + }); +} + +extern "C" cuvsError_t cuvsIvfSqIndexGetNLists(cuvsIvfSqIndex_t index, int64_t* n_lists) +{ + return cuvs::core::translate_exceptions([=] { + auto index_ptr = + reinterpret_cast*>(index->addr); + *n_lists = index_ptr->n_lists(); + }); +} + +extern "C" cuvsError_t cuvsIvfSqIndexGetDim(cuvsIvfSqIndex_t index, int64_t* dim) +{ + return 
cuvs::core::translate_exceptions([=] { + auto index_ptr = + reinterpret_cast*>(index->addr); + *dim = index_ptr->dim(); + }); +} + +extern "C" cuvsError_t cuvsIvfSqIndexGetSize(cuvsIvfSqIndex_t index, int64_t* size) +{ + return cuvs::core::translate_exceptions([=] { + auto index_ptr = + reinterpret_cast*>(index->addr); + *size = index_ptr->size(); + }); +} + +extern "C" cuvsError_t cuvsIvfSqIndexGetCenters(cuvsIvfSqIndex_t index, DLManagedTensor* centers) +{ + return cuvs::core::translate_exceptions([=] { _get_centers(*index, centers); }); +} diff --git a/c/src/neighbors/ivf_sq.hpp b/c/src/neighbors/ivf_sq.hpp new file mode 100644 index 0000000000..3a08bc689a --- /dev/null +++ b/c/src/neighbors/ivf_sq.hpp @@ -0,0 +1,14 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#include +#include + +namespace cuvs::neighbors::ivf_sq { +/// Converts a cuvsIvfSqIndexParams struct (c) to a ivf_sq::index_params (C++) struct +void convert_c_index_params(cuvsIvfSqIndexParams params, + cuvs::neighbors::ivf_sq::index_params* out); +void convert_c_search_params(cuvsIvfSqSearchParams params, + cuvs::neighbors::ivf_sq::search_params* out); +} // namespace cuvs::neighbors::ivf_sq diff --git a/c/tests/CMakeLists.txt b/c/tests/CMakeLists.txt index a1e05e4168..f1cff7824e 100644 --- a/c/tests/CMakeLists.txt +++ b/c/tests/CMakeLists.txt @@ -82,6 +82,7 @@ ConfigureTest(NAME KMEANS_C_TEST PATH cluster/kmeans_c.cu) ConfigureTest(NAME BRUTEFORCE_C_TEST PATH neighbors/run_brute_force_c.c neighbors/brute_force_c.cu) ConfigureTest(NAME IVF_FLAT_C_TEST PATH neighbors/run_ivf_flat_c.c neighbors/ann_ivf_flat_c.cu) ConfigureTest(NAME IVF_PQ_C_TEST PATH neighbors/run_ivf_pq_c.c neighbors/ann_ivf_pq_c.cu) +ConfigureTest(NAME IVF_SQ_C_TEST PATH neighbors/run_ivf_sq_c.c neighbors/ann_ivf_sq_c.cu) ConfigureTest(NAME CAGRA_C_TEST PATH neighbors/ann_cagra_c.cu) ConfigureTest(NAME MG_C_TEST PATH neighbors/run_mg_c.c neighbors/ann_mg_c.cu) 
ConfigureTest( diff --git a/c/tests/neighbors/ann_ivf_sq_c.cu b/c/tests/neighbors/ann_ivf_sq_c.cu new file mode 100644 index 0000000000..42c1b29999 --- /dev/null +++ b/c/tests/neighbors/ann_ivf_sq_c.cu @@ -0,0 +1,140 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include + +#include "neighbors/ann_utils.cuh" +#include + +extern "C" void run_ivf_sq(cuvsResources_t res, + int64_t n_rows, + int64_t n_queries, + int64_t n_dim, + uint32_t n_neighbors, + float* index_data, + float* query_data, + float* distances_data, + int64_t* neighbors_data, + cuvsDistanceType metric, + size_t n_probes, + size_t n_lists); + +template +void generate_random_data(raft::handle_t const& handle, T* devPtr, size_t size) +{ + raft::random::RngState r(1234ULL); + raft::random::uniform(handle, r, devPtr, size, T(0.1), T(2.0)); +}; + +template +void recall_eval(raft::handle_t const& handle, + T* query_data, + T* index_data, + IdxT* neighbors, + T* distances, + size_t n_queries, + size_t n_rows, + size_t n_dim, + size_t n_neighbors, + cuvsDistanceType metric, + size_t n_probes, + size_t n_lists) +{ + auto distances_ref = raft::make_device_matrix(handle, n_queries, n_neighbors); + auto neighbors_ref = raft::make_device_matrix(handle, n_queries, n_neighbors); + cuvs::neighbors::naive_knn( + handle, + distances_ref.data_handle(), + neighbors_ref.data_handle(), + query_data, + index_data, + n_queries, + n_rows, + n_dim, + n_neighbors, + static_cast((uint16_t)metric)); + + size_t size = n_queries * n_neighbors; + std::vector neighbors_h(size); + std::vector distances_h(size); + std::vector neighbors_ref_h(size); + std::vector distances_ref_h(size); + + auto stream = raft::resource::get_cuda_stream(handle); + raft::copy(neighbors_h.data(), neighbors, size, stream); + raft::copy(distances_h.data(), distances, size, stream); + raft::copy(neighbors_ref_h.data(), neighbors_ref.data_handle(), 
size, stream); + raft::copy(distances_ref_h.data(), distances_ref.data_handle(), size, stream); + raft::resource::sync_stream(handle); + + double min_recall = static_cast(n_probes) / static_cast(n_lists); + ASSERT_TRUE(cuvs::neighbors::eval_neighbours(neighbors_ref_h, + neighbors_h, + distances_ref_h, + distances_h, + n_queries, + n_neighbors, + 0.001, + min_recall)); +}; + +TEST(IvfSqC, BuildSearch) +{ + int64_t n_rows = 8096; + int64_t n_queries = 128; + int64_t n_dim = 32; + uint32_t n_neighbors = 8; + + raft::handle_t handle; + auto stream = raft::resource::get_cuda_stream(handle); + + cuvsDistanceType metric = L2Expanded; + size_t n_probes = 20; + size_t n_lists = 1024; + + rmm::device_uvector index_data(n_rows * n_dim, stream); + rmm::device_uvector query_data(n_queries * n_dim, stream); + rmm::device_uvector neighbors_data(n_queries * n_neighbors, stream); + rmm::device_uvector distances_data(n_queries * n_neighbors, stream); + + generate_random_data(handle, index_data.data(), n_rows * n_dim); + generate_random_data(handle, query_data.data(), n_queries * n_dim); + + cuvsResources_t res; + cuvsResourcesCreate(&res); + cuvsStreamSet(res, stream); + + run_ivf_sq(res, + n_rows, + n_queries, + n_dim, + n_neighbors, + index_data.data(), + query_data.data(), + distances_data.data(), + neighbors_data.data(), + metric, + n_probes, + n_lists); + + recall_eval(handle, + query_data.data(), + index_data.data(), + neighbors_data.data(), + distances_data.data(), + n_queries, + n_rows, + n_dim, + n_neighbors, + metric, + n_probes, + n_lists); + + cuvsResourcesDestroy(res); +} diff --git a/c/tests/neighbors/c_api.c b/c/tests/neighbors/c_api.c index 6988aaf618..86108ea703 100644 --- a/c/tests/neighbors/c_api.c +++ b/c/tests/neighbors/c_api.c @@ -1,11 +1,12 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ #include #include #include +#include #include #include @@ -47,6 +48,15 @@ void test_compile_tiered_index() cuvsTieredIndexExtend(resources, &dataset, tiered_index); } +void test_compile_ivf_sq() +{ + assert(!"test_compile_ivf_sq is not meant to be run"); + + cuvsIvfSqIndex_t index; + cuvsIvfSqIndexCreate(&index); + cuvsIvfSqIndexDestroy(index); +} + void test_compile_all_neighbors() { // Smoke test to ensure that the all_neighbors.h API compiles correctly @@ -66,6 +76,7 @@ int main() // These are smoke tests that check that the C-APIs compile with a C compiler. // These are not meant to be run. test_compile_cagra(); + test_compile_ivf_sq(); test_compile_tiered_index(); test_compile_all_neighbors(); diff --git a/c/tests/neighbors/run_ivf_sq_c.c b/c/tests/neighbors/run_ivf_sq_c.c new file mode 100644 index 0000000000..7f8f08b2d9 --- /dev/null +++ b/c/tests/neighbors/run_ivf_sq_c.c @@ -0,0 +1,89 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +void run_ivf_sq(cuvsResources_t res, + int64_t n_rows, + int64_t n_queries, + int64_t n_dim, + uint32_t n_neighbors, + float* index_data, + float* query_data, + float* distances_data, + int64_t* neighbors_data, + cuvsDistanceType metric, + size_t n_probes, + size_t n_lists) +{ + DLManagedTensor dataset_tensor; + dataset_tensor.dl_tensor.data = index_data; + dataset_tensor.dl_tensor.device.device_type = kDLCUDA; + dataset_tensor.dl_tensor.ndim = 2; + dataset_tensor.dl_tensor.dtype.code = kDLFloat; + dataset_tensor.dl_tensor.dtype.bits = 32; + dataset_tensor.dl_tensor.dtype.lanes = 1; + int64_t dataset_shape[2] = {n_rows, n_dim}; + dataset_tensor.dl_tensor.shape = dataset_shape; + dataset_tensor.dl_tensor.strides = NULL; + + cuvsIvfSqIndex_t index; + cuvsIvfSqIndexCreate(&index); + + cuvsIvfSqIndexParams_t build_params; + cuvsIvfSqIndexParamsCreate(&build_params); + build_params->metric = metric; + build_params->n_lists = n_lists; + cuvsIvfSqBuild(res, build_params, &dataset_tensor, index); + + DLManagedTensor queries_tensor; + queries_tensor.dl_tensor.data = (void*)query_data; + queries_tensor.dl_tensor.device.device_type = kDLCUDA; + queries_tensor.dl_tensor.ndim = 2; + queries_tensor.dl_tensor.dtype.code = kDLFloat; + queries_tensor.dl_tensor.dtype.bits = 32; + queries_tensor.dl_tensor.dtype.lanes = 1; + int64_t queries_shape[2] = {n_queries, n_dim}; + queries_tensor.dl_tensor.shape = queries_shape; + queries_tensor.dl_tensor.strides = NULL; + + DLManagedTensor neighbors_tensor; + neighbors_tensor.dl_tensor.data = (void*)neighbors_data; + neighbors_tensor.dl_tensor.device.device_type = kDLCUDA; + neighbors_tensor.dl_tensor.ndim = 2; + neighbors_tensor.dl_tensor.dtype.code = kDLInt; + neighbors_tensor.dl_tensor.dtype.bits = 64; + neighbors_tensor.dl_tensor.dtype.lanes = 1; + int64_t neighbors_shape[2] = {n_queries, n_neighbors}; + neighbors_tensor.dl_tensor.shape = neighbors_shape; + 
neighbors_tensor.dl_tensor.strides = NULL; + + DLManagedTensor distances_tensor; + distances_tensor.dl_tensor.data = (void*)distances_data; + distances_tensor.dl_tensor.device.device_type = kDLCUDA; + distances_tensor.dl_tensor.ndim = 2; + distances_tensor.dl_tensor.dtype.code = kDLFloat; + distances_tensor.dl_tensor.dtype.bits = 32; + distances_tensor.dl_tensor.dtype.lanes = 1; + int64_t distances_shape[2] = {n_queries, n_neighbors}; + distances_tensor.dl_tensor.shape = distances_shape; + distances_tensor.dl_tensor.strides = NULL; + + cuvsIvfSqSearchParams_t search_params; + cuvsIvfSqSearchParamsCreate(&search_params); + search_params->n_probes = n_probes; + cuvsIvfSqSearch(res, + search_params, + index, + &queries_tensor, + &neighbors_tensor, + &distances_tensor, + (cuvsFilter){.type = NO_FILTER}); + cuvsStreamSync(res); + + cuvsIvfSqSearchParamsDestroy(search_params); + cuvsIvfSqIndexParamsDestroy(build_params); + cuvsIvfSqIndexDestroy(index); +} diff --git a/fern/docs.yml b/fern/docs.yml index dde77caab1..db239ce019 100644 --- a/fern/docs.yml +++ b/fern/docs.yml @@ -274,6 +274,8 @@ navigation: path: "./pages/c_api/c-api-neighbors-ivf-flat.md" - page: "Neighbors IVF PQ" path: "./pages/c_api/c-api-neighbors-ivf-pq.md" + - page: "Neighbors IVF SQ" + path: "./pages/c_api/c-api-neighbors-ivf-sq.md" - page: "Neighbors NN Descent" path: "./pages/c_api/c-api-neighbors-nn-descent.md" - page: "Neighbors Refine" diff --git a/fern/pages/c_api/c-api-neighbors-ivf-sq.md b/fern/pages/c_api/c-api-neighbors-ivf-sq.md new file mode 100644 index 0000000000..d339c813af --- /dev/null +++ b/fern/pages/c_api/c-api-neighbors-ivf-sq.md @@ -0,0 +1,409 @@ +--- +slug: api-reference/c-api-neighbors-ivf-sq +--- + +# IVF SQ + +_Source header: `cuvs/neighbors/ivf_sq.h`_ + +## IVF-SQ index build parameters + + +### cuvsIvfSqIndexParams + +Supplemental parameters to build IVF-SQ Index + +```c +struct cuvsIvfSqIndexParams { + cuvsDistanceType metric; + float metric_arg; + bool add_data_on_build; 
+ uint32_t n_lists; + uint32_t kmeans_n_iters; + uint32_t max_train_points_per_cluster; + bool conservative_memory_allocation; +}; +``` + +**Fields** + +| Name | Type | Description | +| --- | --- | --- | +| `metric` | [`cuvsDistanceType`](/api-reference/c-api-distance-distance#cuvsdistancetype) | Distance type. | +| `metric_arg` | `float` | The argument used by some distance metrics. | +| `add_data_on_build` | `bool` | Whether to add the dataset content to the index, i.e.:

- `true` means the index is filled with the dataset vectors and ready to search after calling `build`.
- `false` means `build` only trains the underlying model (e.g. quantizer or clustering), but the index is left empty; you'd need to call `extend` on the index afterwards to populate it. | +| `n_lists` | `uint32_t` | The number of inverted lists (clusters) | +| `kmeans_n_iters` | `uint32_t` | The number of iterations searching for kmeans centers (index building). | +| `max_train_points_per_cluster` | `uint32_t` | The number of data vectors per cluster to use during iterative kmeans building. The index uses at most `n_lists * max_train_points_per_cluster` rows for training. | +| `conservative_memory_allocation` | `bool` | By default, the algorithm allocates more space than necessary for individual clusters (`list_data`). This amortizes the cost of memory allocation and reduces the number of data copies during repeated calls to `extend` (extending the database).

The alternative is the conservative allocation behavior; when enabled, the algorithm always allocates the minimum amount of memory required to store the given number of records. Set this flag to `true` if you prefer to use as little GPU memory for the database as possible. | + + +### cuvsIvfSqIndexParamsCreate + +Allocate IVF-SQ Index params, and populate with default values + +```c +CUVS_EXPORT cuvsError_t cuvsIvfSqIndexParamsCreate(cuvsIvfSqIndexParams_t* index_params); +``` + +**Parameters** + +| Name | Direction | Type | Description | +| --- | --- | --- | --- | +| `index_params` | in | [`cuvsIvfSqIndexParams_t*`](/api-reference/c-api-neighbors-ivf-sq#cuvsivfsqindexparams) | cuvsIvfSqIndexParams_t to allocate | + +**Returns** + +[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) + + +### cuvsIvfSqIndexParamsDestroy + +De-allocate IVF-SQ Index params + +```c +CUVS_EXPORT cuvsError_t cuvsIvfSqIndexParamsDestroy(cuvsIvfSqIndexParams_t index_params); +``` + +**Parameters** + +| Name | Direction | Type | Description | +| --- | --- | --- | --- | +| `index_params` | in | [`cuvsIvfSqIndexParams_t`](/api-reference/c-api-neighbors-ivf-sq#cuvsivfsqindexparams) | | + +**Returns** + +[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) + +## IVF-SQ index search parameters + + +### cuvsIvfSqSearchParams + +Supplemental parameters to search IVF-SQ index + +```c +struct cuvsIvfSqSearchParams { + uint32_t n_probes; +}; +``` + +**Fields** + +| Name | Type | Description | +| --- | --- | --- | +| `n_probes` | `uint32_t` | The number of clusters to search. 
| + + +### cuvsIvfSqSearchParamsCreate + +Allocate IVF-SQ search params, and populate with default values + +```c +CUVS_EXPORT cuvsError_t cuvsIvfSqSearchParamsCreate(cuvsIvfSqSearchParams_t* params); +``` + +**Parameters** + +| Name | Direction | Type | Description | +| --- | --- | --- | --- | +| `params` | in | [`cuvsIvfSqSearchParams_t*`](/api-reference/c-api-neighbors-ivf-sq#cuvsivfsqsearchparams) | cuvsIvfSqSearchParams_t to allocate | + +**Returns** + +[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) + + +### cuvsIvfSqSearchParamsDestroy + +De-allocate IVF-SQ search params + +```c +CUVS_EXPORT cuvsError_t cuvsIvfSqSearchParamsDestroy(cuvsIvfSqSearchParams_t params); +``` + +**Parameters** + +| Name | Direction | Type | Description | +| --- | --- | --- | --- | +| `params` | in | [`cuvsIvfSqSearchParams_t`](/api-reference/c-api-neighbors-ivf-sq#cuvsivfsqsearchparams) | | + +**Returns** + +[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) + +## IVF-SQ index + + +### cuvsIvfSqIndex + +Struct to hold address of cuvs::neighbors::ivf_sq::index and its active trained dtype + +```c +typedef struct { + uintptr_t addr; + DLDataType dtype; +} cuvsIvfSqIndex; +``` + +**Fields** + +| Name | Type | Description | +| --- | --- | --- | +| `addr` | `uintptr_t` | | +| `dtype` | `DLDataType` | | + + +### cuvsIvfSqIndexCreate + +Allocate IVF-SQ index + +```c +CUVS_EXPORT cuvsError_t cuvsIvfSqIndexCreate(cuvsIvfSqIndex_t* index); +``` + +**Parameters** + +| Name | Direction | Type | Description | +| --- | --- | --- | --- | +| `index` | in | [`cuvsIvfSqIndex_t*`](/api-reference/c-api-neighbors-ivf-sq#cuvsivfsqindex) | cuvsIvfSqIndex_t to allocate | + +**Returns** + +[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) + + +### cuvsIvfSqIndexDestroy + +De-allocate IVF-SQ index + +```c +CUVS_EXPORT cuvsError_t cuvsIvfSqIndexDestroy(cuvsIvfSqIndex_t index); +``` + +**Parameters** + +| Name | Direction | Type | 
Description | +| --- | --- | --- | --- | +| `index` | in | [`cuvsIvfSqIndex_t`](/api-reference/c-api-neighbors-ivf-sq#cuvsivfsqindex) | cuvsIvfSqIndex_t to de-allocate | + +**Returns** + +[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) + + +### cuvsIvfSqIndexGetNLists + +Get the number of clusters/inverted lists + +```c +CUVS_EXPORT cuvsError_t cuvsIvfSqIndexGetNLists(cuvsIvfSqIndex_t index, int64_t* n_lists); +``` + +**Parameters** + +| Name | Direction | Type | Description | +| --- | --- | --- | --- | +| `index` | | [`cuvsIvfSqIndex_t`](/api-reference/c-api-neighbors-ivf-sq#cuvsivfsqindex) | | +| `n_lists` | | `int64_t*` | | + +**Returns** + +[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) + + +### cuvsIvfSqIndexGetDim + +Get the dimensionality of the data + +```c +CUVS_EXPORT cuvsError_t cuvsIvfSqIndexGetDim(cuvsIvfSqIndex_t index, int64_t* dim); +``` + +**Parameters** + +| Name | Direction | Type | Description | +| --- | --- | --- | --- | +| `index` | | [`cuvsIvfSqIndex_t`](/api-reference/c-api-neighbors-ivf-sq#cuvsivfsqindex) | | +| `dim` | | `int64_t*` | | + +**Returns** + +[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) + + +### cuvsIvfSqIndexGetSize + +Get the size of the index + +```c +CUVS_EXPORT cuvsError_t cuvsIvfSqIndexGetSize(cuvsIvfSqIndex_t index, int64_t* size); +``` + +**Parameters** + +| Name | Direction | Type | Description | +| --- | --- | --- | --- | +| `index` | | [`cuvsIvfSqIndex_t`](/api-reference/c-api-neighbors-ivf-sq#cuvsivfsqindex) | | +| `size` | | `int64_t*` | | + +**Returns** + +[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) + + +### cuvsIvfSqIndexGetCenters + +Get the cluster centers corresponding to the lists [n_lists, dim] + +```c +CUVS_EXPORT cuvsError_t cuvsIvfSqIndexGetCenters(cuvsIvfSqIndex_t index, DLManagedTensor* centers); +``` + +**Parameters** + +| Name | Direction | Type | Description | +| --- | --- | --- | --- | +| 
`index` | in | [`cuvsIvfSqIndex_t`](/api-reference/c-api-neighbors-ivf-sq#cuvsivfsqindex) | cuvsIvfSqIndex_t Built Ivf-SQ Index | +| `centers` | out | `DLManagedTensor*` | Preallocated array on host or device memory to store output, [n_lists, dim] | + +**Returns** + +[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) + +## IVF-SQ index build + + +### cuvsIvfSqBuild + +Build an IVF-SQ index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, or `kDLCPU`. Also, acceptable underlying types are: 1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` 2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16` + +```c +CUVS_EXPORT cuvsError_t cuvsIvfSqBuild(cuvsResources_t res, +cuvsIvfSqIndexParams_t index_params, +DLManagedTensor* dataset, +cuvsIvfSqIndex_t index); +``` + +**Parameters** + +| Name | Direction | Type | Description | +| --- | --- | --- | --- | +| `res` | in | [`cuvsResources_t`](/api-reference/c-api-core-c-api#cuvsresources-t) | cuvsResources_t opaque C handle | +| `index_params` | in | [`cuvsIvfSqIndexParams_t`](/api-reference/c-api-neighbors-ivf-sq#cuvsivfsqindexparams) | cuvsIvfSqIndexParams_t used to build IVF-SQ index | +| `dataset` | in | `DLManagedTensor*` | DLManagedTensor* training dataset | +| `index` | out | [`cuvsIvfSqIndex_t`](/api-reference/c-api-neighbors-ivf-sq#cuvsivfsqindex) | cuvsIvfSqIndex_t Newly built IVF-SQ index | + +**Returns** + +[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) + +## IVF-SQ index search + + +### cuvsIvfSqSearch + +Search an IVF-SQ index with a `DLManagedTensor` which has underlying `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`. Types for input are: 1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` or 16 2. `neighbors`: `kDLDataType.code == kDLInt` and `kDLDataType.bits = 64` 3. 
`distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` + +```c +CUVS_EXPORT cuvsError_t cuvsIvfSqSearch(cuvsResources_t res, +cuvsIvfSqSearchParams_t search_params, +cuvsIvfSqIndex_t index, +DLManagedTensor* queries, +DLManagedTensor* neighbors, +DLManagedTensor* distances, +cuvsFilter filter); +``` + +**Parameters** + +| Name | Direction | Type | Description | +| --- | --- | --- | --- | +| `res` | in | [`cuvsResources_t`](/api-reference/c-api-core-c-api#cuvsresources-t) | cuvsResources_t opaque C handle | +| `search_params` | in | [`cuvsIvfSqSearchParams_t`](/api-reference/c-api-neighbors-ivf-sq#cuvsivfsqsearchparams) | cuvsIvfSqSearchParams_t used to search IVF-SQ index | +| `index` | in | [`cuvsIvfSqIndex_t`](/api-reference/c-api-neighbors-ivf-sq#cuvsivfsqindex) | ivfSqIndex which has been returned by `cuvsIvfSqBuild` | +| `queries` | in | `DLManagedTensor*` | DLManagedTensor* queries dataset to search | +| `neighbors` | out | `DLManagedTensor*` | DLManagedTensor* output `k` neighbors for queries | +| `distances` | out | `DLManagedTensor*` | DLManagedTensor* output `k` distances for queries | +| `filter` | in | [`cuvsFilter`](/api-reference/c-api-neighbors-common#cuvsfilter) | cuvsFilter input filter that can be used to filter queries and neighbors based on the given bitset. | + +**Returns** + +[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) + +## IVF-SQ C-API serialize functions + + +### cuvsIvfSqSerialize + +Save the index to file. + +```c +CUVS_EXPORT cuvsError_t cuvsIvfSqSerialize(cuvsResources_t res, const char* filename, cuvsIvfSqIndex_t index); +``` + +Experimental, both the API and the serialization format are subject to change. 
+ +**Parameters** + +| Name | Direction | Type | Description | +| --- | --- | --- | --- | +| `res` | in | [`cuvsResources_t`](/api-reference/c-api-core-c-api#cuvsresources-t) | cuvsResources_t opaque C handle | +| `filename` | in | `const char*` | the file name for saving the index | +| `index` | in | [`cuvsIvfSqIndex_t`](/api-reference/c-api-neighbors-ivf-sq#cuvsivfsqindex) | IVF-SQ index | + +**Returns** + +[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) + + +### cuvsIvfSqDeserialize + +Load index from file. + +```c +CUVS_EXPORT cuvsError_t cuvsIvfSqDeserialize(cuvsResources_t res, +const char* filename, +cuvsIvfSqIndex_t index); +``` + +Experimental, both the API and the serialization format are subject to change. + +**Parameters** + +| Name | Direction | Type | Description | +| --- | --- | --- | --- | +| `res` | in | [`cuvsResources_t`](/api-reference/c-api-core-c-api#cuvsresources-t) | cuvsResources_t opaque C handle | +| `filename` | in | `const char*` | the name of the file that stores the index | +| `index` | out | [`cuvsIvfSqIndex_t`](/api-reference/c-api-neighbors-ivf-sq#cuvsivfsqindex) | IVF-SQ index loaded from disk | + +**Returns** + +[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) + +## IVF-SQ index extend + + +### cuvsIvfSqExtend + +Extend the index with the new data. + +```c +CUVS_EXPORT cuvsError_t cuvsIvfSqExtend(cuvsResources_t res, +DLManagedTensor* new_vectors, +DLManagedTensor* new_indices, +cuvsIvfSqIndex_t index); +``` + +**Parameters** + +| Name | Direction | Type | Description | +| --- | --- | --- | --- | +| `res` | in | [`cuvsResources_t`](/api-reference/c-api-core-c-api#cuvsresources-t) | cuvsResources_t opaque C handle | +| `new_vectors` | in | `DLManagedTensor*` | DLManagedTensor* the new vectors to add to the index | +| `new_indices` | in | `DLManagedTensor*` | DLManagedTensor* vector of new indices for the new vectors. 
If the index is empty, this can be NULL to imply a continuous range `[0...n_rows)`. | +| `index` | inout | [`cuvsIvfSqIndex_t`](/api-reference/c-api-neighbors-ivf-sq#cuvsivfsqindex) | IVF-SQ index to be extended | + +**Returns** + +[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) diff --git a/fern/pages/c_api/index.md b/fern/pages/c_api/index.md index 3954d4962c..8c72554709 100644 --- a/fern/pages/c_api/index.md +++ b/fern/pages/c_api/index.md @@ -31,6 +31,7 @@ These pages are generated from the documented public headers in the cuVS source - [HNSW](/api-reference/c-api-neighbors-hnsw) - [IVF Flat](/api-reference/c-api-neighbors-ivf-flat) - [IVF PQ](/api-reference/c-api-neighbors-ivf-pq) +- [IVF SQ](/api-reference/c-api-neighbors-ivf-sq) - [NN Descent](/api-reference/c-api-neighbors-nn-descent) - [Refine](/api-reference/c-api-neighbors-refine) - [Tiered Index](/api-reference/c-api-neighbors-tiered-index) diff --git a/fern/pages/cpp_api/cpp-api-neighbors-ivf-sq.md b/fern/pages/cpp_api/cpp-api-neighbors-ivf-sq.md index 1872199a47..3b33951697 100644 --- a/fern/pages/cpp_api/cpp-api-neighbors-ivf-sq.md +++ b/fern/pages/cpp_api/cpp-api-neighbors-ivf-sq.md @@ -25,7 +25,13 @@ IVF-SQ index build parameters. IVF-SQ currently uses 8-bit scalar quantization, storing one `uint8_t` code per vector dimension. ```cpp -struct index_params : cuvs::neighbors::index_params { ... }; +struct index_params : cuvs::neighbors::index_params { + uint32_t n_lists; + uint32_t kmeans_n_iters; + uint32_t max_train_points_per_cluster; + bool conservative_memory_allocation; + bool add_data_on_build; +}; ``` **Fields** @@ -35,8 +41,8 @@ struct index_params : cuvs::neighbors::index_params { ... }; | `n_lists` | `uint32_t` | The number of inverted lists (clusters) | | `kmeans_n_iters` | `uint32_t` | The number of iterations searching for kmeans centers (index building). 
| | `max_train_points_per_cluster` | `uint32_t` | The number of data vectors (per cluster) to use during iterative kmeans building. | -| `conservative_memory_allocation` | `bool` | By default, the algorithm allocates more space than necessary for individual clusters (`list_data`). This allows to amortize the cost of memory allocation and reduce the number of data copies during repeated calls to `extend` (extending the database). The alternative is the conservative allocation behavior; when enabled, the algorithm always allocates the minimum amount of memory required to store the given number of records. Set this flag to `true` if you prefer to use as little GPU memory for the database as possible. | -| `add_data_on_build` | `bool` | Whether to add the dataset content to the index, i.e.:
- `true` means the index is filled with the dataset vectors and ready to search after calling `build`.
- `false` means `build` only trains the underlying model (e.g. quantizer or clustering), but the index is left empty; you'd need to call `extend` on the index afterwards to populate it. | +| `conservative_memory_allocation` | `bool` | By default, the algorithm allocates more space than necessary for individual clusters (`list_data`). This amortizes the cost of memory allocation and reduces the number of data copies during repeated calls to `extend` (extending the database).

The alternative is the conservative allocation behavior; when enabled, the algorithm always allocates the minimum amount of memory required to store the given number of records. Set this flag to `true` if you prefer to use as little GPU memory for the database as possible. | +| `add_data_on_build` | `bool` | Whether to add the dataset content to the index, i.e.:

- `true` means the index is filled with the dataset vectors and ready to search after calling `build`.
- `false` means `build` only trains the underlying model (e.g. quantizer or clustering), but the index is left empty; you'd need to call `extend` on the index afterwards to populate it. | ## IVF-SQ index search parameters @@ -46,7 +52,9 @@ struct index_params : cuvs::neighbors::index_params { ... }; IVF-SQ index search parameters ```cpp -struct search_params : cuvs::neighbors::search_params { ... }; +struct search_params : cuvs::neighbors::search_params { + uint32_t n_probes; +}; ``` **Fields** @@ -64,7 +72,11 @@ IVF-SQ list storage spec ```cpp template -struct list_spec { ... }; +struct list_spec { + SizeT align_max; + SizeT align_min; + uint32_t dim; +}; ``` **Fields** @@ -100,7 +112,7 @@ Note: `CodeT` is the storage type for scalar-quantized residual codes in the inv ```cpp template -struct index : cuvs::neighbors::index { ... }; +struct index; ``` ## IVF-SQ index build