-
Notifications
You must be signed in to change notification settings - Fork 197
Fix multi-threaded CAGRA search OOB #1781
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -217,6 +217,7 @@ struct dataset_descriptor_host { | |
| std::mutex mutex; | ||
| std::atomic<bool> ready; // Not sure if std::holds_alternative is thread-safe | ||
| std::variant<ready_t, init_f> value; | ||
| cudaEvent_t init_event{nullptr}; | ||
|
|
||
| template <typename InitF> | ||
| state(InitF init, size_t size) : ready{false}, value{std::make_tuple(init, size)} | ||
|
|
@@ -229,6 +230,7 @@ struct dataset_descriptor_host { | |
| auto& [ptr, stream] = std::get<ready_t>(value); | ||
| RAFT_CUDA_TRY_NO_THROW(cudaFreeAsync(ptr, stream)); | ||
| } | ||
| if (init_event != nullptr) { RAFT_CUDA_TRY_NO_THROW(cudaEventDestroy(init_event)); } | ||
| } | ||
|
|
||
| void eval(rmm::cuda_stream_view stream) | ||
|
|
@@ -237,8 +239,12 @@ struct dataset_descriptor_host { | |
| if (std::holds_alternative<init_f>(value)) { | ||
| auto& [fun, size] = std::get<init_f>(value); | ||
| dev_descriptor_t* ptr = nullptr; | ||
| RAFT_CUDA_TRY(cudaEventCreateWithFlags(&init_event, cudaEventDisableTiming)); | ||
| RAFT_CUDA_TRY(cudaMallocAsync(&ptr, size, stream)); | ||
| fun(ptr, stream); | ||
| // Record an event after initialization so that other streams can establish | ||
| // a GPU-side dependency without expensive host synchronization. | ||
| RAFT_CUDA_TRY(cudaEventRecord(init_event, stream)); | ||
| value = std::make_tuple(ptr, stream); | ||
| ready.store(true, std::memory_order_release); | ||
|
Comment on lines
+242
to
249
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Handle partial init failures without leaking event/device memory If initialization fails after Line 242 (e.g., Line 243, Line 244, or Line 247), Proposed fix void eval(rmm::cuda_stream_view stream)
{
std::lock_guard<std::mutex> lock(mutex);
if (std::holds_alternative<init_f>(value)) {
auto& [fun, size] = std::get<init_f>(value);
dev_descriptor_t* ptr = nullptr;
- RAFT_CUDA_TRY(cudaEventCreateWithFlags(&init_event, cudaEventDisableTiming));
- RAFT_CUDA_TRY(cudaMallocAsync(&ptr, size, stream));
- fun(ptr, stream);
- // Record an event after initialization so that other streams can establish
- // a GPU-side dependency without expensive host synchronization.
- RAFT_CUDA_TRY(cudaEventRecord(init_event, stream));
- value = std::make_tuple(ptr, stream);
- ready.store(true, std::memory_order_release);
+ cudaEvent_t local_event{nullptr};
+ try {
+ RAFT_CUDA_TRY(cudaEventCreateWithFlags(&local_event, cudaEventDisableTiming));
+ RAFT_CUDA_TRY(cudaMallocAsync(&ptr, size, stream));
+ fun(ptr, stream);
+ // Record an event after initialization so that other streams can establish
+ // a GPU-side dependency without expensive host synchronization.
+ RAFT_CUDA_TRY(cudaEventRecord(local_event, stream));
+ init_event = local_event;
+ value = std::make_tuple(ptr, stream);
+ ready.store(true, std::memory_order_release);
+ } catch (...) {
+ if (ptr != nullptr) { RAFT_CUDA_TRY_NO_THROW(cudaFreeAsync(ptr, stream)); }
+ if (local_event != nullptr) { RAFT_CUDA_TRY_NO_THROW(cudaEventDestroy(local_event)); }
+ throw;
+ }
}
}As per coding guidelines: "Device memory allocations (cudaMalloc, RMM functions) must have corresponding deallocations; use RAII patterns ... to prevent GPU memory leaks on error paths". 🤖 Prompt for AI Agents
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm. This is a good point, IMHO. If the @viclafargue, do we have an RAII-wrapper specifically for cases like this one? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| } | ||
|
|
@@ -247,6 +253,11 @@ struct dataset_descriptor_host { | |
| auto get(rmm::cuda_stream_view stream) -> dev_descriptor_t* | ||
| { | ||
| if (!ready.load(std::memory_order_acquire)) { eval(stream); } | ||
| // Make the caller's stream wait for the init to complete. This is a | ||
| // lightweight GPU-side dependency with no host blocking. On the same | ||
| // stream that performed the init (or after the event has already | ||
| // completed) this is essentially a no-op. | ||
| if (init_event != nullptr) { RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, init_event)); } | ||
| return std::get<0>(std::get<ready_t>(value)); | ||
| } | ||
| }; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based on the feedback received on my PR (https://github.com/rapidsai/cuvs/pull/1771/changes#diff-52f864438c5274d5d365954ba71d9988bb00f8c415e2b3c081aa532f3a1a8f07R34-R36), one wonders if debugging might be easier if one used
RAFT_EXPECTSwith a message here instead?What is the generally accepted convention?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good question. It looks like the convention is to use
RAFT_CUDA_TRYfor error handling of CUDA API calls whileRAFT_EXPECTSwould most often be used for assertion testing.RAFT_CUDA_TRYshould point out the file and line where the error happened. But, I guess thatRAFT_EXPECTSmay indeed come useful when one wants to display a specific error message.