From e471907305f1d70b8a27ed51b9ae61036db32887 Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Fri, 29 May 2026 17:18:59 +0200 Subject: [PATCH 1/7] small PR-ready for feedback --- cpp/src/barrier/barrier.cu | 68 +-- cpp/src/barrier/cusparse_info.hpp | 36 +- cpp/src/barrier/cusparse_view.cu | 105 ++-- cpp/src/pdlp/cusparse_view.cu | 883 ++++++++++++------------------ cpp/src/pdlp/cusparse_view.hpp | 305 +++++------ 5 files changed, 571 insertions(+), 826 deletions(-) diff --git a/cpp/src/barrier/barrier.cu b/cpp/src/barrier/barrier.cu index e7604fb60e..a5995503c7 100644 --- a/cpp/src/barrier/barrier.cu +++ b/cpp/src/barrier/barrier.cu @@ -1353,13 +1353,13 @@ class iteration_data_t { // v = alpha * A * Dinv * A^T * y + beta * v void gpu_adat_multiply(f_t alpha, const rmm::device_uvector& y, - detail::cusparse_dn_vec_descr_wrapper_t const& cusparse_y, + detail::cusparse_dn_vec_descr_view cusparse_y, f_t beta, rmm::device_uvector& v, - detail::cusparse_dn_vec_descr_wrapper_t const& cusparse_v, + detail::cusparse_dn_vec_descr_view cusparse_v, rmm::device_uvector& u, - detail::cusparse_dn_vec_descr_wrapper_t const& cusparse_u, + detail::cusparse_dn_vec_descr_view cusparse_u, cusparse_view_t& cusparse_view, const rmm::device_uvector& d_inv_diag) const { @@ -1593,20 +1593,20 @@ class iteration_data_t { pinned_dense_vector_t dz; cusparse_info_t cusparse_info; cusparse_view_t cusparse_view_; - detail::cusparse_dn_vec_descr_wrapper_t cusparse_tmp4_; - detail::cusparse_dn_vec_descr_wrapper_t cusparse_h_; - detail::cusparse_dn_vec_descr_wrapper_t cusparse_dx_residual_; - detail::cusparse_dn_vec_descr_wrapper_t cusparse_dy_; - detail::cusparse_dn_vec_descr_wrapper_t cusparse_dx_residual_5_; - detail::cusparse_dn_vec_descr_wrapper_t cusparse_dx_residual_6_; - detail::cusparse_dn_vec_descr_wrapper_t cusparse_dx_; - detail::cusparse_dn_vec_descr_wrapper_t cusparse_dx_residual_3_; - detail::cusparse_dn_vec_descr_wrapper_t cusparse_dx_residual_4_; - detail::cusparse_dn_vec_descr_wrapper_t cusparse_r1_; - detail::cusparse_dn_vec_descr_wrapper_t cusparse_dual_residual_; - detail::cusparse_dn_vec_descr_wrapper_t cusparse_y_residual_; + detail::cusparse_dn_vec_uptr cusparse_tmp4_; + detail::cusparse_dn_vec_uptr cusparse_h_; + detail::cusparse_dn_vec_uptr cusparse_dx_residual_; + detail::cusparse_dn_vec_uptr cusparse_dy_; + detail::cusparse_dn_vec_uptr cusparse_dx_residual_5_; + detail::cusparse_dn_vec_uptr cusparse_dx_residual_6_; + detail::cusparse_dn_vec_uptr cusparse_dx_; + detail::cusparse_dn_vec_uptr cusparse_dx_residual_3_; + detail::cusparse_dn_vec_uptr cusparse_dx_residual_4_; + detail::cusparse_dn_vec_uptr cusparse_r1_; + detail::cusparse_dn_vec_uptr cusparse_dual_residual_; + detail::cusparse_dn_vec_uptr cusparse_y_residual_; // GPU ADAT multiply - detail::cusparse_dn_vec_descr_wrapper_t cusparse_u_; + detail::cusparse_dn_vec_uptr cusparse_u_; // Device vectors @@ -2059,7 +2059,7 @@ void barrier_solver_t::gpu_compute_residuals(const rmm::device_uvector auto cusparse_d_x = data.cusparse_view_.create_vector(d_x); auto descr_primal_residual = data.cusparse_view_.create_vector(data.d_primal_residual_); - data.cusparse_view_.spmv(-1.0, cusparse_d_x, 1.0, descr_primal_residual); + data.cusparse_view_.spmv(-1.0, cusparse_d_x.get(), 1.0, descr_primal_residual.get()); // Compute bound_residual = E'*u - w - E'*x if (data.n_upper_bounds > 0) { @@ -2079,7 +2079,7 @@ void barrier_solver_t::gpu_compute_residuals(const rmm::device_uvector if (data.Q.n > 0) { raft::copy(data.d_c_.data(), data.c.data(), data.c.size(), stream_view_); auto cusparse_d_c = data.cusparse_view_.create_vector(data.d_c_); - data.cusparse_Q_view_.spmv(1.0, cusparse_d_x, 1.0, cusparse_d_c); + data.cusparse_Q_view_.spmv(1.0, cusparse_d_x.get(), 1.0, cusparse_d_c.get()); } else { raft::copy(data.d_c_.data(), data.c.data(), data.c.size(), stream_view_); } @@ -2092,7 +2092,7 @@ void barrier_solver_t::gpu_compute_residuals(const rmm::device_uvector // Compute dual_residual = c - A'*y - z + E*v auto cusparse_d_y = data.cusparse_view_.create_vector(d_y); auto descr_dual_residual = data.cusparse_view_.create_vector(data.d_dual_residual_); - data.cusparse_view_.transpose_spmv(-1.0, cusparse_d_y, 1.0, descr_dual_residual); + data.cusparse_view_.transpose_spmv(-1.0, cusparse_d_y.get(), 1.0, descr_dual_residual.get()); if (data.n_upper_bounds > 0) { cub::DeviceTransform::Transform( @@ -2493,7 +2493,7 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t(d_dx_residual_6, stream_view_); @@ -2700,8 +2700,8 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t::compute_primal_dual_objective(iteration_data_t< if (data.Q.n > 0) { auto cusparse_d_x = data.cusparse_view_.create_vector(data.d_x_); auto cusparse_Qx = data.cusparse_view_.create_vector(data.d_Qx_); - data.cusparse_Q_view_.spmv(1.0, cusparse_d_x, 0.0, cusparse_Qx); + data.cusparse_Q_view_.spmv(1.0, cusparse_d_x.get(), 0.0, cusparse_Qx.get()); rmm::device_scalar d_xQx(stream_view_); RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), data.d_Qx_.size(), diff --git a/cpp/src/barrier/cusparse_info.hpp b/cpp/src/barrier/cusparse_info.hpp index 105c470cd7..893c6d3850 100644 --- a/cpp/src/barrier/cusparse_info.hpp +++ b/cpp/src/barrier/cusparse_info.hpp @@ -7,6 +7,7 @@ #pragma once +#include #include #include @@ -17,8 +18,21 @@ #include +#include +#include + namespace cuopt::linear_programming::dual_simplex { +struct cusparse_spgemm_deleter { + void operator()(cusparseSpGEMMDescr_t descr) const noexcept + { + if (descr) { CUOPT_CUSPARSE_TRY_NO_THROW(cusparseSpGEMM_destroyDescr(descr)); } + } +}; + +using cusparse_spgemm_uptr = + std::unique_ptr, cusparse_spgemm_deleter>; + template struct cusparse_info_t { cusparse_info_t(raft::handle_t const* handle) @@ -35,24 +49,10 @@ struct cusparse_info_t { beta.set_value_async(v, handle->get_stream()); } - ~cusparse_info_t() - { - if (spgemm_descr != nullptr) { - CUOPT_CUSPARSE_TRY_NO_THROW(cusparseSpGEMM_destroyDescr(spgemm_descr)); - } - if (matA_descr != nullptr) { CUOPT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA_descr)); } - if (matDAT_descr != nullptr) { - CUOPT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matDAT_descr)); - } - if (matADAT_descr != nullptr) { - CUOPT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matADAT_descr)); - } - } - - cusparseSpMatDescr_t matA_descr{nullptr}; - cusparseSpMatDescr_t matDAT_descr{nullptr}; - cusparseSpMatDescr_t matADAT_descr{nullptr}; - cusparseSpGEMMDescr_t spgemm_descr{nullptr}; + detail::cusparse_sp_mat_uptr matA_descr; + detail::cusparse_sp_mat_uptr matDAT_descr; + detail::cusparse_sp_mat_uptr matADAT_descr; + cusparse_spgemm_uptr spgemm_descr; rmm::device_scalar alpha; rmm::device_scalar beta; rmm::device_uvector buffer_size; diff --git a/cpp/src/barrier/cusparse_view.cu b/cpp/src/barrier/cusparse_view.cu index b7673eacd5..5bf91b5ba2 100644 --- a/cpp/src/barrier/cusparse_view.cu +++ b/cpp/src/barrier/cusparse_view.cu @@ -163,47 +163,26 @@ cusparse_view_t::cusparse_view_t(raft::handle_t const* handle_ptr, A_T_indices_ = device_copy(A.i, handle_ptr->get_stream()); A_T_data_ = device_copy(A.x, handle_ptr->get_stream()); - cusparseCreateCsr(&A_, - rows, - cols, - nnz, - A_offsets_.data(), - A_indices_.data(), - A_data_.data(), - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_64F); - - cusparseCreateCsr(&A_T_, - cols, - rows, - nnz, - A_T_offsets_.data(), - A_T_indices_.data(), - A_T_data_.data(), - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_64F); - - // Tmp just to init the buffer size and preprocess - cusparseDnVecDescr_t x; - cusparseDnVecDescr_t y; + A_ = detail::make_csr( + rows, cols, nnz, A_offsets_.data(), A_indices_.data(), A_data_.data()); + A_T_ = detail::make_csr( + cols, rows, nnz, A_T_offsets_.data(), A_T_indices_.data(), A_T_data_.data()); + + // Temp dense vectors solely to query buffer sizes and preprocess. RAII via uptr. rmm::device_uvector d_x(cols, handle_ptr_->get_stream()); rmm::device_uvector d_y(rows, handle_ptr_->get_stream()); - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&x, d_x.size(), d_x.data())); - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&y, d_y.size(), d_y.data())); + detail::cusparse_dn_vec_uptr x = detail::make_dnvec(d_x.size(), d_x.data()); + detail::cusparse_dn_vec_uptr y = detail::make_dnvec(d_y.size(), d_y.data()); size_t buffer_size_spmv = 0; RAFT_CUSPARSE_TRY( raft::sparse::detail::cusparsespmv_buffersize(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, d_one_.data(), - A_, - x, + A_.get(), + x.get(), d_one_.data(), - y, + y.get(), get_spmv_alg(A_offsets_.size() - 1), &buffer_size_spmv, handle_ptr_->get_stream())); @@ -212,10 +191,10 @@ cusparse_view_t::cusparse_view_t(raft::handle_t const* handle_ptr, my_cusparsespmv_preprocess(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, d_one_.data(), - A_, - x, + A_.get(), + x.get(), d_one_.data(), - y, + y.get(), get_spmv_alg(A_offsets_.size() - 1), spmv_buffer_.data(), handle_ptr->get_stream()); @@ -224,10 +203,10 @@ cusparse_view_t::cusparse_view_t(raft::handle_t const* handle_ptr, raft::sparse::detail::cusparsespmv_buffersize(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, d_one_.data(), - A_T_, - y, + A_T_.get(), + y.get(), d_one_.data(), - x, + x.get(), get_spmv_alg(A_T_offsets_.size() - 1), &buffer_size_spmv, handle_ptr_->get_stream())); @@ -236,31 +215,20 @@ cusparse_view_t::cusparse_view_t(raft::handle_t const* handle_ptr, my_cusparsespmv_preprocess(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, d_one_.data(), - A_T_, - y, + A_T_.get(), + y.get(), d_one_.data(), - x, + x.get(), get_spmv_alg(A_T_offsets_.size() - 1), spmv_buffer_transpose_.data(), handle_ptr->get_stream()); - RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(x)); - RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(y)); } template -cusparse_view_t::~cusparse_view_t() -{ - CUOPT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(A_)); - CUOPT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(A_T_)); -} - -template -detail::cusparse_dn_vec_descr_wrapper_t cusparse_view_t::create_vector( +detail::cusparse_dn_vec_uptr cusparse_view_t::create_vector( rmm::device_uvector const& vec) { - detail::cusparse_dn_vec_descr_wrapper_t descr; - descr.create(vec.size(), const_cast(vec.data())); - return descr; + return detail::make_dnvec(vec.size(), const_cast(vec.data())); } template @@ -282,16 +250,16 @@ void cusparse_view_t::spmv(f_t alpha, f_t beta, rmm::device_uvector& y) { - detail::cusparse_dn_vec_descr_wrapper_t x_cusparse = create_vector(x); - detail::cusparse_dn_vec_descr_wrapper_t y_cusparse = create_vector(y); - spmv(alpha, x_cusparse, beta, y_cusparse); + detail::cusparse_dn_vec_uptr x_cusparse = create_vector(x); + detail::cusparse_dn_vec_uptr y_cusparse = create_vector(y); + spmv(alpha, x_cusparse.get(), beta, y_cusparse.get()); } template void cusparse_view_t::spmv(f_t alpha, - detail::cusparse_dn_vec_descr_wrapper_t const& x, + detail::cusparse_dn_vec_descr_view x, f_t beta, - detail::cusparse_dn_vec_descr_wrapper_t const& y) + detail::cusparse_dn_vec_descr_view y) { // Would be simpler if we could pass host data direclty but other cusparse calls with the same // handler depend on device data @@ -306,7 +274,7 @@ void cusparse_view_t::spmv(f_t alpha, raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, (alpha == 1) ? d_one_.data() : d_minus_one_.data(), - A_, + A_.get(), x, d_beta->data(), y, @@ -334,17 +302,16 @@ void cusparse_view_t::transpose_spmv(f_t alpha, f_t beta, rmm::device_uvector& y) { - detail::cusparse_dn_vec_descr_wrapper_t x_cusparse = create_vector(x); - detail::cusparse_dn_vec_descr_wrapper_t y_cusparse = create_vector(y); - transpose_spmv(alpha, x_cusparse, beta, y_cusparse); + detail::cusparse_dn_vec_uptr x_cusparse = create_vector(x); + detail::cusparse_dn_vec_uptr y_cusparse = create_vector(y); + transpose_spmv(alpha, x_cusparse.get(), beta, y_cusparse.get()); } template -void cusparse_view_t::transpose_spmv( - f_t alpha, - detail::cusparse_dn_vec_descr_wrapper_t const& x, - f_t beta, - detail::cusparse_dn_vec_descr_wrapper_t const& y) +void cusparse_view_t::transpose_spmv(f_t alpha, + detail::cusparse_dn_vec_descr_view x, + f_t beta, + detail::cusparse_dn_vec_descr_view y) { // Would be simpler if we could pass host data direct;y but other cusparse calls with the same // handler depend on device data @@ -359,7 +326,7 @@ void cusparse_view_t::transpose_spmv( raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, (alpha == 1) ? d_one_.data() : d_minus_one_.data(), - A_T_, + A_T_.get(), x, d_beta->data(), y, diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index 396fd27499..afc65e0f7c 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -29,129 +29,9 @@ struct double_to_float_functor { namespace cuopt::linear_programming::detail { -// cusparse_sp_mat_descr_wrapper_t implementation -template -cusparse_sp_mat_descr_wrapper_t::cusparse_sp_mat_descr_wrapper_t() - : need_destruction_(false) -{ -} - -template -cusparse_sp_mat_descr_wrapper_t::~cusparse_sp_mat_descr_wrapper_t() -{ - if (need_destruction_) { RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(descr_)); } -} - -template -cusparse_sp_mat_descr_wrapper_t::cusparse_sp_mat_descr_wrapper_t( - const cusparse_sp_mat_descr_wrapper_t& other) - : descr_(other.descr_), need_destruction_(false) -{ -} - -template -void cusparse_sp_mat_descr_wrapper_t::create( - int64_t m, int64_t n, int64_t nnz, i_t* offsets, i_t* indices, f_t* values) -{ - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsecreatecsr(&descr_, m, n, nnz, offsets, indices, values)); - need_destruction_ = true; -} - -template -cusparse_sp_mat_descr_wrapper_t::operator cusparseSpMatDescr_t() const -{ - return descr_; -} - -// cusparse_dn_vec_descr_wrapper_t implementation -template -cusparse_dn_vec_descr_wrapper_t::cusparse_dn_vec_descr_wrapper_t() : need_destruction_(false) -{ -} - -template -cusparse_dn_vec_descr_wrapper_t::~cusparse_dn_vec_descr_wrapper_t() -{ - if (need_destruction_) { RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(descr_)); } -} - -template -cusparse_dn_vec_descr_wrapper_t::cusparse_dn_vec_descr_wrapper_t( - const cusparse_dn_vec_descr_wrapper_t& other) - : descr_(other.descr_), need_destruction_(false) -{ -} - -template -cusparse_dn_vec_descr_wrapper_t& cusparse_dn_vec_descr_wrapper_t::operator=( - cusparse_dn_vec_descr_wrapper_t&& other) -{ - if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(descr_)); } - descr_ = other.descr_; - need_destruction_ = other.need_destruction_; - other.need_destruction_ = false; - return *this; -} - -template -void cusparse_dn_vec_descr_wrapper_t::create(int64_t size, f_t* values) -{ - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&descr_, size, values)); - need_destruction_ = true; -} - -template -cusparse_dn_vec_descr_wrapper_t::operator cusparseDnVecDescr_t() const -{ - return descr_; -} - -// cusparse_dn_mat_descr_wrapper_t implementation -template -cusparse_dn_mat_descr_wrapper_t::cusparse_dn_mat_descr_wrapper_t() : need_destruction_(false) -{ -} - -template -cusparse_dn_mat_descr_wrapper_t::~cusparse_dn_mat_descr_wrapper_t() -{ - if (need_destruction_) { RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(descr_)); } -} - -template -cusparse_dn_mat_descr_wrapper_t::cusparse_dn_mat_descr_wrapper_t( - const cusparse_dn_mat_descr_wrapper_t& other) - : descr_(other.descr_), need_destruction_(false) -{ -} - -template -cusparse_dn_mat_descr_wrapper_t& cusparse_dn_mat_descr_wrapper_t::operator=( - cusparse_dn_mat_descr_wrapper_t&& other) -{ - if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseDestroyDnMat(descr_)); } - descr_ = other.descr_; - need_destruction_ = other.need_destruction_; - other.need_destruction_ = false; - return *this; -} - -template -void cusparse_dn_mat_descr_wrapper_t::create( - int64_t row, int64_t col, int64_t ld, f_t* values, cusparseOrder_t order) -{ - if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseDestroyDnMat(descr_)); } - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsecreatednmat(&descr_, row, col, ld, values, order)); - need_destruction_ = true; -} - -template -cusparse_dn_mat_descr_wrapper_t::operator cusparseDnMatDescr_t() const -{ - return descr_; -} +// All inline factories and aliases for SpMat/DnVec/DnMat live in the header. +// Deleter operator() bodies for SpMVOpDescr/SpMVOpPlan are defined further down +// because they need dlsym-resolved cuSPARSE symbols. #if CUDA_VER_12_4_UP struct dynamic_load_runtime { @@ -249,10 +129,10 @@ void my_cusparsespmm_preprocess(cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, const T* alpha, - const cusparseSpMatDescr_t matA, - const cusparseDnMatDescr_t matB, + cusparse_sp_mat_descr_view matA, + cusparse_dn_mat_descr_view matB, const T* beta, - const cusparseDnMatDescr_t matC, + cusparse_dn_mat_descr_view matC, cusparseSpMMAlg_t alg, void* externalBuffer, cudaStream_t stream) @@ -302,134 +182,59 @@ using cusparseSpMVOp_sig = cusparse_sig; -cusparseStatus_t cusparse_spmvop_descr_wrapper_t::dlsym_create(cusparseHandle_t handle, - cusparseSpMVOpDescr_t* descr, - cusparseOperation_t opA, - cusparseSpMatDescr_t matA, - cusparseDnVecDescr_t vecX, - cusparseDnVecDescr_t vecY, - cusparseDnVecDescr_t vecZ, - cudaDataType computeType, - void* buffer) -{ - static const auto fn = - dynamic_load_runtime::function("cusparseSpMVOp_createDescr"); - return (*fn)(handle, descr, opA, matA, vecX, vecY, vecZ, computeType, buffer); -} +// dlsym-resolved create/destroy. The *_destroy_sig variants are reused by the +// deleters; the *_create_sig variants are reused by make_spmvop_{descr,plan}. -cusparseStatus_t cusparse_spmvop_descr_wrapper_t::dlsym_destroy(cusparseSpMVOpDescr_t descr) +void cusparse_spmvop_descr_deleter::operator()(cusparseSpMVOpDescr_t descr) const noexcept { + if (!descr) { return; } static const auto fn = dynamic_load_runtime::function("cusparseSpMVOp_destroyDescr"); - return (*fn)(descr); + if (fn.has_value()) { RAFT_CUSPARSE_TRY_NO_THROW((*fn)(descr)); } } -cusparseStatus_t cusparse_spmvop_plan_wrapper_t::dlsym_create(cusparseHandle_t handle, - cusparseSpMVOpDescr_t descr, - cusparseSpMVOpPlan_t* plan, - char* ltoIRBuf, - size_t ltoIRSize) -{ - static const auto fn = - dynamic_load_runtime::function("cusparseSpMVOp_createPlan"); - return (*fn)(handle, descr, plan, ltoIRBuf, ltoIRSize); -} - -cusparseStatus_t cusparse_spmvop_plan_wrapper_t::dlsym_destroy(cusparseSpMVOpPlan_t plan) +void cusparse_spmvop_plan_deleter::operator()(cusparseSpMVOpPlan_t plan) const noexcept { + if (!plan) { return; } static const auto fn = dynamic_load_runtime::function("cusparseSpMVOp_destroyPlan"); - return (*fn)(plan); -} - -cusparse_spmvop_descr_wrapper_t::cusparse_spmvop_descr_wrapper_t() - : descr_(nullptr), need_destruction_(false) -{ -} - -cusparse_spmvop_descr_wrapper_t::~cusparse_spmvop_descr_wrapper_t() -{ - if (!need_destruction_) { return; } - RAFT_CUSPARSE_TRY_NO_THROW(dlsym_destroy(descr_)); -} - -cusparse_spmvop_descr_wrapper_t::cusparse_spmvop_descr_wrapper_t( - const cusparse_spmvop_descr_wrapper_t& other) - : descr_(other.descr_), need_destruction_(false) -{ -} - -cusparse_spmvop_descr_wrapper_t& cusparse_spmvop_descr_wrapper_t::operator=( - cusparse_spmvop_descr_wrapper_t&& other) -{ - if (need_destruction_) { RAFT_CUSPARSE_TRY(dlsym_destroy(descr_)); } - descr_ = other.descr_; - need_destruction_ = other.need_destruction_; - other.need_destruction_ = false; - return *this; + if (fn.has_value()) { RAFT_CUSPARSE_TRY_NO_THROW((*fn)(plan)); } } -void cusparse_spmvop_descr_wrapper_t::create(cusparseHandle_t handle, +cusparse_spmvop_descr_uptr make_spmvop_descr(cusparseHandle_t handle, cusparseOperation_t opA, - cusparseSpMatDescr_t matA, - cusparseDnVecDescr_t vecX, - cusparseDnVecDescr_t vecY, - cusparseDnVecDescr_t vecZ, + cusparse_sp_mat_descr_view matA, + cusparse_dn_vec_descr_view vecX, + cusparse_dn_vec_descr_view vecY, + cusparse_dn_vec_descr_view vecZ, cudaDataType computeType, rmm::device_uvector& buffer) { - if (need_destruction_) { RAFT_CUSPARSE_TRY(dlsym_destroy(descr_)); } + static const auto fn = + dynamic_load_runtime::function("cusparseSpMVOp_createDescr"); + cusparseSpMVOpDescr_t descr{nullptr}; RAFT_CUSPARSE_TRY( - dlsym_create(handle, &descr_, opA, matA, vecX, vecY, vecZ, computeType, buffer.data())); - need_destruction_ = true; -} - -cusparse_spmvop_descr_wrapper_t::operator cusparseSpMVOpDescr_t() const { return descr_; } - -cusparse_spmvop_plan_wrapper_t::cusparse_spmvop_plan_wrapper_t() - : plan_(nullptr), need_destruction_(false) -{ -} - -cusparse_spmvop_plan_wrapper_t::~cusparse_spmvop_plan_wrapper_t() -{ - if (!need_destruction_) { return; } - RAFT_CUSPARSE_TRY_NO_THROW(dlsym_destroy(plan_)); -} - -cusparse_spmvop_plan_wrapper_t::cusparse_spmvop_plan_wrapper_t( - const cusparse_spmvop_plan_wrapper_t& other) - : plan_(other.plan_), need_destruction_(false) -{ -} - -cusparse_spmvop_plan_wrapper_t& cusparse_spmvop_plan_wrapper_t::operator=( - cusparse_spmvop_plan_wrapper_t&& other) -{ - if (need_destruction_) { RAFT_CUSPARSE_TRY(dlsym_destroy(plan_)); } - plan_ = other.plan_; - need_destruction_ = other.need_destruction_; - other.need_destruction_ = false; - return *this; + (*fn)(handle, &descr, opA, matA, vecX, vecY, vecZ, computeType, buffer.data())); + return cusparse_spmvop_descr_uptr{descr}; } -void cusparse_spmvop_plan_wrapper_t::create(cusparseHandle_t handle, cusparseSpMVOpDescr_t descr) +cusparse_spmvop_plan_uptr make_spmvop_plan(cusparseHandle_t handle, cusparseSpMVOpDescr_t descr) { - if (need_destruction_) { RAFT_CUSPARSE_TRY(dlsym_destroy(plan_)); } + static const auto fn = + dynamic_load_runtime::function("cusparseSpMVOp_createPlan"); + cusparseSpMVOpPlan_t plan{nullptr}; // cuOpt does not supply user-provided LTO IR; pass nullptr/0 so cuSPARSE JITs internally. - RAFT_CUSPARSE_TRY(dlsym_create(handle, descr, &plan_, /*ltoIRBuf=*/nullptr, /*ltoIRSize=*/0)); - need_destruction_ = true; + RAFT_CUSPARSE_TRY((*fn)(handle, descr, &plan, /*ltoIRBuf=*/nullptr, /*ltoIRSize=*/0)); + return cusparse_spmvop_plan_uptr{plan}; } -cusparse_spmvop_plan_wrapper_t::operator cusparseSpMVOpPlan_t() const { return plan_; } - void cusparse_spmvop_run(cusparseHandle_t handle, cusparseSpMVOpPlan_t plan, const void* alpha, const void* beta, - cusparseDnVecDescr_t vecX, - cusparseDnVecDescr_t vecY, - cusparseDnVecDescr_t vecZ, + cusparse_dn_vec_descr_view vecX, + cusparse_dn_vec_descr_view vecY, + cusparse_dn_vec_descr_view vecZ, cudaStream_t stream) { static const auto func = dynamic_load_runtime::function("cusparseSpMVOp"); @@ -455,18 +260,6 @@ cusparse_view_t::cusparse_view_t( bool enable_mixed_precision_spmv) : batch_mode_(climber_strategies.size() > 1), handle_ptr_(handle_ptr), - A{}, - A_T{}, - c{}, - primal_solution{}, - dual_solution{}, - primal_gradient{}, - dual_gradient{}, - current_AtY{}, - next_AtY{}, - potential_next_dual_solution{}, - tmp_primal{}, - tmp_dual{}, A_T_{op_problem_scaled.reverse_coefficients}, A_T_offsets_{op_problem_scaled.reverse_offsets}, A_T_indices_{op_problem_scaled.reverse_constraints}, @@ -496,106 +289,108 @@ cusparse_view_t::cusparse_view_t( #endif // setup cusparse view - A.create(op_problem_scaled.n_constraints, - op_problem_scaled.n_variables, - op_problem_scaled.nnz, - const_cast(op_problem_scaled.offsets.data()), - const_cast(op_problem_scaled.variables.data()), - const_cast(op_problem_scaled.coefficients.data())); - - A_T.create(op_problem_scaled.n_variables, - op_problem_scaled.n_constraints, - op_problem_scaled.nnz, - const_cast(A_T_offsets_.data()), - const_cast(A_T_indices_.data()), - const_cast(A_T_.data())); - - c.create(op_problem_scaled.n_variables, - const_cast(op_problem_scaled.objective_coefficients.data())); - - primal_solution.create(op_problem_scaled.n_variables, - current_saddle_point_state.get_primal_solution().data()); - dual_solution.create(op_problem_scaled.n_constraints, - current_saddle_point_state.get_dual_solution().data()); - - // TODO batch mdoe: convert those to RAII views + A = make_csr(op_problem_scaled.n_constraints, + op_problem_scaled.n_variables, + op_problem_scaled.nnz, + const_cast(op_problem_scaled.offsets.data()), + const_cast(op_problem_scaled.variables.data()), + const_cast(op_problem_scaled.coefficients.data())); + + A_T = make_csr(op_problem_scaled.n_variables, + op_problem_scaled.n_constraints, + op_problem_scaled.nnz, + const_cast(A_T_offsets_.data()), + const_cast(A_T_indices_.data()), + const_cast(A_T_.data())); + + c = make_dnvec(op_problem_scaled.n_variables, + const_cast(op_problem_scaled.objective_coefficients.data())); + + primal_solution = make_dnvec(op_problem_scaled.n_variables, + current_saddle_point_state.get_primal_solution().data()); + dual_solution = make_dnvec(op_problem_scaled.n_constraints, + current_saddle_point_state.get_dual_solution().data()); + if (batch_mode_) { [[maybe_unused]] const bool is_cupdlpx = is_cupdlpx_restart(hyper_params); cuopt_assert(is_cupdlpx, "Batch mode only supported with cuPDLPx restart"); - batch_dual_solutions.create(op_problem_scaled.n_constraints, - climber_strategies.size(), - climber_strategies.size(), - current_saddle_point_state.get_dual_solution().data(), - CUSPARSE_ORDER_ROW); - batch_current_AtYs.create(op_problem_scaled.n_variables, - climber_strategies.size(), - climber_strategies.size(), - current_saddle_point_state.get_current_AtY().data(), - CUSPARSE_ORDER_ROW); - batch_potential_next_dual_solution.create(op_problem_scaled.n_constraints, - climber_strategies.size(), - op_problem_scaled.n_constraints, - _potential_next_dual_solution.data(), - CUSPARSE_ORDER_COL); - batch_next_AtYs.create(op_problem_scaled.n_variables, - climber_strategies.size(), - op_problem_scaled.n_variables, - current_saddle_point_state.get_next_AtY().data(), - CUSPARSE_ORDER_COL); + batch_dual_solutions = make_dnmat(op_problem_scaled.n_constraints, + climber_strategies.size(), + climber_strategies.size(), + current_saddle_point_state.get_dual_solution().data(), + CUSPARSE_ORDER_ROW); + batch_current_AtYs = make_dnmat(op_problem_scaled.n_variables, + climber_strategies.size(), + climber_strategies.size(), + current_saddle_point_state.get_current_AtY().data(), + CUSPARSE_ORDER_ROW); + batch_potential_next_dual_solution = + make_dnmat(op_problem_scaled.n_constraints, + climber_strategies.size(), + op_problem_scaled.n_constraints, + _potential_next_dual_solution.data(), + CUSPARSE_ORDER_COL); + batch_next_AtYs = make_dnmat(op_problem_scaled.n_variables, + climber_strategies.size(), + op_problem_scaled.n_variables, + current_saddle_point_state.get_next_AtY().data(), + CUSPARSE_ORDER_COL); cuopt_assert(_reflected_primal_solution.size() > 0, "Reflected primal solution empty"); - batch_reflected_primal_solutions.create(op_problem_scaled.n_variables, - climber_strategies.size(), - climber_strategies.size(), - _reflected_primal_solution.data(), - CUSPARSE_ORDER_ROW); - batch_dual_gradients.create(op_problem_scaled.n_constraints, - climber_strategies.size(), - climber_strategies.size(), - current_saddle_point_state.get_dual_gradient().data(), - CUSPARSE_ORDER_ROW); + batch_reflected_primal_solutions = make_dnmat(op_problem_scaled.n_variables, + climber_strategies.size(), + climber_strategies.size(), + _reflected_primal_solution.data(), + CUSPARSE_ORDER_ROW); + batch_dual_gradients = make_dnmat(op_problem_scaled.n_constraints, + climber_strategies.size(), + climber_strategies.size(), + current_saddle_point_state.get_dual_gradient().data(), + CUSPARSE_ORDER_ROW); } // Necessary even in non batch mode (because of infeasiblity detection) - batch_delta_primal_solutions.create(op_problem_scaled.n_variables, - climber_strategies.size(), - op_problem_scaled.n_variables, - current_saddle_point_state.get_delta_primal().data(), - CUSPARSE_ORDER_COL); - batch_delta_dual_solutions.create(op_problem_scaled.n_constraints, + batch_delta_primal_solutions = + make_dnmat(op_problem_scaled.n_variables, + climber_strategies.size(), + op_problem_scaled.n_variables, + current_saddle_point_state.get_delta_primal().data(), + CUSPARSE_ORDER_COL); + batch_delta_dual_solutions = make_dnmat(op_problem_scaled.n_constraints, + climber_strategies.size(), + op_problem_scaled.n_constraints, + current_saddle_point_state.get_delta_dual().data(), + CUSPARSE_ORDER_COL); + batch_tmp_duals = make_dnmat(op_problem_scaled.n_constraints, climber_strategies.size(), op_problem_scaled.n_constraints, - current_saddle_point_state.get_delta_dual().data(), + _tmp_dual.data(), CUSPARSE_ORDER_COL); - batch_tmp_duals.create(op_problem_scaled.n_constraints, - climber_strategies.size(), - op_problem_scaled.n_constraints, - _tmp_dual.data(), - CUSPARSE_ORDER_COL); - batch_tmp_primals.create(op_problem_scaled.n_variables, - climber_strategies.size(), - op_problem_scaled.n_variables, - _tmp_primal.data(), - CUSPARSE_ORDER_COL); - - primal_gradient.create( + batch_tmp_primals = make_dnmat(op_problem_scaled.n_variables, + climber_strategies.size(), + op_problem_scaled.n_variables, + _tmp_primal.data(), + CUSPARSE_ORDER_COL); + + primal_gradient = make_dnvec( current_saddle_point_state.get_primal_gradient().size(), // It is 0 in cupdlpx current_saddle_point_state.get_primal_gradient().data()); - dual_gradient.create(op_problem_scaled.n_constraints, - current_saddle_point_state.get_dual_gradient().data()); + dual_gradient = make_dnvec(op_problem_scaled.n_constraints, + current_saddle_point_state.get_dual_gradient().data()); - current_AtY.create(op_problem_scaled.n_variables, - current_saddle_point_state.get_current_AtY().data()); - next_AtY.create(op_problem_scaled.n_variables, current_saddle_point_state.get_next_AtY().data()); + current_AtY = make_dnvec(op_problem_scaled.n_variables, + current_saddle_point_state.get_current_AtY().data()); + next_AtY = make_dnvec(op_problem_scaled.n_variables, + current_saddle_point_state.get_next_AtY().data()); - potential_next_dual_solution.create(op_problem_scaled.n_constraints, - _potential_next_dual_solution.data()); + potential_next_dual_solution = + make_dnvec(op_problem_scaled.n_constraints, _potential_next_dual_solution.data()); - tmp_primal.create(op_problem_scaled.n_variables, _tmp_primal.data()); - tmp_dual.create(op_problem_scaled.n_constraints, _tmp_dual.data()); + tmp_primal = make_dnvec(op_problem_scaled.n_variables, _tmp_primal.data()); + tmp_dual = make_dnvec(op_problem_scaled.n_constraints, _tmp_dual.data()); if (hyper_params.use_reflected_primal_dual) { cuopt_assert(_reflected_primal_solution.size() > 0, "Reflected primal solution empty"); - reflected_primal_solution.create(op_problem_scaled.n_variables, - _reflected_primal_solution.data()); + reflected_primal_solution = + make_dnvec(op_problem_scaled.n_variables, _reflected_primal_solution.data()); } const rmm::device_scalar alpha{1, handle_ptr->get_stream()}; @@ -605,10 +400,10 @@ cusparse_view_t::cusparse_view_t( raft::sparse::detail::cusparsespmv_buffersize(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A, - c, + A.get(), + c.get(), beta.data(), - dual_solution, + dual_solution.get(), CUSPARSE_SPMV_CSR_ALG2, &buffer_size_non_transpose, handle_ptr->get_stream())); @@ -619,10 +414,10 @@ cusparse_view_t::cusparse_view_t( raft::sparse::detail::cusparsespmv_buffersize(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A_T, - dual_solution, + A_T.get(), + dual_solution.get(), beta.data(), - c, + c.get(), CUSPARSE_SPMV_CSR_ALG2, &buffer_size_transpose, handle_ptr->get_stream())); @@ -636,10 +431,10 @@ cusparse_view_t::cusparse_view_t( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A_T, - batch_delta_dual_solutions, + A_T.get(), + batch_delta_dual_solutions.get(), beta.data(), - batch_tmp_primals, + batch_tmp_primals.get(), CUSPARSE_SPMM_CSR_ALG3, &buffer_size_transpose_batch, handle_ptr->get_stream())); @@ -651,10 +446,10 @@ cusparse_view_t::cusparse_view_t( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A, - batch_delta_primal_solutions, + A.get(), + batch_delta_primal_solutions.get(), beta.data(), - batch_tmp_duals, + batch_tmp_duals.get(), CUSPARSE_SPMM_CSR_ALG3, &buffer_size_non_transpose_batch, handle_ptr->get_stream())); @@ -668,10 +463,10 @@ cusparse_view_t::cusparse_view_t( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A_T, - batch_dual_solutions, + A_T.get(), + batch_dual_solutions.get(), beta.data(), - batch_current_AtYs, + batch_current_AtYs.get(), (deterministic_batch_pdlp) ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, &buffer_size_transpose_batch_row_row, handle_ptr->get_stream())); @@ -683,10 +478,10 @@ cusparse_view_t::cusparse_view_t( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A, - batch_reflected_primal_solutions, + A.get(), + batch_reflected_primal_solutions.get(), beta.data(), - batch_dual_gradients, + batch_dual_gradients.get(), (deterministic_batch_pdlp) ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, &buffer_size_non_transpose_batch_row_row, handle_ptr->get_stream())); @@ -698,10 +493,10 @@ cusparse_view_t::cusparse_view_t( my_cusparsespmv_preprocess(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A, - c, + A.get(), + c.get(), beta.data(), - dual_solution, + dual_solution.get(), CUSPARSE_SPMV_CSR_ALG2, buffer_non_transpose.data(), handle_ptr->get_stream()); @@ -709,10 +504,10 @@ cusparse_view_t::cusparse_view_t( my_cusparsespmv_preprocess(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A_T, - dual_solution, + A_T.get(), + dual_solution.get(), beta.data(), - c, + c.get(), CUSPARSE_SPMV_CSR_ALG2, buffer_transpose.data(), handle_ptr->get_stream()); @@ -720,10 +515,10 @@ cusparse_view_t::cusparse_view_t( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A_T, - batch_delta_dual_solutions, + A_T.get(), + batch_delta_dual_solutions.get(), beta.data(), - batch_tmp_primals, + batch_tmp_primals.get(), CUSPARSE_SPMM_CSR_ALG3, buffer_transpose_batch.data(), handle_ptr->get_stream()); @@ -732,10 +527,10 @@ cusparse_view_t::cusparse_view_t( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A, - batch_delta_primal_solutions, + A.get(), + batch_delta_primal_solutions.get(), beta.data(), - batch_tmp_duals, + batch_tmp_duals.get(), CUSPARSE_SPMM_CSR_ALG3, buffer_non_transpose_batch.data(), handle_ptr->get_stream()); @@ -745,10 +540,10 @@ cusparse_view_t::cusparse_view_t( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A_T, - batch_dual_solutions, + A_T.get(), + batch_dual_solutions.get(), beta.data(), - batch_current_AtYs, + batch_current_AtYs.get(), (deterministic_batch_pdlp) ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, buffer_transpose_batch_row_row_.data(), handle_ptr->get_stream()); @@ -757,10 +552,10 @@ cusparse_view_t::cusparse_view_t( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A, - batch_reflected_primal_solutions, + A.get(), + batch_reflected_primal_solutions.get(), beta.data(), - batch_dual_gradients, + batch_dual_gradients.get(), (deterministic_batch_pdlp) ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, buffer_non_transpose_batch_row_row_.data(), handle_ptr->get_stream()); @@ -786,19 +581,18 @@ cusparse_view_t::cusparse_view_t( double_to_float_functor{}, handle_ptr->get_stream().value())); - A_mixed_.create(op_problem_scaled.n_constraints, - op_problem_scaled.n_variables, - op_problem_scaled.nnz, - const_cast(op_problem_scaled.offsets.data()), - const_cast(op_problem_scaled.variables.data()), - A_float_.data()); - - A_T_mixed_.create(op_problem_scaled.n_variables, - op_problem_scaled.n_constraints, - op_problem_scaled.nnz, - const_cast(A_T_offsets_.data()), - const_cast(A_T_indices_.data()), - A_T_float_.data()); + A_mixed_ = make_csr(op_problem_scaled.n_constraints, + op_problem_scaled.n_variables, + op_problem_scaled.nnz, + const_cast(op_problem_scaled.offsets.data()), + const_cast(op_problem_scaled.variables.data()), + A_float_.data()); + A_T_mixed_ = make_csr(op_problem_scaled.n_variables, + op_problem_scaled.n_constraints, + op_problem_scaled.nnz, + const_cast(A_T_offsets_.data()), + const_cast(A_T_indices_.data()), + A_T_float_.data()); const rmm::device_scalar alpha_d{1.0, handle_ptr->get_stream()}; const rmm::device_scalar beta_d{0.0, handle_ptr->get_stream()}; @@ -807,10 +601,10 @@ cusparse_view_t::cusparse_view_t( mixed_precision_spmv_buffersize(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.data(), - A_mixed_, - c, + A_mixed_.get(), + c.get(), beta_d.data(), - dual_solution, + dual_solution.get(), CUSPARSE_SPMV_CSR_ALG2, handle_ptr->get_stream()); buffer_non_transpose_mixed_.resize(buffer_size_non_transpose_mixed, handle_ptr->get_stream()); @@ -819,10 +613,10 @@ cusparse_view_t::cusparse_view_t( mixed_precision_spmv_buffersize(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.data(), - A_T_mixed_, - dual_solution, + A_T_mixed_.get(), + dual_solution.get(), beta_d.data(), - c, + c.get(), CUSPARSE_SPMV_CSR_ALG2, handle_ptr->get_stream()); buffer_transpose_mixed_.resize(buffer_size_transpose_mixed, handle_ptr->get_stream()); @@ -831,10 +625,10 @@ cusparse_view_t::cusparse_view_t( mixed_precision_spmv_preprocess(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.data(), - A_mixed_, - c, + A_mixed_.get(), + c.get(), beta_d.data(), - dual_solution, + dual_solution.get(), CUSPARSE_SPMV_CSR_ALG2, buffer_non_transpose_mixed_.data(), handle_ptr->get_stream()); @@ -842,10 +636,10 @@ cusparse_view_t::cusparse_view_t( mixed_precision_spmv_preprocess(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.data(), - A_T_mixed_, - dual_solution, + A_T_mixed_.get(), + dual_solution.get(), beta_d.data(), - c, + c.get(), CUSPARSE_SPMV_CSR_ALG2, buffer_transpose_mixed_.data(), handle_ptr->get_stream()); @@ -873,15 +667,6 @@ cusparse_view_t::cusparse_view_t( const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params) : batch_mode_(climber_strategies.size() > 1), handle_ptr_(handle_ptr), - A{}, - A_T{}, - c{}, - primal_solution{}, - dual_solution{}, - primal_gradient{}, - dual_gradient{}, - tmp_primal{}, - tmp_dual{}, A_T_{_A_T}, A_T_offsets_{_A_T_offsets}, A_T_indices_{_A_T_indices}, @@ -912,56 +697,57 @@ cusparse_view_t::cusparse_view_t( handle_ptr_->get_cusparse_handle(), CUSPARSE_POINTER_MODE_DEVICE, handle_ptr->get_stream())); // setup cusparse view - A.create(op_problem.n_constraints, - op_problem.n_variables, - op_problem.nnz, - const_cast(op_problem.offsets.data()), - const_cast(op_problem.variables.data()), - const_cast(op_problem.coefficients.data())); - - A_T.create(op_problem.n_variables, - op_problem.n_constraints, - op_problem.nnz, - const_cast(A_T_offsets_.data()), - const_cast(A_T_indices_.data()), - const_cast(A_T_.data())); - - c.create(op_problem.n_variables, const_cast(op_problem.objective_coefficients.data())); + A = make_csr(op_problem.n_constraints, + op_problem.n_variables, + op_problem.nnz, + const_cast(op_problem.offsets.data()), + const_cast(op_problem.variables.data()), + const_cast(op_problem.coefficients.data())); + + A_T = make_csr(op_problem.n_variables, + op_problem.n_constraints, + op_problem.nnz, + const_cast(A_T_offsets_.data()), + const_cast(A_T_indices_.data()), + const_cast(A_T_.data())); + + c = make_dnvec(op_problem.n_variables, + const_cast(op_problem.objective_coefficients.data())); if (!hyper_params.use_adaptive_step_size_strategy) { - primal_solution.create(op_problem.n_variables, _potential_next_primal.data()); - dual_solution.create(op_problem.n_constraints, _potential_next_dual.data()); + primal_solution = make_dnvec(op_problem.n_variables, _potential_next_primal.data()); + dual_solution = make_dnvec(op_problem.n_constraints, _potential_next_dual.data()); } else { - primal_solution.create(op_problem.n_variables, _primal_solution.data()); - dual_solution.create(op_problem.n_constraints, _dual_solution.data()); + primal_solution = make_dnvec(op_problem.n_variables, _primal_solution.data()); + dual_solution = make_dnvec(op_problem.n_constraints, _dual_solution.data()); } - tmp_primal.create(op_problem.n_variables, _tmp_primal.data()); - tmp_dual.create(op_problem.n_constraints, _tmp_dual.data()); + tmp_primal = make_dnvec(op_problem.n_variables, _tmp_primal.data()); + tmp_dual = make_dnvec(op_problem.n_constraints, _tmp_dual.data()); if (batch_mode_) { [[maybe_unused]] const bool is_cupdlpx = is_cupdlpx_restart(hyper_params); cuopt_assert(is_cupdlpx, "Batch mode only supported with cuPDLPx restart"); - batch_primal_solutions.create(op_problem.n_variables, - climber_strategies.size(), - op_problem.n_variables, - _potential_next_primal.data(), - CUSPARSE_ORDER_COL); - batch_dual_solutions.create(op_problem.n_constraints, - climber_strategies.size(), - op_problem.n_constraints, - _potential_next_dual.data(), - CUSPARSE_ORDER_COL); - batch_tmp_duals.create(op_problem.n_constraints, - climber_strategies.size(), - op_problem.n_constraints, - _tmp_dual.data(), - CUSPARSE_ORDER_COL); - batch_tmp_primals.create(op_problem.n_variables, - climber_strategies.size(), - op_problem.n_variables, - _tmp_primal.data(), - CUSPARSE_ORDER_COL); + batch_primal_solutions = make_dnmat(op_problem.n_variables, + climber_strategies.size(), + op_problem.n_variables, + _potential_next_primal.data(), + CUSPARSE_ORDER_COL); + batch_dual_solutions = make_dnmat(op_problem.n_constraints, + climber_strategies.size(), + op_problem.n_constraints, + _potential_next_dual.data(), + CUSPARSE_ORDER_COL); + batch_tmp_duals = make_dnmat(op_problem.n_constraints, + climber_strategies.size(), + op_problem.n_constraints, + _tmp_dual.data(), + CUSPARSE_ORDER_COL); + batch_tmp_primals = make_dnmat(op_problem.n_variables, + climber_strategies.size(), + op_problem.n_variables, + _tmp_primal.data(), + CUSPARSE_ORDER_COL); } const rmm::device_scalar alpha{1, handle_ptr->get_stream()}; @@ -971,10 +757,10 @@ cusparse_view_t::cusparse_view_t( raft::sparse::detail::cusparsespmv_buffersize(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A, - c, + A.get(), + c.get(), beta.data(), - dual_solution, + dual_solution.get(), CUSPARSE_SPMV_CSR_ALG2, &buffer_size_non_transpose, handle_ptr->get_stream())); @@ -985,10 +771,10 @@ cusparse_view_t::cusparse_view_t( raft::sparse::detail::cusparsespmv_buffersize(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A_T, - dual_solution, + A_T.get(), + dual_solution.get(), beta.data(), - c, + c.get(), CUSPARSE_SPMV_CSR_ALG2, &buffer_size_transpose, handle_ptr->get_stream())); @@ -1002,10 +788,10 @@ cusparse_view_t::cusparse_view_t( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A_T, - batch_dual_solutions, + A_T.get(), + batch_dual_solutions.get(), beta.data(), - batch_tmp_primals, + batch_tmp_primals.get(), CUSPARSE_SPMM_CSR_ALG3, &buffer_size_transpose_batch, handle_ptr->get_stream())); @@ -1016,10 +802,10 @@ cusparse_view_t::cusparse_view_t( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A, - batch_primal_solutions, + A.get(), + batch_primal_solutions.get(), beta.data(), - batch_tmp_duals, + batch_tmp_duals.get(), CUSPARSE_SPMM_CSR_ALG3, &buffer_size_non_transpose_batch, handle_ptr->get_stream())); @@ -1030,10 +816,10 @@ cusparse_view_t::cusparse_view_t( my_cusparsespmv_preprocess(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A, - c, + A.get(), + c.get(), beta.data(), - dual_solution, + dual_solution.get(), CUSPARSE_SPMV_CSR_ALG2, buffer_non_transpose.data(), handle_ptr->get_stream()); @@ -1041,10 +827,10 @@ cusparse_view_t::cusparse_view_t( my_cusparsespmv_preprocess(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A_T, - dual_solution, + A_T.get(), + dual_solution.get(), beta.data(), - c, + c.get(), CUSPARSE_SPMV_CSR_ALG2, buffer_transpose.data(), handle_ptr->get_stream()); @@ -1054,10 +840,10 @@ cusparse_view_t::cusparse_view_t( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A, - batch_primal_solutions, + A.get(), + batch_primal_solutions.get(), beta.data(), - batch_tmp_duals, + batch_tmp_duals.get(), CUSPARSE_SPMM_CSR_ALG3, buffer_non_transpose_batch.data(), handle_ptr->get_stream()); @@ -1066,10 +852,10 @@ cusparse_view_t::cusparse_view_t( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A_T, - batch_dual_solutions, + A_T.get(), + batch_dual_solutions.get(), beta.data(), - batch_tmp_primals, + batch_tmp_primals.get(), CUSPARSE_SPMM_CSR_ALG3, buffer_transpose_batch.data(), handle_ptr->get_stream()); @@ -1077,8 +863,11 @@ cusparse_view_t::cusparse_view_t( #endif } -// Constructor used 3 times in restart strategy for the duality gaps -// Used in trust region restart +// Constructor used 3 times in restart strategy for the duality gaps (trust region restart). +// All cuSPARSE descriptors are recreated (no borrowing) to avoid the post-CUDA-12.4 segfault +// observed when copying cusparseSpMatDescr_t handles. tmp_primal/tmp_dual underlying buffer +// pointers are recovered from existing_cusparse_view via cusparseDnVecGetValues; the buffers +// themselves are owned by the parent pdhg_solver_t and outlive every cusparse_view_t. template cusparse_view_t::cusparse_view_t( raft::handle_t const* handle_ptr, @@ -1089,13 +878,6 @@ cusparse_view_t::cusparse_view_t( f_t* _primal_gradient, f_t* _dual_gradient) : handle_ptr_(handle_ptr), - c(existing_cusparse_view.c), - primal_solution{}, - dual_solution{}, - primal_gradient{}, - dual_gradient{}, - tmp_primal(existing_cusparse_view.tmp_primal), - tmp_dual(existing_cusparse_view.tmp_dual), buffer_non_transpose{0, handle_ptr->get_stream()}, buffer_transpose{0, handle_ptr->get_stream()}, buffer_non_transpose_spmvop{0, handle_ptr->get_stream()}, @@ -1125,29 +907,41 @@ cusparse_view_t::cusparse_view_t( RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsesetpointermode( handle_ptr_->get_cusparse_handle(), CUSPARSE_POINTER_MODE_DEVICE, handle_ptr->get_stream())); - // Need to reinstanciate the cuSparse views - // Copying them from the existing cuSparse view is a bad practice and creates segfault post - // CUDA 12.4 Using the saved pointer of the existing cusparse view to make sure we capture the - // correct pointer - A.create(op_problem.n_constraints, - op_problem.n_variables, - op_problem.nnz, - const_cast(A_offsets_.data()), - const_cast(A_indices_.data()), - const_cast(A_.data())); - - A_T.create(op_problem.n_variables, - op_problem.n_constraints, - op_problem.nnz, - const_cast(existing_cusparse_view.A_T_offsets_.data()), - const_cast(existing_cusparse_view.A_T_indices_.data()), - const_cast(existing_cusparse_view.A_T_.data())); - - primal_solution.create(op_problem.n_variables, _primal_solution); - dual_solution.create(op_problem.n_constraints, _dual_solution); - - primal_gradient.create(op_problem.n_variables, _primal_gradient); - dual_gradient.create(op_problem.n_constraints, _dual_gradient); + // All descriptors are reinstantiated below from the saved data pointers (a copy of the + // cusparseSpMatDescr_t handles segfaults under CUDA 12.4+). + A = make_csr(op_problem.n_constraints, + op_problem.n_variables, + op_problem.nnz, + const_cast(A_offsets_.data()), + const_cast(A_indices_.data()), + const_cast(A_.data())); + + A_T = make_csr(op_problem.n_variables, + op_problem.n_constraints, + op_problem.nnz, + const_cast(existing_cusparse_view.A_T_offsets_.data()), + const_cast(existing_cusparse_view.A_T_indices_.data()), + const_cast(existing_cusparse_view.A_T_.data())); + + c = make_dnvec(op_problem.n_variables, + const_cast(op_problem.objective_coefficients.data())); + + primal_solution = make_dnvec(op_problem.n_variables, _primal_solution); + dual_solution = make_dnvec(op_problem.n_constraints, _dual_solution); + + primal_gradient = make_dnvec(op_problem.n_variables, _primal_gradient); + dual_gradient = make_dnvec(op_problem.n_constraints, _dual_gradient); + + // Recover the underlying pdhg-owned scratch buffers from the existing view's descriptors so + // the new descriptors point at the same memory without plumbing the rmm::device_uvector refs + // through pdlp_restart_strategy_t. + void* tmp_primal_data{nullptr}; + void* tmp_dual_data{nullptr}; + RAFT_CUSPARSE_TRY( + cusparseDnVecGetValues(existing_cusparse_view.tmp_primal.get(), &tmp_primal_data)); + RAFT_CUSPARSE_TRY(cusparseDnVecGetValues(existing_cusparse_view.tmp_dual.get(), &tmp_dual_data)); + tmp_primal = make_dnvec(op_problem.n_variables, static_cast(tmp_primal_data)); + tmp_dual = make_dnvec(op_problem.n_constraints, static_cast(tmp_dual_data)); const rmm::device_scalar alpha{1, handle_ptr->get_stream()}; const rmm::device_scalar beta{1, handle_ptr->get_stream()}; @@ -1156,10 +950,10 @@ cusparse_view_t::cusparse_view_t( raft::sparse::detail::cusparsespmv_buffersize(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A, - c, + A.get(), + c.get(), beta.data(), - dual_solution, + dual_solution.get(), CUSPARSE_SPMV_CSR_ALG2, &buffer_size_non_transpose, handle_ptr->get_stream())); @@ -1170,10 +964,10 @@ cusparse_view_t::cusparse_view_t( raft::sparse::detail::cusparsespmv_buffersize(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A_T, - dual_solution, + A_T.get(), + dual_solution.get(), beta.data(), - c, + c.get(), CUSPARSE_SPMV_CSR_ALG2, &buffer_size_transpose, handle_ptr->get_stream())); @@ -1184,10 +978,10 @@ cusparse_view_t::cusparse_view_t( my_cusparsespmv_preprocess(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A, - c, + A.get(), + c.get(), beta.data(), - dual_solution, + dual_solution.get(), CUSPARSE_SPMV_CSR_ALG2, buffer_non_transpose.data(), handle_ptr->get_stream()); @@ -1195,10 +989,10 @@ cusparse_view_t::cusparse_view_t( my_cusparsespmv_preprocess(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), - A_T, - dual_solution, + A_T.get(), + dual_solution.get(), beta.data(), - c, + c.get(), CUSPARSE_SPMV_CSR_ALG2, buffer_transpose.data(), handle_ptr->get_stream()); @@ -1265,26 +1059,26 @@ template void cusparse_view_t::redirect_cusparse_csr_structure_pointers( const problem_t& original_problem) { - RAFT_CUSPARSE_TRY(cusparseCsrSetPointers(A, + RAFT_CUSPARSE_TRY(cusparseCsrSetPointers(A.get(), const_cast(original_problem.offsets.data()), const_cast(original_problem.variables.data()), const_cast(A_.data()))); RAFT_CUSPARSE_TRY( - cusparseCsrSetPointers(A_T, + cusparseCsrSetPointers(A_T.get(), const_cast(original_problem.reverse_offsets.data()), const_cast(original_problem.reverse_constraints.data()), const_cast(A_T_.data()))); if constexpr (std::is_same_v) { if (mixed_precision_enabled_) { - RAFT_CUSPARSE_TRY(cusparseCsrSetPointers(A_mixed_, + RAFT_CUSPARSE_TRY(cusparseCsrSetPointers(A_mixed_.get(), const_cast(original_problem.offsets.data()), const_cast(original_problem.variables.data()), A_float_.data())); RAFT_CUSPARSE_TRY( - cusparseCsrSetPointers(A_T_mixed_, + cusparseCsrSetPointers(A_T_mixed_.get(), const_cast(original_problem.reverse_offsets.data()), const_cast(original_problem.reverse_constraints.data()), A_T_float_.data())); @@ -1296,10 +1090,10 @@ void cusparse_view_t::redirect_cusparse_csr_structure_pointers( size_t mixed_precision_spmv_buffersize(cusparseHandle_t handle, cusparseOperation_t opA, const double* alpha, - cusparseSpMatDescr_t matA, // FP32 matrix - cusparseDnVecDescr_t vecX, // FP64 vector + cusparse_sp_mat_descr_view matA, // FP32 matrix + cusparse_dn_vec_descr_view vecX, // FP64 vector const double* beta, - cusparseDnVecDescr_t vecY, // FP64 vector + cusparse_dn_vec_descr_view vecY, // FP64 vector cusparseSpMVAlg_t alg, cudaStream_t stream) { @@ -1313,10 +1107,10 @@ size_t mixed_precision_spmv_buffersize(cusparseHandle_t handle, void mixed_precision_spmv(cusparseHandle_t handle, cusparseOperation_t opA, const double* alpha, - cusparseSpMatDescr_t matA, // FP32 matrix - cusparseDnVecDescr_t vecX, // FP64 vector + cusparse_sp_mat_descr_view matA, // FP32 matrix + cusparse_dn_vec_descr_view vecX, // FP64 vector const double* beta, - cusparseDnVecDescr_t vecY, // FP64 vector + cusparse_dn_vec_descr_view vecY, // FP64 vector cusparseSpMVAlg_t alg, void* externalBuffer, cudaStream_t stream) @@ -1330,10 +1124,10 @@ void mixed_precision_spmv(cusparseHandle_t handle, void mixed_precision_spmv_preprocess(cusparseHandle_t handle, cusparseOperation_t opA, const double* alpha, - cusparseSpMatDescr_t matA, // FP32 matrix - cusparseDnVecDescr_t vecX, // FP64 vector + cusparse_sp_mat_descr_view matA, // FP32 matrix + cusparse_dn_vec_descr_view vecX, // FP64 vector const double* beta, - cusparseDnVecDescr_t vecY, // FP64 vector + cusparse_dn_vec_descr_view vecY, // FP64 vector cusparseSpMVAlg_t alg, void* externalBuffer, cudaStream_t stream) @@ -1383,62 +1177,57 @@ void cusparse_view_t::create_spmv_op_plans(bool is_reflected) size_t buffer_size_transpose = 0; RAFT_CUSPARSE_TRY((*buffer_size)(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, - A_T, - dual_solution, - current_AtY, - current_AtY, + A_T.get(), + dual_solution.get(), + current_AtY.get(), + current_AtY.get(), CUDA_R_64F, &buffer_size_transpose)); buffer_transpose_spmvop.resize(buffer_size_transpose, handle_ptr_->get_stream()); - spmv_op_descr_A_t_.create(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - A_T, - dual_solution, - current_AtY, - current_AtY, - CUDA_R_64F, - buffer_transpose_spmvop); + spmv_op_descr_A_t_ = make_spmvop_descr(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + A_T.get(), + dual_solution.get(), + current_AtY.get(), + current_AtY.get(), + CUDA_R_64F, + buffer_transpose_spmvop); - spmv_op_plan_A_t_.create(handle_ptr_->get_cusparse_handle(), spmv_op_descr_A_t_); + spmv_op_plan_A_t_ = + make_spmvop_plan(handle_ptr_->get_cusparse_handle(), spmv_op_descr_A_t_.get()); // Only prepare buffers for A_x if we are using reflected_halpern if (is_reflected) { size_t buffer_size_non_transpose = 0; RAFT_CUSPARSE_TRY((*buffer_size)(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, - A, - reflected_primal_solution, - dual_gradient, - dual_gradient, + A.get(), + reflected_primal_solution.get(), + dual_gradient.get(), + dual_gradient.get(), CUDA_R_64F, &buffer_size_non_transpose)); buffer_non_transpose_spmvop.resize(buffer_size_non_transpose, handle_ptr_->get_stream()); - spmv_op_descr_A_.create(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - A, - reflected_primal_solution, - dual_gradient, - dual_gradient, - CUDA_R_64F, - buffer_non_transpose_spmvop); + spmv_op_descr_A_ = make_spmvop_descr(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + A.get(), + reflected_primal_solution.get(), + dual_gradient.get(), + dual_gradient.get(), + CUDA_R_64F, + buffer_non_transpose_spmvop); - spmv_op_plan_A_.create(handle_ptr_->get_cusparse_handle(), spmv_op_descr_A_); + spmv_op_plan_A_ = make_spmvop_plan(handle_ptr_->get_cusparse_handle(), spmv_op_descr_A_.get()); } #endif } #if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT -template class cusparse_sp_mat_descr_wrapper_t; -template class cusparse_dn_vec_descr_wrapper_t; -template class cusparse_dn_mat_descr_wrapper_t; template class cusparse_view_t; #endif #if MIP_INSTANTIATE_DOUBLE -template class cusparse_sp_mat_descr_wrapper_t; -template class cusparse_dn_vec_descr_wrapper_t; -template class cusparse_dn_mat_descr_wrapper_t; template class cusparse_view_t; #endif diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index a76644c21e..0250531730 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -20,135 +20,125 @@ #include +#include +#include + #define CUDA_VER_13_2_UP (CUDART_VERSION >= 13020) namespace cuopt::linear_programming::detail { -template -class cusparse_sp_mat_descr_wrapper_t { - public: - cusparse_sp_mat_descr_wrapper_t(); - ~cusparse_sp_mat_descr_wrapper_t(); - - cusparse_sp_mat_descr_wrapper_t(const cusparse_sp_mat_descr_wrapper_t& other); - - cusparse_sp_mat_descr_wrapper_t& operator=(const cusparse_sp_mat_descr_wrapper_t& other) = delete; - - void create(int64_t m, int64_t n, int64_t nnz, i_t* offsets, i_t* indices, f_t* values); - - operator cusparseSpMatDescr_t() const; +// --------------------------------------------------------------------------- +// Deleters and unique_ptr aliases for cuSPARSE opaque handles. +// +// Each cuSPARSE handle (cusparseSpMatDescr_t etc.) is a typedef for a pointer +// to an opaque struct. We use std::remove_pointer_t to feed unique_ptr the +// pointee type so that unique_ptr<...>::pointer matches the cuSPARSE handle. +// --------------------------------------------------------------------------- + +struct cusparse_sp_mat_deleter { + void operator()(cusparseSpMatDescr_t descr) const noexcept + { + if (descr) { RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(descr)); } + } +}; - private: - cusparseSpMatDescr_t descr_; - bool need_destruction_; +struct cusparse_dn_vec_deleter { + void operator()(cusparseDnVecDescr_t descr) const noexcept + { + if (descr) { RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(descr)); } + } }; -template -class cusparse_dn_vec_descr_wrapper_t { - public: - cusparse_dn_vec_descr_wrapper_t(); - ~cusparse_dn_vec_descr_wrapper_t(); +struct cusparse_dn_mat_deleter { + void operator()(cusparseDnMatDescr_t descr) const noexcept + { + if (descr) { RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(descr)); } + } +}; - cusparse_dn_vec_descr_wrapper_t(const cusparse_dn_vec_descr_wrapper_t& other); - cusparse_dn_vec_descr_wrapper_t& operator=(cusparse_dn_vec_descr_wrapper_t&& other); - cusparse_dn_vec_descr_wrapper_t& operator=(const cusparse_dn_vec_descr_wrapper_t& other) = delete; +using cusparse_sp_mat_uptr = + std::unique_ptr, cusparse_sp_mat_deleter>; +using cusparse_dn_vec_uptr = + std::unique_ptr, cusparse_dn_vec_deleter>; +using cusparse_dn_mat_uptr = + std::unique_ptr, cusparse_dn_mat_deleter>; - void create(int64_t size, f_t* values); +// Borrowed views: identical to the raw cuSPARSE handle types but the alias makes the non-owning +// intent explicit at API boundaries. Pair with the *_uptr aliases above: +// _uptr -> owns the descriptor; the destructor calls cusparseDestroy* +// _view -> non-owning, just the raw handle, lifetime managed elsewhere +using cusparse_sp_mat_descr_view = cusparseSpMatDescr_t; +using cusparse_dn_vec_descr_view = cusparseDnVecDescr_t; +using cusparse_dn_mat_descr_view = cusparseDnMatDescr_t; - operator cusparseDnVecDescr_t() const; +// Factory functions replacing the old `wrapper.create(...)` two-phase init. - private: - cusparseDnVecDescr_t descr_; - bool need_destruction_; -}; +template +inline cusparse_sp_mat_uptr make_csr( + int64_t m, int64_t n, int64_t nnz, i_t* offsets, i_t* indices, f_t* values) +{ + cusparseSpMatDescr_t descr{nullptr}; + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsecreatecsr(&descr, m, n, nnz, offsets, indices, values)); + return cusparse_sp_mat_uptr{descr}; +} template -class cusparse_dn_mat_descr_wrapper_t { - public: - cusparse_dn_mat_descr_wrapper_t(); - ~cusparse_dn_mat_descr_wrapper_t(); - - cusparse_dn_mat_descr_wrapper_t(const cusparse_dn_mat_descr_wrapper_t& other); - cusparse_dn_mat_descr_wrapper_t& operator=(cusparse_dn_mat_descr_wrapper_t&& other); - cusparse_dn_mat_descr_wrapper_t& operator=(const cusparse_dn_mat_descr_wrapper_t& other) = delete; +inline cusparse_dn_vec_uptr make_dnvec(int64_t size, f_t* values) +{ + cusparseDnVecDescr_t descr{nullptr}; + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&descr, size, values)); + return cusparse_dn_vec_uptr{descr}; +} - void create(int64_t row, int64_t col, int64_t ld, f_t* values, cusparseOrder_t order); - - operator cusparseDnMatDescr_t() const; - - private: - cusparseDnMatDescr_t descr_; - bool need_destruction_; -}; +template +inline cusparse_dn_mat_uptr make_dnmat( + int64_t row, int64_t col, int64_t ld, f_t* values, cusparseOrder_t order) +{ + cusparseDnMatDescr_t descr{nullptr}; + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsecreatednmat(&descr, row, col, ld, values, order)); + return cusparse_dn_mat_uptr{descr}; +} #if CUDA_VER_13_2_UP -// RAII wrapper around cusparse SpMVOp objects. All the buffers are owned by the cusparse_view_t. -class cusparse_spmvop_descr_wrapper_t { - public: - cusparse_spmvop_descr_wrapper_t(); - ~cusparse_spmvop_descr_wrapper_t(); - - cusparse_spmvop_descr_wrapper_t(const cusparse_spmvop_descr_wrapper_t& other); - cusparse_spmvop_descr_wrapper_t& operator=(cusparse_spmvop_descr_wrapper_t&& other); - cusparse_spmvop_descr_wrapper_t& operator=(const cusparse_spmvop_descr_wrapper_t& other) = delete; - - void create(cusparseHandle_t handle, - cusparseOperation_t opA, - cusparseSpMatDescr_t matA, - cusparseDnVecDescr_t vecX, - cusparseDnVecDescr_t vecY, - cusparseDnVecDescr_t vecZ, - cudaDataType computeType, - rmm::device_uvector& buffer); - - operator cusparseSpMVOpDescr_t() const; - - private: - // Forwards to cusparseSpMVOp_{create,destroy}Descr resolved via dlsym (cached on first call). - // This is needed because the cusparseSpMVOp_{create,destroy}Descr symbols might not be defined in - // current runtime. - static cusparseStatus_t dlsym_create(cusparseHandle_t handle, - cusparseSpMVOpDescr_t* descr, - cusparseOperation_t opA, - cusparseSpMatDescr_t matA, - cusparseDnVecDescr_t vecX, - cusparseDnVecDescr_t vecY, - cusparseDnVecDescr_t vecZ, - cudaDataType computeType, - void* buffer); - static cusparseStatus_t dlsym_destroy(cusparseSpMVOpDescr_t descr); - - cusparseSpMVOpDescr_t descr_; - bool need_destruction_; +// --------------------------------------------------------------------------- +// SpMVOp descriptor and plan deleters. +// +// The cusparseSpMVOp_{create,destroy}{Descr,Plan} symbols may not be present +// in the runtime cuSPARSE (the compiled CUDA version may differ from the one +// at runtime), so destruction is dispatched through dlsym. The deleters below +// resolve the destroy symbol at first use and cache it via a function-local +// static. +// --------------------------------------------------------------------------- + +struct cusparse_spmvop_descr_deleter { + void operator()(cusparseSpMVOpDescr_t descr) const noexcept; }; -class cusparse_spmvop_plan_wrapper_t { - public: - cusparse_spmvop_plan_wrapper_t(); - ~cusparse_spmvop_plan_wrapper_t(); - - cusparse_spmvop_plan_wrapper_t(const cusparse_spmvop_plan_wrapper_t& other); - cusparse_spmvop_plan_wrapper_t& operator=(cusparse_spmvop_plan_wrapper_t&& other); - cusparse_spmvop_plan_wrapper_t& operator=(const cusparse_spmvop_plan_wrapper_t& other) = delete; - - void create(cusparseHandle_t handle, cusparseSpMVOpDescr_t descr); - - operator cusparseSpMVOpPlan_t() const; - - private: - // Forwards to cusparseSpMVOp_{create,destroy}Plan resolved via dlsym (cached on first call). - // This is needed because the cusparseSpMVOp_{create,destroy}Plan symbols might not be defined in - // current runtime. - static cusparseStatus_t dlsym_create(cusparseHandle_t handle, - cusparseSpMVOpDescr_t descr, - cusparseSpMVOpPlan_t* plan, - char* ltoIRBuf, - size_t ltoIRSize); - static cusparseStatus_t dlsym_destroy(cusparseSpMVOpPlan_t plan); - - cusparseSpMVOpPlan_t plan_; - bool need_destruction_; +struct cusparse_spmvop_plan_deleter { + void operator()(cusparseSpMVOpPlan_t plan) const noexcept; }; + +using cusparse_spmvop_descr_uptr = + std::unique_ptr, cusparse_spmvop_descr_deleter>; +using cusparse_spmvop_plan_uptr = + std::unique_ptr, cusparse_spmvop_plan_deleter>; + +// Factories. `make_spmvop_descr` resolves cusparseSpMVOp_createDescr via dlsym. +cusparse_spmvop_descr_uptr make_spmvop_descr(cusparseHandle_t handle, + cusparseOperation_t opA, + cusparse_sp_mat_descr_view matA, + cusparse_dn_vec_descr_view vecX, + cusparse_dn_vec_descr_view vecY, + cusparse_dn_vec_descr_view vecZ, + cudaDataType computeType, + rmm::device_uvector& buffer); + +// `make_spmvop_plan` passes nullptr/0 for ltoIRBuf/ltoIRSize so cuSPARSE JITs +// internally; cuOpt does not supply user-provided LTO IR. +cusparse_spmvop_plan_uptr make_spmvop_plan(cusparseHandle_t handle, + cusparseSpMVOpDescr_t descr); #endif template @@ -197,48 +187,47 @@ class cusparse_view_t { raft::handle_t const* handle_ptr_{nullptr}; // cusparse view of linear program - cusparse_sp_mat_descr_wrapper_t A; - cusparse_sp_mat_descr_wrapper_t A_T; - cusparse_dn_vec_descr_wrapper_t c; + cusparse_sp_mat_uptr A; + cusparse_sp_mat_uptr A_T; + cusparse_dn_vec_uptr c; // cusparse view of solutions - cusparse_dn_vec_descr_wrapper_t primal_solution; - cusparse_dn_vec_descr_wrapper_t dual_solution; + cusparse_dn_vec_uptr primal_solution; + cusparse_dn_vec_uptr dual_solution; // cusparse view of gradients - cusparse_dn_vec_descr_wrapper_t primal_gradient; - cusparse_dn_vec_descr_wrapper_t dual_gradient; + cusparse_dn_vec_uptr primal_gradient; + cusparse_dn_vec_uptr dual_gradient; // cusparse view of batch gradients - cusparse_dn_mat_descr_wrapper_t batch_dual_gradients; + cusparse_dn_mat_uptr batch_dual_gradients; // cusparse view of batch solutions - cusparse_dn_mat_descr_wrapper_t batch_primal_solutions; - cusparse_dn_mat_descr_wrapper_t batch_dual_solutions; - cusparse_dn_mat_descr_wrapper_t batch_potential_next_dual_solution; - cusparse_dn_mat_descr_wrapper_t batch_next_AtYs; - cusparse_dn_mat_descr_wrapper_t batch_tmp_duals; - cusparse_dn_mat_descr_wrapper_t batch_reflected_primal_solutions; - cusparse_dn_mat_descr_wrapper_t batch_delta_primal_solutions; - cusparse_dn_mat_descr_wrapper_t batch_delta_dual_solutions; + cusparse_dn_mat_uptr batch_primal_solutions; + cusparse_dn_mat_uptr batch_dual_solutions; + cusparse_dn_mat_uptr batch_potential_next_dual_solution; + cusparse_dn_mat_uptr batch_next_AtYs; + cusparse_dn_mat_uptr batch_tmp_duals; + cusparse_dn_mat_uptr batch_reflected_primal_solutions; + cusparse_dn_mat_uptr batch_delta_primal_solutions; + cusparse_dn_mat_uptr batch_delta_dual_solutions; // cusparse view of At * Y batch computation - cusparse_dn_mat_descr_wrapper_t batch_current_AtYs; + cusparse_dn_mat_uptr batch_current_AtYs; // cusparse view of auxillirary space needed for some spmm computations - cusparse_dn_mat_descr_wrapper_t batch_tmp_primals; + cusparse_dn_mat_uptr batch_tmp_primals; // cusparse view of At * Y computation - cusparse_dn_vec_descr_wrapper_t - current_AtY; // Only used at very first iteration and after each restart to average - cusparse_dn_vec_descr_wrapper_t - next_AtY; // Next value is swapped out with current after each valid PDHG - // step to save the first AtY SpMV in compute next primal - cusparse_dn_vec_descr_wrapper_t potential_next_dual_solution; + cusparse_dn_vec_uptr current_AtY; // Only used at very first iteration and after each restart to + // average + cusparse_dn_vec_uptr next_AtY; // Next value is swapped out with current after each valid PDHG + // step to save the first AtY SpMV in compute next primal + cusparse_dn_vec_uptr potential_next_dual_solution; // cusparse view of auxiliary space needed for some spmv computations - cusparse_dn_vec_descr_wrapper_t tmp_primal; - cusparse_dn_vec_descr_wrapper_t tmp_dual; + cusparse_dn_vec_uptr tmp_primal; + cusparse_dn_vec_uptr tmp_dual; // reuse buffers for cusparse spmv rmm::device_uvector buffer_non_transpose; @@ -250,10 +239,10 @@ class cusparse_view_t { #if CUDA_VER_13_2_UP // SpMVOp descriptors and plans for A and A_T (descr before plan so dtor destroys plan first) - cusparse_spmvop_descr_wrapper_t spmv_op_descr_A_; - cusparse_spmvop_plan_wrapper_t spmv_op_plan_A_; - cusparse_spmvop_descr_wrapper_t spmv_op_descr_A_t_; - cusparse_spmvop_plan_wrapper_t spmv_op_plan_A_t_; + cusparse_spmvop_descr_uptr spmv_op_descr_A_; + cusparse_spmvop_plan_uptr spmv_op_plan_A_; + cusparse_spmvop_descr_uptr spmv_op_descr_A_t_; + cusparse_spmvop_plan_uptr spmv_op_plan_A_t_; #endif // reuse buffers for cusparse spmm rmm::device_uvector buffer_transpose_batch; @@ -261,7 +250,7 @@ class cusparse_view_t { rmm::device_uvector buffer_transpose_batch_row_row_; rmm::device_uvector buffer_non_transpose_batch_row_row_; // Only when using reflection - cusparse_dn_vec_descr_wrapper_t reflected_primal_solution; + cusparse_dn_vec_uptr reflected_primal_solution; // Ref to the A_T found in either // Initial problem, we use it to have an unscaled A_T @@ -283,8 +272,8 @@ class cusparse_view_t { // Only used when mixed_precision_enabled_ is true and f_t = double rmm::device_uvector A_float_; // FP32 copy of A values rmm::device_uvector A_T_float_; // FP32 copy of A_T values - cusparse_sp_mat_descr_wrapper_t A_mixed_; // FP32 matrix descriptor for A - cusparse_sp_mat_descr_wrapper_t A_T_mixed_; // FP32 matrix descriptor for A_T + cusparse_sp_mat_uptr A_mixed_; // FP32 matrix descriptor for A + cusparse_sp_mat_uptr A_T_mixed_; // FP32 matrix descriptor for A_T rmm::device_uvector buffer_non_transpose_mixed_; // SpMV buffer for mixed precision A rmm::device_uvector buffer_transpose_mixed_; // SpMV buffer for mixed precision A_T bool mixed_precision_enabled_{false}; @@ -303,10 +292,10 @@ class cusparse_view_t { void mixed_precision_spmv(cusparseHandle_t handle, cusparseOperation_t opA, const double* alpha, - cusparseSpMatDescr_t matA, // FP32 matrix - cusparseDnVecDescr_t vecX, // FP64 vector + cusparse_sp_mat_descr_view matA, // FP32 matrix + cusparse_dn_vec_descr_view vecX, // FP64 vector const double* beta, - cusparseDnVecDescr_t vecY, // FP64 vector + cusparse_dn_vec_descr_view vecY, // FP64 vector cusparseSpMVAlg_t alg, void* externalBuffer, cudaStream_t stream); @@ -314,10 +303,10 @@ void mixed_precision_spmv(cusparseHandle_t handle, size_t mixed_precision_spmv_buffersize(cusparseHandle_t handle, cusparseOperation_t opA, const double* alpha, - cusparseSpMatDescr_t matA, // FP32 matrix - cusparseDnVecDescr_t vecX, // FP64 vector + cusparse_sp_mat_descr_view matA, // FP32 matrix + cusparse_dn_vec_descr_view vecX, // FP64 vector const double* beta, - cusparseDnVecDescr_t vecY, // FP64 vector + cusparse_dn_vec_descr_view vecY, // FP64 vector cusparseSpMVAlg_t alg, cudaStream_t stream); @@ -325,10 +314,10 @@ size_t mixed_precision_spmv_buffersize(cusparseHandle_t handle, void mixed_precision_spmv_preprocess(cusparseHandle_t handle, cusparseOperation_t opA, const double* alpha, - cusparseSpMatDescr_t matA, // FP32 matrix - cusparseDnVecDescr_t vecX, // FP64 vector + cusparse_sp_mat_descr_view matA, // FP32 matrix + cusparse_dn_vec_descr_view vecX, // FP64 vector const double* beta, - cusparseDnVecDescr_t vecY, // FP64 vector + cusparse_dn_vec_descr_view vecY, // FP64 vector cusparseSpMVAlg_t alg, void* externalBuffer, cudaStream_t stream); @@ -342,10 +331,10 @@ void my_cusparsespmm_preprocess(cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, const T* alpha, - const cusparseSpMatDescr_t matA, - const cusparseDnMatDescr_t matB, + cusparse_sp_mat_descr_view matA, + cusparse_dn_mat_descr_view matB, const T* beta, - const cusparseDnMatDescr_t matC, + cusparse_dn_mat_descr_view matC, cusparseSpMMAlg_t alg, void* externalBuffer, cudaStream_t stream); @@ -364,9 +353,9 @@ void cusparse_spmvop_run(cusparseHandle_t handle, cusparseSpMVOpPlan_t plan, const void* alpha, const void* beta, - cusparseDnVecDescr_t vecX, - cusparseDnVecDescr_t vecY, - cusparseDnVecDescr_t vecZ, + cusparse_dn_vec_descr_view vecX, + cusparse_dn_vec_descr_view vecY, + cusparse_dn_vec_descr_view vecZ, cudaStream_t stream); #endif From dafb3e3ae0e74006017964cfe82451d201cc0cb7 Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Mon, 1 Jun 2026 10:20:41 +0200 Subject: [PATCH 2/7] the rest of the PR --- cpp/src/barrier/cusparse_view.hpp | 15 +- cpp/src/barrier/sparse_matrix_kernels.cuh | 86 ++++---- .../optimal_batch_size_handler.cu | 125 ++++++----- cpp/src/pdlp/pdhg.cu | 90 ++++---- cpp/src/pdlp/pdlp.cu | 196 +++++++++--------- .../restart_strategy/pdlp_restart_strategy.cu | 18 +- .../adaptive_step_size_strategy.cu | 12 +- .../convergence_information.cu | 24 +-- .../infeasibility_information.cu | 24 +-- 9 files changed, 300 insertions(+), 290 deletions(-) diff --git a/cpp/src/barrier/cusparse_view.hpp b/cpp/src/barrier/cusparse_view.hpp index 802fc90f8b..0316b4b7b0 100644 --- a/cpp/src/barrier/cusparse_view.hpp +++ b/cpp/src/barrier/cusparse_view.hpp @@ -27,9 +27,8 @@ class cusparse_view_t { public: // TMP matrix data should already be on the GPU and in CSR not CSC cusparse_view_t(raft::handle_t const* handle_ptr, const csc_matrix_t& A); - ~cusparse_view_t(); - detail::cusparse_dn_vec_descr_wrapper_t create_vector(rmm::device_uvector const& vec); + detail::cusparse_dn_vec_uptr create_vector(rmm::device_uvector const& vec); template void spmv(f_t alpha, @@ -38,9 +37,9 @@ class cusparse_view_t { std::vector& y); void spmv(f_t alpha, rmm::device_uvector const& x, f_t beta, rmm::device_uvector& y); void spmv(f_t alpha, - detail::cusparse_dn_vec_descr_wrapper_t const& x, + detail::cusparse_dn_vec_descr_view x, f_t beta, - detail::cusparse_dn_vec_descr_wrapper_t const& y); + detail::cusparse_dn_vec_descr_view y); template void transpose_spmv(f_t alpha, const std::vector& x, @@ -51,9 +50,9 @@ class cusparse_view_t { f_t beta, rmm::device_uvector& y); void transpose_spmv(f_t alpha, - detail::cusparse_dn_vec_descr_wrapper_t const& x, + detail::cusparse_dn_vec_descr_view x, f_t beta, - detail::cusparse_dn_vec_descr_wrapper_t const& y); + detail::cusparse_dn_vec_descr_view y); raft::handle_t const* handle_ptr_{nullptr}; @@ -61,11 +60,11 @@ class cusparse_view_t { rmm::device_uvector A_offsets_; rmm::device_uvector A_indices_; rmm::device_uvector A_data_; - cusparseSpMatDescr_t A_; + detail::cusparse_sp_mat_uptr A_; rmm::device_uvector A_T_offsets_; rmm::device_uvector A_T_indices_; rmm::device_uvector A_T_data_; - cusparseSpMatDescr_t A_T_; + detail::cusparse_sp_mat_uptr A_T_; rmm::device_buffer spmv_buffer_; rmm::device_buffer spmv_buffer_transpose_; rmm::device_scalar d_one_; diff --git a/cpp/src/barrier/sparse_matrix_kernels.cuh b/cpp/src/barrier/sparse_matrix_kernels.cuh index 4727c12ec8..601f64b5b1 100644 --- a/cpp/src/barrier/sparse_matrix_kernels.cuh +++ b/cpp/src/barrier/sparse_matrix_kernels.cuh @@ -24,24 +24,18 @@ void initialize_cusparse_data(raft::handle_t const* handle, f_t chunk_fraction = 0.15; // Create matrix descriptors - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatecsr( - &cusparse_data.matA_descr, A.m, A.n, A_nnz, A.row_start.data(), A.j.data(), A.x.data())); - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatecsr(&cusparse_data.matDAT_descr, - DAT.n, - DAT.m, - DAT_nnz, - DAT.col_start.data(), - DAT.i.data(), - DAT.x.data())); - - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatecsr(&cusparse_data.matADAT_descr, - ADAT.m, - ADAT.n, - 0, - ADAT.row_start.data(), - ADAT.j.data(), - ADAT.x.data())); - RAFT_CUSPARSE_TRY(cusparseSpGEMM_createDescr(&cusparse_data.spgemm_descr)); + cusparse_data.matA_descr = + detail::make_csr(A.m, A.n, A_nnz, A.row_start.data(), A.j.data(), A.x.data()); + cusparse_data.matDAT_descr = detail::make_csr( + DAT.n, DAT.m, DAT_nnz, DAT.col_start.data(), DAT.i.data(), DAT.x.data()); + cusparse_data.matADAT_descr = detail::make_csr( + ADAT.m, ADAT.n, 0, ADAT.row_start.data(), ADAT.j.data(), ADAT.x.data()); + + { + cusparseSpGEMMDescr_t raw{nullptr}; + RAFT_CUSPARSE_TRY(cusparseSpGEMM_createDescr(&raw)); + cusparse_data.spgemm_descr = cusparse_spgemm_uptr{raw}; + } // Buffer size size_t buffer_size; @@ -49,13 +43,13 @@ void initialize_cusparse_data(raft::handle_t const* handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, cusparse_data.alpha.data(), - cusparse_data.matA_descr, - cusparse_data.matDAT_descr, + cusparse_data.matA_descr.get(), + cusparse_data.matDAT_descr.get(), cusparse_data.beta.data(), - cusparse_data.matADAT_descr, + cusparse_data.matADAT_descr.get(), CUDA_R_64F, CUSPARSE_SPGEMM_ALG3, - cusparse_data.spgemm_descr, + cusparse_data.spgemm_descr.get(), &buffer_size, nullptr)); cusparse_data.buffer_size.resize(buffer_size, handle->get_stream()); @@ -64,31 +58,31 @@ void initialize_cusparse_data(raft::handle_t const* handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, cusparse_data.alpha.data(), - cusparse_data.matA_descr, - cusparse_data.matDAT_descr, + cusparse_data.matA_descr.get(), + cusparse_data.matDAT_descr.get(), cusparse_data.beta.data(), - cusparse_data.matADAT_descr, + cusparse_data.matADAT_descr.get(), CUDA_R_64F, CUSPARSE_SPGEMM_ALG3, - cusparse_data.spgemm_descr, + cusparse_data.spgemm_descr.get(), &buffer_size, cusparse_data.buffer_size.data())); int64_t num_prods; - RAFT_CUSPARSE_TRY(cusparseSpGEMM_getNumProducts(cusparse_data.spgemm_descr, &num_prods)); + RAFT_CUSPARSE_TRY(cusparseSpGEMM_getNumProducts(cusparse_data.spgemm_descr.get(), &num_prods)); size_t buffer_size_3_size; RAFT_CUSPARSE_TRY(cusparseSpGEMM_estimateMemory(handle->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, cusparse_data.alpha.data(), - cusparse_data.matA_descr, - cusparse_data.matDAT_descr, + cusparse_data.matA_descr.get(), + cusparse_data.matDAT_descr.get(), cusparse_data.beta.data(), - cusparse_data.matADAT_descr, + cusparse_data.matADAT_descr.get(), CUDA_R_64F, CUSPARSE_SPGEMM_ALG3, - cusparse_data.spgemm_descr, + cusparse_data.spgemm_descr.get(), chunk_fraction, &buffer_size_3_size, nullptr, @@ -99,13 +93,13 @@ void initialize_cusparse_data(raft::handle_t const* handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, cusparse_data.alpha.data(), - cusparse_data.matA_descr, - cusparse_data.matDAT_descr, + cusparse_data.matA_descr.get(), + cusparse_data.matDAT_descr.get(), cusparse_data.beta.data(), - cusparse_data.matADAT_descr, + cusparse_data.matADAT_descr.get(), CUDA_R_64F, CUSPARSE_SPGEMM_ALG3, - cusparse_data.spgemm_descr, + cusparse_data.spgemm_descr.get(), chunk_fraction, &buffer_size_3_size, cusparse_data.buffer_size_3.data(), @@ -126,39 +120,39 @@ void multiply_kernels(raft::handle_t const* handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, cusparse_data.alpha.data(), - cusparse_data.matA_descr, // non-const descriptor supported - cusparse_data.matDAT_descr, // non-const descriptor supported + cusparse_data.matA_descr.get(), // non-const descriptor supported + cusparse_data.matDAT_descr.get(), // non-const descriptor supported cusparse_data.beta.data(), - cusparse_data.matADAT_descr, + cusparse_data.matADAT_descr.get(), CUDA_R_64F, CUSPARSE_SPGEMM_ALG3, - cusparse_data.spgemm_descr, + cusparse_data.spgemm_descr.get(), &cusparse_data.buffer_size_2_size, cusparse_data.buffer_size_2.data())); // get matrix C non-zero entries C_nnz1 int64_t ADAT_num_rows, ADAT_num_cols, ADAT_nnz1; - RAFT_CUSPARSE_TRY( - cusparseSpMatGetSize(cusparse_data.matADAT_descr, &ADAT_num_rows, &ADAT_num_cols, &ADAT_nnz1)); + RAFT_CUSPARSE_TRY(cusparseSpMatGetSize( + cusparse_data.matADAT_descr.get(), &ADAT_num_rows, &ADAT_num_cols, &ADAT_nnz1)); ADAT.resize_to_nnz(ADAT_nnz1, handle->get_stream()); thrust::fill(rmm::exec_policy(handle->get_stream()), ADAT.x.begin(), ADAT.x.end(), 0.0); // update matC with the new pointers RAFT_CUSPARSE_TRY(cusparseCsrSetPointers( - cusparse_data.matADAT_descr, ADAT.row_start.data(), ADAT.j.data(), ADAT.x.data())); + cusparse_data.matADAT_descr.get(), ADAT.row_start.data(), ADAT.j.data(), ADAT.x.data())); RAFT_CUSPARSE_TRY(cusparseSpGEMM_copy(handle->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, cusparse_data.alpha.data(), - cusparse_data.matA_descr, - cusparse_data.matDAT_descr, + cusparse_data.matA_descr.get(), + cusparse_data.matDAT_descr.get(), cusparse_data.beta.data(), - cusparse_data.matADAT_descr, + cusparse_data.matADAT_descr.get(), CUDA_R_64F, CUSPARSE_SPGEMM_ALG3, - cusparse_data.spgemm_descr)); + cusparse_data.spgemm_descr.get())); handle->sync_stream(); } diff --git a/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu b/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu index cbfb03618d..15809bdb46 100644 --- a/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu +++ b/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu @@ -19,8 +19,8 @@ namespace cuopt::linear_programming::detail { template struct SpMM_benchmarks_context_t { - SpMM_benchmarks_context_t(cusparse_sp_mat_descr_wrapper_t& A, - cusparse_sp_mat_descr_wrapper_t& A_T, + SpMM_benchmarks_context_t(cusparse_sp_mat_descr_view A, + cusparse_sp_mat_descr_view A_T, int primal_size, int dual_size, size_t current_batch_size, @@ -45,8 +45,9 @@ struct SpMM_benchmarks_context_t { int col_dual = current_batch_size; int ld_dual = current_batch_size; - x_descr.create(rows_primal, col_primal, ld_primal, x.data(), CUSPARSE_ORDER_ROW); - y_descr.create(rows_dual, col_dual, ld_dual, y.data(), CUSPARSE_ORDER_ROW); + x_descr = + make_dnmat(rows_primal, col_primal, ld_primal, x.data(), CUSPARSE_ORDER_ROW); + y_descr = make_dnmat(rows_dual, col_dual, ld_dual, y.data(), CUSPARSE_ORDER_ROW); // Init buffers for SpMMs size_t buffer_size_non_transpose_batch = 0; @@ -56,9 +57,9 @@ struct SpMM_benchmarks_context_t { CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), A, - x_descr, + x_descr.get(), beta.data(), - y_descr, + y_descr.get(), (deterministic_batch_pdlp) ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, &buffer_size_non_transpose_batch, stream_view)); @@ -70,9 +71,9 @@ struct SpMM_benchmarks_context_t { CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), A_T, - y_descr, + y_descr.get(), beta.data(), - x_descr, + x_descr.get(), (deterministic_batch_pdlp) ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, &buffer_size_transpose_batch, stream_view)); @@ -88,9 +89,9 @@ struct SpMM_benchmarks_context_t { CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), A_T, - y_descr, + y_descr.get(), beta.data(), - x_descr, + x_descr.get(), (deterministic_batch_pdlp) ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, buffer_transpose_batch.data(), stream_view); @@ -101,9 +102,9 @@ struct SpMM_benchmarks_context_t { CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), A, - x_descr, + x_descr.get(), beta.data(), - y_descr, + y_descr.get(), (deterministic_batch_pdlp) ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, buffer_non_transpose_batch.data(), stream_view); @@ -123,9 +124,9 @@ struct SpMM_benchmarks_context_t { CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), A, - x_descr, + x_descr.get(), beta.data(), - y_descr, + y_descr.get(), (deterministic_batch_pdlp) ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, (f_t*)buffer_non_transpose_batch.data(), stream_view)); @@ -136,30 +137,30 @@ struct SpMM_benchmarks_context_t { CUSPARSE_OPERATION_NON_TRANSPOSE, alpha.data(), A_T, - y_descr, + y_descr.get(), beta.data(), - x_descr, + x_descr.get(), (deterministic_batch_pdlp) ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, (f_t*)buffer_transpose_batch.data(), stream_view)); } - cusparse_dn_mat_descr_wrapper_t x_descr; - cusparse_dn_mat_descr_wrapper_t y_descr; + cusparse_dn_mat_uptr x_descr; + cusparse_dn_mat_uptr y_descr; rmm::device_uvector x; rmm::device_uvector y; rmm::device_buffer buffer_non_transpose_batch; rmm::device_buffer buffer_transpose_batch; rmm::device_scalar alpha; rmm::device_scalar beta; - cusparse_sp_mat_descr_wrapper_t& A; - cusparse_sp_mat_descr_wrapper_t& A_T; + cusparse_sp_mat_descr_view A; + cusparse_sp_mat_descr_view A_T; raft::handle_t const* handle_ptr; }; template -static double evaluate_node(cusparse_sp_mat_descr_wrapper_t& A, - cusparse_sp_mat_descr_wrapper_t& A_T, +static double evaluate_node(cusparse_sp_mat_descr_view A, + cusparse_sp_mat_descr_view A_T, i_t primal_size, i_t dual_size, int current_batch_size, @@ -223,24 +224,20 @@ int optimal_batch_size_handler(const optimization_problem_t& op_proble detail::problem_t problem(op_problem); // Init cuSparse views - cusparse_sp_mat_descr_wrapper_t A; - cusparse_sp_mat_descr_wrapper_t A_T; - i_t primal_size = problem.n_variables; - i_t dual_size = problem.n_constraints; - - A.create(problem.n_constraints, - problem.n_variables, - problem.nnz, - problem.offsets.data(), - problem.variables.data(), - problem.coefficients.data()); - - A_T.create(problem.n_variables, - problem.n_constraints, - problem.nnz, - problem.reverse_offsets.data(), - problem.reverse_constraints.data(), - problem.reverse_coefficients.data()); + cusparse_sp_mat_uptr A = make_csr(problem.n_constraints, + problem.n_variables, + problem.nnz, + problem.offsets.data(), + problem.variables.data(), + problem.coefficients.data()); + cusparse_sp_mat_uptr A_T = make_csr(problem.n_variables, + problem.n_constraints, + problem.nnz, + problem.reverse_offsets.data(), + problem.reverse_constraints.data(), + problem.reverse_coefficients.data()); + i_t primal_size = problem.n_variables; + i_t dual_size = problem.n_constraints; // Sync before starting anything to make sure everything is done RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view)); @@ -249,17 +246,27 @@ int optimal_batch_size_handler(const optimization_problem_t& op_proble const int left_node = std::max(1, current_batch_size / 2); const int right_node = std::min(current_batch_size * 2, max_batch_size); - double current_ratio = evaluate_node(A, - A_T, + double current_ratio = evaluate_node(A.get(), + A_T.get(), primal_size, dual_size, current_batch_size, benchmark_runs, op_problem.get_handle_ptr()); - double left_ratio = evaluate_node( - A, A_T, primal_size, dual_size, left_node, benchmark_runs, op_problem.get_handle_ptr()); - double right_ratio = evaluate_node( - A, A_T, primal_size, dual_size, right_node, benchmark_runs, op_problem.get_handle_ptr()); + double left_ratio = evaluate_node(A.get(), + A_T.get(), + primal_size, + dual_size, + left_node, + benchmark_runs, + op_problem.get_handle_ptr()); + double right_ratio = evaluate_node(A.get(), + A_T.get(), + primal_size, + dual_size, + right_node, + benchmark_runs, + op_problem.get_handle_ptr()); int current_step = 1; #ifdef BATCH_VERBOSE_MODE @@ -289,8 +296,8 @@ int optimal_batch_size_handler(const optimization_problem_t& op_proble #ifdef BATCH_VERBOSE_MODE std::cout << "Evaluating left node: " << current_batch_size << std::endl; #endif - left_ratio = evaluate_node(A, - A_T, + left_ratio = evaluate_node(A.get(), + A_T.get(), primal_size, dual_size, current_batch_size, @@ -324,8 +331,13 @@ int optimal_batch_size_handler(const optimization_problem_t& op_proble #ifdef BATCH_VERBOSE_MODE std::cout << "Testing one last time between the two at node: " << middle_node << std::endl; #endif - double middle_ratio = evaluate_node( - A, A_T, primal_size, dual_size, middle_node, benchmark_runs, op_problem.get_handle_ptr()); + double middle_ratio = evaluate_node(A.get(), + A_T.get(), + primal_size, + dual_size, + middle_node, + benchmark_runs, + op_problem.get_handle_ptr()); #ifdef BATCH_VERBOSE_MODE std::cout << "Middle node ratio: " << middle_ratio << std::endl; #endif @@ -367,8 +379,8 @@ int optimal_batch_size_handler(const optimization_problem_t& op_proble #ifdef BATCH_VERBOSE_MODE std::cout << "Evaluating right node: " << current_batch_size << std::endl; #endif - right_ratio = evaluate_node(A, - A_T, + right_ratio = evaluate_node(A.get(), + A_T.get(), primal_size, dual_size, current_batch_size, @@ -403,8 +415,13 @@ int optimal_batch_size_handler(const optimization_problem_t& op_proble #ifdef BATCH_VERBOSE_MODE std::cout << "Testing one last time between the two at node: " << middle_node << std::endl; #endif - double middle_ratio = evaluate_node( - A, A_T, primal_size, dual_size, middle_node, benchmark_runs, op_problem.get_handle_ptr()); + double middle_ratio = evaluate_node(A.get(), + A_T.get(), + primal_size, + dual_size, + middle_node, + benchmark_runs, + op_problem.get_handle_ptr()); #ifdef BATCH_VERBOSE_MODE std::cout << "Middle node ratio: " << middle_ratio << std::endl; #endif diff --git a/cpp/src/pdlp/pdhg.cu b/cpp/src/pdlp/pdhg.cu index e88366a295..44ddece1c1 100644 --- a/cpp/src/pdlp/pdhg.cu +++ b/cpp/src/pdlp/pdhg.cu @@ -401,10 +401,10 @@ void pdhg_solver_t::compute_next_dual_solution(rmm::device_uvectorget_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - cusparse_view_.A_mixed_, - cusparse_view_.tmp_primal, + cusparse_view_.A_mixed_.get(), + cusparse_view_.tmp_primal.get(), reusable_device_scalar_value_0_.data(), - cusparse_view_.dual_gradient, + cusparse_view_.dual_gradient.get(), CUSPARSE_SPMV_CSR_ALG2, cusparse_view_.buffer_non_transpose_mixed_.data(), stream_view_); @@ -415,10 +415,10 @@ void pdhg_solver_t::compute_next_dual_solution(rmm::device_uvectorget_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - cusparse_view_.A, - cusparse_view_.tmp_primal, + cusparse_view_.A.get(), + cusparse_view_.tmp_primal.get(), reusable_device_scalar_value_0_.data(), - cusparse_view_.dual_gradient, + cusparse_view_.dual_gradient.get(), CUSPARSE_SPMV_CSR_ALG2, (f_t*)cusparse_view_.buffer_non_transpose.data(), stream_view_)); @@ -450,12 +450,12 @@ void pdhg_solver_t::spmvop_At_y() #if CUDA_VER_13_2_UP if (is_cusparse_runtime_spmvop_supported()) { cusparse_spmvop_run(handle_ptr_->get_cusparse_handle(), - cusparse_view_.spmv_op_plan_A_t_, + cusparse_view_.spmv_op_plan_A_t_.get(), reusable_device_scalar_value_1_.data(), reusable_device_scalar_value_0_.data(), - cusparse_view_.dual_solution, - cusparse_view_.current_AtY, - cusparse_view_.current_AtY, + cusparse_view_.dual_solution.get(), + cusparse_view_.current_AtY.get(), + cusparse_view_.current_AtY.get(), stream_view_.value()); return; } @@ -463,10 +463,10 @@ void pdhg_solver_t::spmvop_At_y() RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - cusparse_view_.A_T, - cusparse_view_.dual_solution, + cusparse_view_.A_T.get(), + cusparse_view_.dual_solution.get(), reusable_device_scalar_value_0_.data(), - cusparse_view_.current_AtY, + cusparse_view_.current_AtY.get(), CUSPARSE_SPMV_CSR_ALG2, (f_t*)cusparse_view_.buffer_transpose.data(), stream_view_)); @@ -478,12 +478,12 @@ void pdhg_solver_t::spmvop_A_x() #if CUDA_VER_13_2_UP if (is_cusparse_runtime_spmvop_supported()) { cusparse_spmvop_run(handle_ptr_->get_cusparse_handle(), - cusparse_view_.spmv_op_plan_A_, + cusparse_view_.spmv_op_plan_A_.get(), reusable_device_scalar_value_1_.data(), reusable_device_scalar_value_0_.data(), - cusparse_view_.reflected_primal_solution, - cusparse_view_.dual_gradient, - cusparse_view_.dual_gradient, + cusparse_view_.reflected_primal_solution.get(), + cusparse_view_.dual_gradient.get(), + cusparse_view_.dual_gradient.get(), stream_view_.value()); return; } @@ -492,10 +492,10 @@ void pdhg_solver_t::spmvop_A_x() raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - cusparse_view_.A, - cusparse_view_.reflected_primal_solution, + cusparse_view_.A.get(), + cusparse_view_.reflected_primal_solution.get(), reusable_device_scalar_value_0_.data(), - cusparse_view_.dual_gradient, + cusparse_view_.dual_gradient.get(), CUSPARSE_SPMV_CSR_ALG2, (f_t*)cusparse_view_.buffer_non_transpose.data(), stream_view_)); @@ -512,10 +512,10 @@ void pdhg_solver_t::compute_At_y() mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - cusparse_view_.A_T_mixed_, - cusparse_view_.dual_solution, + cusparse_view_.A_T_mixed_.get(), + cusparse_view_.dual_solution.get(), reusable_device_scalar_value_0_.data(), - cusparse_view_.current_AtY, + cusparse_view_.current_AtY.get(), CUSPARSE_SPMV_CSR_ALG2, cusparse_view_.buffer_transpose_mixed_.data(), stream_view_); @@ -527,10 +527,10 @@ void pdhg_solver_t::compute_At_y() raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - cusparse_view_.A_T, - cusparse_view_.dual_solution, + cusparse_view_.A_T.get(), + cusparse_view_.dual_solution.get(), reusable_device_scalar_value_0_.data(), - cusparse_view_.current_AtY, + cusparse_view_.current_AtY.get(), CUSPARSE_SPMV_CSR_ALG2, (f_t*)cusparse_view_.buffer_transpose.data(), stream_view_)); @@ -541,10 +541,10 @@ void pdhg_solver_t::compute_At_y() CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - cusparse_view_.A_T, - cusparse_view_.batch_dual_solutions, + cusparse_view_.A_T.get(), + cusparse_view_.batch_dual_solutions.get(), reusable_device_scalar_value_0_.data(), - cusparse_view_.batch_current_AtYs, + cusparse_view_.batch_current_AtYs.get(), (deterministic_batch_pdlp) ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, (f_t*)cusparse_view_.buffer_transpose_batch_row_row_.data(), stream_view_)); @@ -561,10 +561,10 @@ void pdhg_solver_t::compute_A_x() mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - cusparse_view_.A_mixed_, - cusparse_view_.reflected_primal_solution, + cusparse_view_.A_mixed_.get(), + cusparse_view_.reflected_primal_solution.get(), reusable_device_scalar_value_0_.data(), - cusparse_view_.dual_gradient, + cusparse_view_.dual_gradient.get(), CUSPARSE_SPMV_CSR_ALG2, cusparse_view_.buffer_non_transpose_mixed_.data(), stream_view_); @@ -576,10 +576,10 @@ void pdhg_solver_t::compute_A_x() raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - cusparse_view_.A, - cusparse_view_.reflected_primal_solution, + cusparse_view_.A.get(), + cusparse_view_.reflected_primal_solution.get(), reusable_device_scalar_value_0_.data(), - cusparse_view_.dual_gradient, + cusparse_view_.dual_gradient.get(), CUSPARSE_SPMV_CSR_ALG2, (f_t*)cusparse_view_.buffer_non_transpose.data(), stream_view_)); @@ -590,10 +590,10 @@ void pdhg_solver_t::compute_A_x() CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - cusparse_view_.A, - cusparse_view_.batch_reflected_primal_solutions, + cusparse_view_.A.get(), + cusparse_view_.batch_reflected_primal_solutions.get(), reusable_device_scalar_value_0_.data(), - cusparse_view_.batch_dual_gradients, + cusparse_view_.batch_dual_gradients.get(), (deterministic_batch_pdlp) ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, (f_t*)cusparse_view_.buffer_non_transpose_batch_row_row_.data(), stream_view_)); @@ -1381,21 +1381,21 @@ void pdhg_solver_t::update_solution( std::swap(current_saddle_point_state_.current_AtY_, current_saddle_point_state_.next_AtY_); // Update cusparse views to point to the new values, cost is marginal - RAFT_CUSPARSE_TRY(cusparseDnVecSetValues(cusparse_view_.current_AtY, + RAFT_CUSPARSE_TRY(cusparseDnVecSetValues(cusparse_view_.current_AtY.get(), current_saddle_point_state_.current_AtY_.data())); RAFT_CUSPARSE_TRY( - cusparseDnVecSetValues(cusparse_view_.next_AtY, current_saddle_point_state_.next_AtY_.data())); - RAFT_CUSPARSE_TRY(cusparseDnVecSetValues(cusparse_view_.potential_next_dual_solution, + cusparseDnVecSetValues(cusparse_view_.next_AtY.get(), current_saddle_point_state_.next_AtY_.data())); + RAFT_CUSPARSE_TRY(cusparseDnVecSetValues(cusparse_view_.potential_next_dual_solution.get(), potential_next_dual_solution_.data())); - RAFT_CUSPARSE_TRY(cusparseDnVecSetValues(cusparse_view_.primal_solution, + RAFT_CUSPARSE_TRY(cusparseDnVecSetValues(cusparse_view_.primal_solution.get(), current_saddle_point_state_.primal_solution_.data())); - RAFT_CUSPARSE_TRY(cusparseDnVecSetValues(cusparse_view_.dual_solution, + RAFT_CUSPARSE_TRY(cusparseDnVecSetValues(cusparse_view_.dual_solution.get(), current_saddle_point_state_.dual_solution_.data())); RAFT_CUSPARSE_TRY( - cusparseDnVecSetValues(current_op_problem_evaluation_cusparse_view_.primal_solution, + cusparseDnVecSetValues(current_op_problem_evaluation_cusparse_view_.primal_solution.get(), current_saddle_point_state_.primal_solution_.data())); RAFT_CUSPARSE_TRY( - cusparseDnVecSetValues(current_op_problem_evaluation_cusparse_view_.dual_solution, + cusparseDnVecSetValues(current_op_problem_evaluation_cusparse_view_.dual_solution.get(), current_saddle_point_state_.dual_solution_.data())); } diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index dadc54cc08..e1a874d20f 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -1749,72 +1749,72 @@ void pdlp_solver_t::resize_and_swap_all_context_loop( // Reset all cusparse view - // Reset cuSparse views for PDHG - auto& pdhg_cusparse_view = pdhg_solver_.get_cusparse_view(); - pdhg_cusparse_view.batch_dual_solutions.create( - op_problem_scaled_.n_constraints, - climber_strategies_.size(), - climber_strategies_.size(), - pdhg_solver_.get_saddle_point_state().get_dual_solution().data(), - CUSPARSE_ORDER_ROW); - pdhg_cusparse_view.batch_current_AtYs.create( - op_problem_scaled_.n_variables, - climber_strategies_.size(), - climber_strategies_.size(), - pdhg_solver_.get_saddle_point_state().get_current_AtY().data(), - CUSPARSE_ORDER_ROW); - pdhg_cusparse_view.batch_reflected_primal_solutions.create( - op_problem_scaled_.n_variables, - climber_strategies_.size(), - climber_strategies_.size(), - pdhg_solver_.get_reflected_primal().data(), - CUSPARSE_ORDER_ROW); - pdhg_cusparse_view.batch_dual_gradients.create( - op_problem_scaled_.n_constraints, - climber_strategies_.size(), - climber_strategies_.size(), - pdhg_solver_.get_saddle_point_state().get_dual_gradient().data(), - CUSPARSE_ORDER_ROW); + // Reset cuSparse views for PDHG. unique_ptr move-assign destroys the old descriptor first. + auto& pdhg_cusparse_view = pdhg_solver_.get_cusparse_view(); + pdhg_cusparse_view.batch_dual_solutions = + detail::make_dnmat(op_problem_scaled_.n_constraints, + climber_strategies_.size(), + climber_strategies_.size(), + pdhg_solver_.get_saddle_point_state().get_dual_solution().data(), + CUSPARSE_ORDER_ROW); + pdhg_cusparse_view.batch_current_AtYs = + detail::make_dnmat(op_problem_scaled_.n_variables, + climber_strategies_.size(), + climber_strategies_.size(), + pdhg_solver_.get_saddle_point_state().get_current_AtY().data(), + CUSPARSE_ORDER_ROW); + pdhg_cusparse_view.batch_reflected_primal_solutions = + detail::make_dnmat(op_problem_scaled_.n_variables, + climber_strategies_.size(), + climber_strategies_.size(), + pdhg_solver_.get_reflected_primal().data(), + CUSPARSE_ORDER_ROW); + pdhg_cusparse_view.batch_dual_gradients = + detail::make_dnmat(op_problem_scaled_.n_constraints, + climber_strategies_.size(), + climber_strategies_.size(), + pdhg_solver_.get_saddle_point_state().get_dual_gradient().data(), + CUSPARSE_ORDER_ROW); // Reset cusparse view used by adaptive step size strategy but owned by PDHG - pdhg_cusparse_view.batch_potential_next_dual_solution.create( - op_problem_scaled_.n_constraints, - climber_strategies_.size(), - op_problem_scaled_.n_constraints, - pdhg_solver_.get_potential_next_dual_solution().data(), - CUSPARSE_ORDER_COL); - pdhg_cusparse_view.batch_next_AtYs.create( - op_problem_scaled_.n_variables, - climber_strategies_.size(), - op_problem_scaled_.n_variables, - pdhg_solver_.get_saddle_point_state().get_next_AtY().data(), - CUSPARSE_ORDER_COL); + pdhg_cusparse_view.batch_potential_next_dual_solution = + detail::make_dnmat(op_problem_scaled_.n_constraints, + climber_strategies_.size(), + op_problem_scaled_.n_constraints, + pdhg_solver_.get_potential_next_dual_solution().data(), + CUSPARSE_ORDER_COL); + pdhg_cusparse_view.batch_next_AtYs = + detail::make_dnmat(op_problem_scaled_.n_variables, + climber_strategies_.size(), + op_problem_scaled_.n_variables, + pdhg_solver_.get_saddle_point_state().get_next_AtY().data(), + CUSPARSE_ORDER_COL); // Reset cusparse view used by convergence information but owned by PDLP - current_op_problem_evaluation_cusparse_view_.batch_primal_solutions.create( - op_problem_scaled_.n_variables, - climber_strategies_.size(), - op_problem_scaled_.n_variables, - pdhg_solver_.get_potential_next_primal_solution().data(), - CUSPARSE_ORDER_COL); - current_op_problem_evaluation_cusparse_view_.batch_dual_solutions.create( - op_problem_scaled_.n_constraints, - climber_strategies_.size(), - op_problem_scaled_.n_constraints, - pdhg_solver_.get_potential_next_dual_solution().data(), - CUSPARSE_ORDER_COL); - current_op_problem_evaluation_cusparse_view_.batch_tmp_duals.create( - op_problem_scaled_.n_constraints, - climber_strategies_.size(), - op_problem_scaled_.n_constraints, - pdhg_solver_.get_dual_tmp_resource().data(), - CUSPARSE_ORDER_COL); - current_op_problem_evaluation_cusparse_view_.batch_tmp_primals.create( - op_problem_scaled_.n_variables, - climber_strategies_.size(), - op_problem_scaled_.n_variables, - pdhg_solver_.get_primal_tmp_resource().data(), - CUSPARSE_ORDER_COL); + current_op_problem_evaluation_cusparse_view_.batch_primal_solutions = + detail::make_dnmat(op_problem_scaled_.n_variables, + climber_strategies_.size(), + op_problem_scaled_.n_variables, + pdhg_solver_.get_potential_next_primal_solution().data(), + CUSPARSE_ORDER_COL); + current_op_problem_evaluation_cusparse_view_.batch_dual_solutions = + detail::make_dnmat(op_problem_scaled_.n_constraints, + climber_strategies_.size(), + op_problem_scaled_.n_constraints, + pdhg_solver_.get_potential_next_dual_solution().data(), + CUSPARSE_ORDER_COL); + current_op_problem_evaluation_cusparse_view_.batch_tmp_duals = + detail::make_dnmat(op_problem_scaled_.n_constraints, + climber_strategies_.size(), + op_problem_scaled_.n_constraints, + pdhg_solver_.get_dual_tmp_resource().data(), + CUSPARSE_ORDER_COL); + current_op_problem_evaluation_cusparse_view_.batch_tmp_primals = + detail::make_dnmat(op_problem_scaled_.n_variables, + climber_strategies_.size(), + op_problem_scaled_.n_variables, + pdhg_solver_.get_primal_tmp_resource().data(), + CUSPARSE_ORDER_COL); // Recalculate SpMM buffer sizes for the new batch dimensions. // cuSparse may require different buffer sizes when the number of columns changes @@ -1828,10 +1828,10 @@ void pdlp_solver_t::resize_and_swap_all_context_loop( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - pdhg_cusparse_view.A_T, - pdhg_cusparse_view.batch_dual_solutions, + pdhg_cusparse_view.A_T.get(), + pdhg_cusparse_view.batch_dual_solutions.get(), reusable_device_scalar_value_0_.data(), - pdhg_cusparse_view.batch_current_AtYs, + pdhg_cusparse_view.batch_current_AtYs.get(), (deterministic_batch_pdlp) ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, &new_buf_size, stream_view_)); @@ -1843,10 +1843,10 @@ void pdlp_solver_t::resize_and_swap_all_context_loop( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - pdhg_cusparse_view.A, - pdhg_cusparse_view.batch_reflected_primal_solutions, + pdhg_cusparse_view.A.get(), + pdhg_cusparse_view.batch_reflected_primal_solutions.get(), reusable_device_scalar_value_0_.data(), - pdhg_cusparse_view.batch_dual_gradients, + pdhg_cusparse_view.batch_dual_gradients.get(), (deterministic_batch_pdlp) ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, &new_buf_size, stream_view_)); @@ -1858,10 +1858,10 @@ void pdlp_solver_t::resize_and_swap_all_context_loop( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - pdhg_cusparse_view.A_T, - pdhg_cusparse_view.batch_potential_next_dual_solution, + pdhg_cusparse_view.A_T.get(), + pdhg_cusparse_view.batch_potential_next_dual_solution.get(), reusable_device_scalar_value_0_.data(), - pdhg_cusparse_view.batch_next_AtYs, + pdhg_cusparse_view.batch_next_AtYs.get(), CUSPARSE_SPMM_CSR_ALG3, &new_buf_size, stream_view_)); @@ -1873,10 +1873,10 @@ void pdlp_solver_t::resize_and_swap_all_context_loop( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - current_op_problem_evaluation_cusparse_view_.A_T, - current_op_problem_evaluation_cusparse_view_.batch_dual_solutions, + current_op_problem_evaluation_cusparse_view_.A_T.get(), + current_op_problem_evaluation_cusparse_view_.batch_dual_solutions.get(), reusable_device_scalar_value_0_.data(), - current_op_problem_evaluation_cusparse_view_.batch_tmp_primals, + current_op_problem_evaluation_cusparse_view_.batch_tmp_primals.get(), CUSPARSE_SPMM_CSR_ALG3, &new_buf_size, stream_view_)); @@ -1889,10 +1889,10 @@ void pdlp_solver_t::resize_and_swap_all_context_loop( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - current_op_problem_evaluation_cusparse_view_.A, - current_op_problem_evaluation_cusparse_view_.batch_primal_solutions, + current_op_problem_evaluation_cusparse_view_.A.get(), + current_op_problem_evaluation_cusparse_view_.batch_primal_solutions.get(), reusable_device_scalar_value_0_.data(), - current_op_problem_evaluation_cusparse_view_.batch_tmp_duals, + current_op_problem_evaluation_cusparse_view_.batch_tmp_duals.get(), CUSPARSE_SPMM_CSR_ALG3, &new_buf_size, stream_view_)); @@ -1909,10 +1909,10 @@ void pdlp_solver_t::resize_and_swap_all_context_loop( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - pdhg_cusparse_view.A_T, - pdhg_cusparse_view.batch_dual_solutions, + pdhg_cusparse_view.A_T.get(), + pdhg_cusparse_view.batch_dual_solutions.get(), reusable_device_scalar_value_0_.data(), - pdhg_cusparse_view.batch_current_AtYs, + pdhg_cusparse_view.batch_current_AtYs.get(), (deterministic_batch_pdlp) ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, pdhg_cusparse_view.buffer_transpose_batch_row_row_.data(), stream_view_); @@ -1921,10 +1921,10 @@ void pdlp_solver_t::resize_and_swap_all_context_loop( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - pdhg_cusparse_view.A, - pdhg_cusparse_view.batch_reflected_primal_solutions, + pdhg_cusparse_view.A.get(), + pdhg_cusparse_view.batch_reflected_primal_solutions.get(), reusable_device_scalar_value_0_.data(), - pdhg_cusparse_view.batch_dual_gradients, + pdhg_cusparse_view.batch_dual_gradients.get(), (deterministic_batch_pdlp) ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, pdhg_cusparse_view.buffer_non_transpose_batch_row_row_.data(), stream_view_); @@ -1934,10 +1934,10 @@ void pdlp_solver_t::resize_and_swap_all_context_loop( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - pdhg_cusparse_view.A_T, - pdhg_cusparse_view.batch_potential_next_dual_solution, + pdhg_cusparse_view.A_T.get(), + pdhg_cusparse_view.batch_potential_next_dual_solution.get(), reusable_device_scalar_value_0_.data(), - pdhg_cusparse_view.batch_next_AtYs, + pdhg_cusparse_view.batch_next_AtYs.get(), CUSPARSE_SPMM_CSR_ALG3, (f_t*)pdhg_cusparse_view.buffer_transpose_batch.data(), stream_view_); @@ -1948,10 +1948,10 @@ void pdlp_solver_t::resize_and_swap_all_context_loop( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - current_op_problem_evaluation_cusparse_view_.A_T, - current_op_problem_evaluation_cusparse_view_.batch_dual_solutions, + current_op_problem_evaluation_cusparse_view_.A_T.get(), + current_op_problem_evaluation_cusparse_view_.batch_dual_solutions.get(), reusable_device_scalar_value_0_.data(), - current_op_problem_evaluation_cusparse_view_.batch_tmp_primals, + current_op_problem_evaluation_cusparse_view_.batch_tmp_primals.get(), CUSPARSE_SPMM_CSR_ALG3, (f_t*)current_op_problem_evaluation_cusparse_view_.buffer_transpose_batch.data(), stream_view_); @@ -1961,10 +1961,10 @@ void pdlp_solver_t::resize_and_swap_all_context_loop( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - current_op_problem_evaluation_cusparse_view_.A, - current_op_problem_evaluation_cusparse_view_.batch_primal_solutions, + current_op_problem_evaluation_cusparse_view_.A.get(), + current_op_problem_evaluation_cusparse_view_.batch_primal_solutions.get(), reusable_device_scalar_value_0_.data(), - current_op_problem_evaluation_cusparse_view_.batch_tmp_duals, + current_op_problem_evaluation_cusparse_view_.batch_tmp_duals.get(), CUSPARSE_SPMM_CSR_ALG3, (f_t*)current_op_problem_evaluation_cusparse_view_.buffer_non_transpose_batch.data(), stream_view_); @@ -2027,11 +2027,11 @@ void pdlp_solver_t::compute_fixed_error(std::vector& has_restarte RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); // Make potential_next_dual_solution point towards reflected dual solution to reuse the code - RAFT_CUSPARSE_TRY(cusparseDnVecSetValues(cusparse_view.potential_next_dual_solution, + RAFT_CUSPARSE_TRY(cusparseDnVecSetValues(cusparse_view.potential_next_dual_solution.get(), (void*)pdhg_solver_.get_reflected_dual().data())); if (batch_mode_) - RAFT_CUSPARSE_TRY(cusparseDnMatSetValues(cusparse_view.batch_potential_next_dual_solution, + RAFT_CUSPARSE_TRY(cusparseDnMatSetValues(cusparse_view.batch_potential_next_dual_solution.get(), (void*)pdhg_solver_.get_reflected_dual().data())); step_size_strategy_.compute_interaction_and_movement( @@ -2068,12 +2068,12 @@ void pdlp_solver_t::compute_fixed_error(std::vector& has_restarte // Put back RAFT_CUSPARSE_TRY( - cusparseDnVecSetValues(cusparse_view.potential_next_dual_solution, + cusparseDnVecSetValues(cusparse_view.potential_next_dual_solution.get(), (void*)pdhg_solver_.get_potential_next_dual_solution().data())); if (batch_mode_) { RAFT_CUSPARSE_TRY( - cusparseDnMatSetValues(cusparse_view.batch_potential_next_dual_solution, + cusparseDnMatSetValues(cusparse_view.batch_potential_next_dual_solution.get(), (void*)pdhg_solver_.get_potential_next_dual_solution().data())); } @@ -3023,7 +3023,7 @@ void pdlp_solver_t::compute_initial_step_size() raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - cusparse_view_.A_T, + cusparse_view_.A_T.get(), vecQ, reusable_device_scalar_value_0_.data(), vecATQ, @@ -3036,7 +3036,7 @@ void pdlp_solver_t::compute_initial_step_size() raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), // 1 - cusparse_view_.A, + cusparse_view_.A.get(), vecATQ, reusable_device_scalar_value_0_.data(), // 1 vecZ, diff --git a/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu b/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu index 17c7abcac5..40a34fc03f 100644 --- a/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu +++ b/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu @@ -2210,10 +2210,10 @@ void pdlp_restart_strategy_t::compute_primal_gradient( RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_neg_1_.data(), - cusparse_view.A_T, - cusparse_view.dual_solution, + cusparse_view.A_T.get(), + cusparse_view.dual_solution.get(), reusable_device_scalar_value_1_.data(), - cusparse_view.primal_gradient, + cusparse_view.primal_gradient.get(), CUSPARSE_SPMV_CSR_ALG2, (f_t*)cusparse_view.buffer_transpose.data(), stream_view_)); @@ -2278,10 +2278,10 @@ void pdlp_restart_strategy_t::compute_dual_gradient( raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - cusparse_view.A, - cusparse_view.primal_solution, + cusparse_view.A.get(), + cusparse_view.primal_solution.get(), reusable_device_scalar_value_0_.data(), - cusparse_view.dual_gradient, + cusparse_view.dual_gradient.get(), CUSPARSE_SPMV_CSR_ALG2, (f_t*)cusparse_view.buffer_non_transpose.data(), stream_view_)); @@ -2335,10 +2335,10 @@ void pdlp_restart_strategy_t::compute_lagrangian_value( RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - cusparse_view.A_T, - cusparse_view.dual_solution, + cusparse_view.A_T.get(), + cusparse_view.dual_solution.get(), reusable_device_scalar_value_0_.data(), - cusparse_view.tmp_primal, + cusparse_view.tmp_primal.get(), CUSPARSE_SPMV_CSR_ALG2, (f_t*)cusparse_view.buffer_transpose.data(), stream_view_)); diff --git a/cpp/src/pdlp/step_size_strategy/adaptive_step_size_strategy.cu b/cpp/src/pdlp/step_size_strategy/adaptive_step_size_strategy.cu index 1f137dc9ea..f16addde4b 100644 --- a/cpp/src/pdlp/step_size_strategy/adaptive_step_size_strategy.cu +++ b/cpp/src/pdlp/step_size_strategy/adaptive_step_size_strategy.cu @@ -389,10 +389,10 @@ void adaptive_step_size_strategy_t::compute_interaction_and_movement( raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), // alpha - cusparse_view.A_T, - cusparse_view.potential_next_dual_solution, + cusparse_view.A_T.get(), + cusparse_view.potential_next_dual_solution.get(), reusable_device_scalar_value_0_.data(), // beta - cusparse_view.next_AtY, + cusparse_view.next_AtY.get(), CUSPARSE_SPMV_CSR_ALG2, (f_t*)cusparse_view.buffer_transpose.data(), stream_view_.value())); @@ -403,10 +403,10 @@ void adaptive_step_size_strategy_t::compute_interaction_and_movement( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - cusparse_view.A_T, - cusparse_view.batch_potential_next_dual_solution, + cusparse_view.A_T.get(), + cusparse_view.batch_potential_next_dual_solution.get(), reusable_device_scalar_value_0_.data(), - cusparse_view.batch_next_AtYs, + cusparse_view.batch_next_AtYs.get(), CUSPARSE_SPMM_CSR_ALG3, (f_t*)cusparse_view.buffer_transpose_batch.data(), stream_view_.value())); diff --git a/cpp/src/pdlp/termination_strategy/convergence_information.cu b/cpp/src/pdlp/termination_strategy/convergence_information.cu index a6d6d14d96..dd4f8ba82d 100644 --- a/cpp/src/pdlp/termination_strategy/convergence_information.cu +++ b/cpp/src/pdlp/termination_strategy/convergence_information.cu @@ -541,10 +541,10 @@ void convergence_information_t::compute_primal_residual( raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - cusparse_view.A, - cusparse_view.primal_solution, + cusparse_view.A.get(), + cusparse_view.primal_solution.get(), reusable_device_scalar_value_0_.data(), - cusparse_view.tmp_dual, + cusparse_view.tmp_dual.get(), CUSPARSE_SPMV_CSR_ALG2, (f_t*)cusparse_view.buffer_non_transpose.data(), stream_view_)); @@ -554,10 +554,10 @@ void convergence_information_t::compute_primal_residual( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - cusparse_view.A, - cusparse_view.batch_primal_solutions, + cusparse_view.A.get(), + cusparse_view.batch_primal_solutions.get(), reusable_device_scalar_value_0_.data(), - cusparse_view.batch_tmp_duals, + cusparse_view.batch_tmp_duals.get(), CUSPARSE_SPMM_CSR_ALG3, (f_t*)cusparse_view.buffer_non_transpose_batch.data(), stream_view_)); @@ -686,10 +686,10 @@ void convergence_information_t::compute_dual_residual( raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - cusparse_view.A_T, - cusparse_view.dual_solution, + cusparse_view.A_T.get(), + cusparse_view.dual_solution.get(), reusable_device_scalar_value_0_.data(), - cusparse_view.tmp_primal, + cusparse_view.tmp_primal.get(), CUSPARSE_SPMV_CSR_ALG2, (f_t*)cusparse_view.buffer_transpose.data(), stream_view_)); @@ -699,10 +699,10 @@ void convergence_information_t::compute_dual_residual( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - cusparse_view.A_T, - cusparse_view.batch_dual_solutions, + cusparse_view.A_T.get(), + cusparse_view.batch_dual_solutions.get(), reusable_device_scalar_value_0_.data(), - cusparse_view.batch_tmp_primals, + cusparse_view.batch_tmp_primals.get(), CUSPARSE_SPMM_CSR_ALG3, (f_t*)cusparse_view.buffer_transpose_batch.data(), stream_view_)); diff --git a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu index 9268e17910..cca4cce9ae 100644 --- a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu +++ b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu @@ -320,10 +320,10 @@ void infeasibility_information_t::compute_infeasibility_information( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - scaled_cusparse_view_.A, - scaled_cusparse_view_.batch_delta_primal_solutions, + scaled_cusparse_view_.A.get(), + scaled_cusparse_view_.batch_delta_primal_solutions.get(), reusable_device_scalar_value_0_.data(), - scaled_cusparse_view_.batch_tmp_duals, + scaled_cusparse_view_.batch_tmp_duals.get(), CUSPARSE_SPMM_CSR_ALG3, (f_t*)scaled_cusparse_view_.buffer_non_transpose_batch.data(), stream_view_)); @@ -332,10 +332,10 @@ void infeasibility_information_t::compute_infeasibility_information( CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - scaled_cusparse_view_.A_T, - scaled_cusparse_view_.batch_delta_dual_solutions, + scaled_cusparse_view_.A_T.get(), + scaled_cusparse_view_.batch_delta_dual_solutions.get(), reusable_device_scalar_value_0_.data(), - scaled_cusparse_view_.batch_tmp_primals, + scaled_cusparse_view_.batch_tmp_primals.get(), CUSPARSE_SPMM_CSR_ALG3, (f_t*)scaled_cusparse_view_.buffer_transpose_batch.data(), stream_view_)); @@ -551,10 +551,10 @@ void infeasibility_information_t::compute_homogenous_primal_residual( raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), - cusparse_view.A, - cusparse_view.primal_solution, + cusparse_view.A.get(), + cusparse_view.primal_solution.get(), reusable_device_scalar_value_0_.data(), - cusparse_view.tmp_dual, + cusparse_view.tmp_dual.get(), CUSPARSE_SPMV_CSR_ALG2, (f_t*)cusparse_view.buffer_non_transpose.data(), stream_view_)); @@ -621,10 +621,10 @@ void infeasibility_information_t::compute_homogenous_dual_residual( RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_neg_1_.data(), - cusparse_view.A_T, - cusparse_view.dual_solution, + cusparse_view.A_T.get(), + cusparse_view.dual_solution.get(), reusable_device_scalar_value_0_.data(), - cusparse_view.tmp_primal, + cusparse_view.tmp_primal.get(), CUSPARSE_SPMV_CSR_ALG2, (f_t*)cusparse_view.buffer_transpose.data(), stream_view_)); From a601b6d1960b434956e5f8fcaac106ffe9c2e6af Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Thu, 11 Jun 2026 10:20:58 +0200 Subject: [PATCH 3/7] added _t to deleter functors --- cpp/src/barrier/cusparse_info.hpp | 4 ++-- cpp/src/pdlp/cusparse_view.cu | 4 ++-- cpp/src/pdlp/cusparse_view.hpp | 20 ++++++++++---------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cpp/src/barrier/cusparse_info.hpp b/cpp/src/barrier/cusparse_info.hpp index 893c6d3850..c6cdf436b7 100644 --- a/cpp/src/barrier/cusparse_info.hpp +++ b/cpp/src/barrier/cusparse_info.hpp @@ -23,7 +23,7 @@ namespace cuopt::linear_programming::dual_simplex { -struct cusparse_spgemm_deleter { +struct cusparse_spgemm_deleter_t { void operator()(cusparseSpGEMMDescr_t descr) const noexcept { if (descr) { CUOPT_CUSPARSE_TRY_NO_THROW(cusparseSpGEMM_destroyDescr(descr)); } @@ -31,7 +31,7 @@ struct cusparse_spgemm_deleter { }; using cusparse_spgemm_uptr = - std::unique_ptr, cusparse_spgemm_deleter>; + std::unique_ptr, cusparse_spgemm_deleter_t>; template struct cusparse_info_t { diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index afc65e0f7c..fa5c336a18 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -185,7 +185,7 @@ using cusparseSpMVOp_sig = cusparse_sig::pointer matches the cuSPARSE handle. // --------------------------------------------------------------------------- -struct cusparse_sp_mat_deleter { +struct cusparse_sp_mat_deleter_t { void operator()(cusparseSpMatDescr_t descr) const noexcept { if (descr) { RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(descr)); } } }; -struct cusparse_dn_vec_deleter { +struct cusparse_dn_vec_deleter_t { void operator()(cusparseDnVecDescr_t descr) const noexcept { if (descr) { RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(descr)); } } }; -struct cusparse_dn_mat_deleter { +struct cusparse_dn_mat_deleter_t { void operator()(cusparseDnMatDescr_t descr) const noexcept { if (descr) { RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(descr)); } @@ -57,11 +57,11 @@ struct cusparse_dn_mat_deleter { }; using cusparse_sp_mat_uptr = - std::unique_ptr, cusparse_sp_mat_deleter>; + std::unique_ptr, cusparse_sp_mat_deleter_t>; using cusparse_dn_vec_uptr = - std::unique_ptr, cusparse_dn_vec_deleter>; + std::unique_ptr, cusparse_dn_vec_deleter_t>; using cusparse_dn_mat_uptr = - std::unique_ptr, cusparse_dn_mat_deleter>; + std::unique_ptr, cusparse_dn_mat_deleter_t>; // Borrowed views: identical to the raw cuSPARSE handle types but the alias makes the non-owning // intent explicit at API boundaries. Pair with the *_uptr aliases above: @@ -112,18 +112,18 @@ inline cusparse_dn_mat_uptr make_dnmat( // static. // --------------------------------------------------------------------------- -struct cusparse_spmvop_descr_deleter { +struct cusparse_spmvop_descr_deleter_t { void operator()(cusparseSpMVOpDescr_t descr) const noexcept; }; -struct cusparse_spmvop_plan_deleter { +struct cusparse_spmvop_plan_deleter_t { void operator()(cusparseSpMVOpPlan_t plan) const noexcept; }; using cusparse_spmvop_descr_uptr = - std::unique_ptr, cusparse_spmvop_descr_deleter>; + std::unique_ptr, cusparse_spmvop_descr_deleter_t>; using cusparse_spmvop_plan_uptr = - std::unique_ptr, cusparse_spmvop_plan_deleter>; + std::unique_ptr, cusparse_spmvop_plan_deleter_t>; // Factories. `make_spmvop_descr` resolves cusparseSpMVOp_createDescr via dlsym. cusparse_spmvop_descr_uptr make_spmvop_descr(cusparseHandle_t handle, From 803efbf9aaf5bc0683b4a327f29bfb9d52e4b33d Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Thu, 11 Jun 2026 10:39:26 +0200 Subject: [PATCH 4/7] code rabbit nit comment RAFT_CUSPARSE_TRY --- cpp/src/pdlp/cusparse_view.cu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index fa5c336a18..4cd06e33f1 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -144,7 +144,7 @@ void my_cusparsespmm_preprocess(cusparseHandle_t handle, return CUDA_R_64F; } }(); - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + RAFT_CUSPARSE_TRY(cusparseSetStream(handle, stream)); RAFT_CUSPARSE_TRY(cusparseSpMM_preprocess( handle, opA, opB, alpha, matA, matB, beta, matC, float_type, alg, externalBuffer)); } @@ -1172,7 +1172,8 @@ void cusparse_view_t::create_spmv_op_plans(bool is_reflected) if (!is_cusparse_runtime_spmvop_supported() || !(std::is_same_v)) { return; } static const auto buffer_size = dynamic_load_runtime::function("cusparseSpMVOp_bufferSize"); - CUSPARSE_CHECK(cusparseSetStream(handle_ptr_->get_cusparse_handle(), handle_ptr_->get_stream())); + RAFT_CUSPARSE_TRY( + cusparseSetStream(handle_ptr_->get_cusparse_handle(), handle_ptr_->get_stream())); // Prepare buffers for At_y SpMVOp size_t buffer_size_transpose = 0; RAFT_CUSPARSE_TRY((*buffer_size)(handle_ptr_->get_cusparse_handle(), From 43013a52f96d5f5aa0fc6f2ddcac9f8e2f0af5ee Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Thu, 11 Jun 2026 10:39:50 +0200 Subject: [PATCH 5/7] style --- cpp/src/barrier/barrier.cu | 15 ++++++---- cpp/src/barrier/cusparse_view.cu | 2 +- cpp/src/pdlp/cusparse_view.cu | 28 +++++++++---------- cpp/src/pdlp/cusparse_view.hpp | 6 ++-- .../optimal_batch_size_handler.cu | 5 ++-- cpp/src/pdlp/pdhg.cu | 4 +-- cpp/src/pdlp/pdlp.cu | 2 +- 7 files changed, 31 insertions(+), 31 deletions(-) diff --git a/cpp/src/barrier/barrier.cu b/cpp/src/barrier/barrier.cu index a5995503c7..04dde2ccea 100644 --- a/cpp/src/barrier/barrier.cu +++ b/cpp/src/barrier/barrier.cu @@ -2595,7 +2595,8 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t(d_dx_residual_6, stream_view_); @@ -2701,7 +2704,8 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t::cusparse_view_t(raft::handle_t const* handle_ptr, A_T_indices_ = device_copy(A.i, handle_ptr->get_stream()); A_T_data_ = device_copy(A.x, handle_ptr->get_stream()); - A_ = detail::make_csr( + A_ = detail::make_csr( rows, cols, nnz, A_offsets_.data(), A_indices_.data(), A_data_.data()); A_T_ = detail::make_csr( cols, rows, nnz, A_T_offsets_.data(), A_T_indices_.data(), A_T_data_.data()); diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index 4cd06e33f1..fb9659cb4a 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -213,8 +213,7 @@ cusparse_spmvop_descr_uptr make_spmvop_descr(cusparseHandle_t handle, static const auto fn = dynamic_load_runtime::function("cusparseSpMVOp_createDescr"); cusparseSpMVOpDescr_t descr{nullptr}; - RAFT_CUSPARSE_TRY( - (*fn)(handle, &descr, opA, matA, vecX, vecY, vecZ, computeType, buffer.data())); + RAFT_CUSPARSE_TRY((*fn)(handle, &descr, opA, matA, vecX, vecY, vecZ, computeType, buffer.data())); return cusparse_spmvop_descr_uptr{descr}; } @@ -314,23 +313,22 @@ cusparse_view_t::cusparse_view_t( if (batch_mode_) { [[maybe_unused]] const bool is_cupdlpx = is_cupdlpx_restart(hyper_params); cuopt_assert(is_cupdlpx, "Batch mode only supported with cuPDLPx restart"); - batch_dual_solutions = make_dnmat(op_problem_scaled.n_constraints, + batch_dual_solutions = make_dnmat(op_problem_scaled.n_constraints, climber_strategies.size(), climber_strategies.size(), current_saddle_point_state.get_dual_solution().data(), CUSPARSE_ORDER_ROW); - batch_current_AtYs = make_dnmat(op_problem_scaled.n_variables, + batch_current_AtYs = make_dnmat(op_problem_scaled.n_variables, climber_strategies.size(), climber_strategies.size(), current_saddle_point_state.get_current_AtY().data(), CUSPARSE_ORDER_ROW); - batch_potential_next_dual_solution = - make_dnmat(op_problem_scaled.n_constraints, - climber_strategies.size(), - op_problem_scaled.n_constraints, - _potential_next_dual_solution.data(), - CUSPARSE_ORDER_COL); - batch_next_AtYs = make_dnmat(op_problem_scaled.n_variables, + batch_potential_next_dual_solution = make_dnmat(op_problem_scaled.n_constraints, + climber_strategies.size(), + op_problem_scaled.n_constraints, + _potential_next_dual_solution.data(), + CUSPARSE_ORDER_COL); + batch_next_AtYs = make_dnmat(op_problem_scaled.n_variables, climber_strategies.size(), op_problem_scaled.n_variables, current_saddle_point_state.get_next_AtY().data(), @@ -341,7 +339,7 @@ cusparse_view_t::cusparse_view_t( climber_strategies.size(), _reflected_primal_solution.data(), CUSPARSE_ORDER_ROW); - batch_dual_gradients = make_dnmat(op_problem_scaled.n_constraints, + batch_dual_gradients = make_dnmat(op_problem_scaled.n_constraints, climber_strategies.size(), climber_strategies.size(), current_saddle_point_state.get_dual_gradient().data(), @@ -371,9 +369,9 @@ cusparse_view_t::cusparse_view_t( _tmp_primal.data(), CUSPARSE_ORDER_COL); - primal_gradient = make_dnvec( - current_saddle_point_state.get_primal_gradient().size(), // It is 0 in cupdlpx - current_saddle_point_state.get_primal_gradient().data()); + primal_gradient = + make_dnvec(current_saddle_point_state.get_primal_gradient().size(), // It is 0 in cupdlpx + current_saddle_point_state.get_primal_gradient().data()); dual_gradient = make_dnvec(op_problem_scaled.n_constraints, current_saddle_point_state.get_dual_gradient().data()); diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index 7f6a195852..66879a04e7 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -96,8 +96,7 @@ inline cusparse_dn_mat_uptr make_dnmat( int64_t row, int64_t col, int64_t ld, f_t* values, cusparseOrder_t order) { cusparseDnMatDescr_t descr{nullptr}; - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsecreatednmat(&descr, row, col, ld, values, order)); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednmat(&descr, row, col, ld, values, order)); return cusparse_dn_mat_uptr{descr}; } @@ -137,8 +136,7 @@ cusparse_spmvop_descr_uptr make_spmvop_descr(cusparseHandle_t handle, // `make_spmvop_plan` passes nullptr/0 for ltoIRBuf/ltoIRSize so cuSPARSE JITs // internally; cuOpt does not supply user-provided LTO IR. -cusparse_spmvop_plan_uptr make_spmvop_plan(cusparseHandle_t handle, - cusparseSpMVOpDescr_t descr); +cusparse_spmvop_plan_uptr make_spmvop_plan(cusparseHandle_t handle, cusparseSpMVOpDescr_t descr); #endif template diff --git a/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu b/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu index 15809bdb46..67681718a3 100644 --- a/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu +++ b/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu @@ -45,8 +45,7 @@ struct SpMM_benchmarks_context_t { int col_dual = current_batch_size; int ld_dual = current_batch_size; - x_descr = - make_dnmat(rows_primal, col_primal, ld_primal, x.data(), CUSPARSE_ORDER_ROW); + x_descr = make_dnmat(rows_primal, col_primal, ld_primal, x.data(), CUSPARSE_ORDER_ROW); y_descr = make_dnmat(rows_dual, col_dual, ld_dual, y.data(), CUSPARSE_ORDER_ROW); // Init buffers for SpMMs @@ -267,7 +266,7 @@ int optimal_batch_size_handler(const optimization_problem_t& op_proble right_node, benchmark_runs, op_problem.get_handle_ptr()); - int current_step = 1; + int current_step = 1; #ifdef BATCH_VERBOSE_MODE std::cout << "Starting batch size: " << current_batch_size << " and ratio: " << current_ratio diff --git a/cpp/src/pdlp/pdhg.cu b/cpp/src/pdlp/pdhg.cu index 44ddece1c1..cbf3cd6f71 100644 --- a/cpp/src/pdlp/pdhg.cu +++ b/cpp/src/pdlp/pdhg.cu @@ -1383,8 +1383,8 @@ void pdhg_solver_t::update_solution( // Update cusparse views to point to the new values, cost is marginal RAFT_CUSPARSE_TRY(cusparseDnVecSetValues(cusparse_view_.current_AtY.get(), current_saddle_point_state_.current_AtY_.data())); - RAFT_CUSPARSE_TRY( - cusparseDnVecSetValues(cusparse_view_.next_AtY.get(), current_saddle_point_state_.next_AtY_.data())); + RAFT_CUSPARSE_TRY(cusparseDnVecSetValues(cusparse_view_.next_AtY.get(), + current_saddle_point_state_.next_AtY_.data())); RAFT_CUSPARSE_TRY(cusparseDnVecSetValues(cusparse_view_.potential_next_dual_solution.get(), potential_next_dual_solution_.data())); RAFT_CUSPARSE_TRY(cusparseDnVecSetValues(cusparse_view_.primal_solution.get(), diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index e1a874d20f..9ca8728636 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -1750,7 +1750,7 @@ void pdlp_solver_t::resize_and_swap_all_context_loop( // Reset all cusparse view // Reset cuSparse views for PDHG. unique_ptr move-assign destroys the old descriptor first. - auto& pdhg_cusparse_view = pdhg_solver_.get_cusparse_view(); + auto& pdhg_cusparse_view = pdhg_solver_.get_cusparse_view(); pdhg_cusparse_view.batch_dual_solutions = detail::make_dnmat(op_problem_scaled_.n_constraints, climber_strategies_.size(), From 036469d54cb4a351f6c053a9513514d6ce0d67b6 Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Thu, 11 Jun 2026 10:50:50 +0200 Subject: [PATCH 6/7] added has_value checks and fail loudly --- cpp/src/pdlp/cusparse_view.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index fb9659cb4a..a5fe1b1e89 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -212,6 +212,7 @@ cusparse_spmvop_descr_uptr make_spmvop_descr(cusparseHandle_t handle, { static const auto fn = dynamic_load_runtime::function("cusparseSpMVOp_createDescr"); + if (!fn.has_value()) { EXE_CUOPT_FAIL("Unable to resolve cusparseSpMVOp_createDescr at runtime"); } cusparseSpMVOpDescr_t descr{nullptr}; RAFT_CUSPARSE_TRY((*fn)(handle, &descr, opA, matA, vecX, vecY, vecZ, computeType, buffer.data())); return cusparse_spmvop_descr_uptr{descr}; @@ -221,6 +222,7 @@ cusparse_spmvop_plan_uptr make_spmvop_plan(cusparseHandle_t handle, cusparseSpMV { static const auto fn = dynamic_load_runtime::function("cusparseSpMVOp_createPlan"); + if (!fn.has_value()) { EXE_CUOPT_FAIL("Unable to resolve cusparseSpMVOp_createPlan at runtime"); } cusparseSpMVOpPlan_t plan{nullptr}; // cuOpt does not supply user-provided LTO IR; pass nullptr/0 so cuSPARSE JITs internally. RAFT_CUSPARSE_TRY((*fn)(handle, descr, &plan, /*ltoIRBuf=*/nullptr, /*ltoIRSize=*/0)); From 520218ac009ff6dd54f48d27b995428436b2070e Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Thu, 11 Jun 2026 11:07:12 +0200 Subject: [PATCH 7/7] style --- cpp/src/pdlp/cusparse_view.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index a5fe1b1e89..9dc6850878 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -212,7 +212,9 @@ cusparse_spmvop_descr_uptr make_spmvop_descr(cusparseHandle_t handle, { static const auto fn = dynamic_load_runtime::function("cusparseSpMVOp_createDescr"); - if (!fn.has_value()) { EXE_CUOPT_FAIL("Unable to resolve cusparseSpMVOp_createDescr at runtime"); } + if (!fn.has_value()) { + EXE_CUOPT_FAIL("Unable to resolve cusparseSpMVOp_createDescr at runtime"); + } cusparseSpMVOpDescr_t descr{nullptr}; RAFT_CUSPARSE_TRY((*fn)(handle, &descr, opA, matA, vecX, vecY, vecZ, computeType, buffer.data())); return cusparse_spmvop_descr_uptr{descr};