From 23b445ad35e125096b43bf78affa090310e658ae Mon Sep 17 00:00:00 2001 From: Marco Franzreb Salgado Date: Thu, 7 May 2026 00:31:36 -0700 Subject: [PATCH 01/56] Add Windows support --- CMakeLists.txt | 3 + ci/build_common.sh | 22 ++++--- cmake/NVBenchCUPTI.cmake | 14 +++-- cmake/NVBenchConfigTarget.cmake | 18 +++++- nvbench/config.cuh.in | 4 ++ testing/axes_metadata.cu | 1 + testing/cmake/CMakeLists.txt | 15 +++++ testing/cmake/test_export/CMakeLists.txt | 74 +++++++++++++----------- 8 files changed, 105 insertions(+), 46 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 01b39bbe..fcd44bb4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,6 +39,9 @@ if (${CUDAToolkit_VERSION} VERSION_LESS 11.3) endif() option(BUILD_SHARED_LIBS "Build NVBench as a shared library" ON) +if (WIN32 AND BUILD_SHARED_LIBS) + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) +endif() option(NVBench_ENABLE_NVML "Build with NVML support from the Cuda Toolkit." ON) option(NVBench_ENABLE_CUPTI "Build NVBench with CUPTI." ${cupti_default}) diff --git a/ci/build_common.sh b/ci/build_common.sh index 2c30414a..718e22fa 100755 --- a/ci/build_common.sh +++ b/ci/build_common.sh @@ -12,6 +12,7 @@ CUDA_COMPILER=${CUDACXX:-nvcc} # $CUDACXX if set, otherwise `nvcc` CUDA_ARCHS= # Empty, use presets by default. GLOBAL_CMAKE_OPTIONS=() DISABLE_CUB_BENCHMARKS= # Enable to force-disable building CUB benchmarks. 
+HOST_OS="linux" # "linux" or "windows" # Check if the correct number of arguments has been provided function usage { @@ -21,6 +22,7 @@ function usage { echo echo "Options:" echo " -v/--verbose: enable shell echo for debugging" + echo " -os: Target OS, \"linux\" or \"windows\" (Defaults to linux)" echo " -cuda: CUDA compiler (Defaults to \$CUDACXX if set, otherwise nvcc)" echo " -cxx: Host compiler (Defaults to \$CXX if set, otherwise g++)" echo " -std: CUDA/C++ standard (Defaults to 17)" @@ -32,6 +34,7 @@ function usage { echo " $ PARALLEL_LEVEL=8 $0 -cxx g++-9" echo " $ $0 -cxx clang++-8" echo " $ $0 -cxx g++-8 -std 20 -arch 80-real -v -cuda /usr/local/bin/nvcc" + echo " $ $0 -os windows -cxx cl.exe -arch native" echo " $ $0 -cmake-options \"-DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS=-Wfatal-errors\"" exit 1 } @@ -44,6 +47,7 @@ args=("$@") while [ "${#args[@]}" -ne 0 ]; do case "${args[0]}" in -v | --verbose) VERBOSE=1; args=("${args[@]:1}");; + -os) HOST_OS="${args[1]}"; args=("${args[@]:2}");; -cxx) HOST_COMPILER="${args[1]}"; args=("${args[@]:2}");; -std) CXX_STANDARD="${args[1]}"; args=("${args[@]:2}");; -cuda) CUDA_COMPILER="${args[1]}"; args=("${args[@]:2}");; @@ -66,8 +70,8 @@ while [ "${#args[@]}" -ne 0 ]; do done # Convert to full paths: -HOST_COMPILER=$(which ${HOST_COMPILER}) -CUDA_COMPILER=$(which ${CUDA_COMPILER}) +HOST_COMPILER=$(which "${HOST_COMPILER}") +CUDA_COMPILER=$(which "${CUDA_COMPILER}") if [[ -n "${CUDA_ARCHS}" ]]; then GLOBAL_CMAKE_OPTIONS+=("-DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHS}") @@ -91,11 +95,15 @@ BUILD_DIR="../build/${CCCL_BUILD_INFIX}" # The most recent build will always be symlinked to cccl/build/latest mkdir -p $BUILD_DIR -rm -f ../build/latest -ln -sf $BUILD_DIR ../build/latest - -# Now that BUILD_DIR exists, use readlink to canonicalize the path: -BUILD_DIR=$(readlink -f "${BUILD_DIR}") +if [[ "${HOST_OS}" == "windows" ]]; then + # Git Bash on Windows cannot create directory symlinks without elevated privileges + 
BUILD_DIR=$(cd "${BUILD_DIR}" && pwd) +else + rm -f ../build/latest + ln -sf $BUILD_DIR ../build/latest + # Now that BUILD_DIR exists, use readlink to canonicalize the path: + BUILD_DIR=$(readlink -f "${BUILD_DIR}") +fi # Prepare environment for CMake: export CMAKE_BUILD_PARALLEL_LEVEL="${PARALLEL_LEVEL}" diff --git a/cmake/NVBenchCUPTI.cmake b/cmake/NVBenchCUPTI.cmake index 10a70893..789f4af1 100644 --- a/cmake/NVBenchCUPTI.cmake +++ b/cmake/NVBenchCUPTI.cmake @@ -23,14 +23,20 @@ function(nvbench_add_cupti_dep dep_name) add_library(nvbench::${dep_name_lower} SHARED IMPORTED) find_library(NVBench_${dep_name_upper}_LIBRARY ${dep_name_lower} REQUIRED - DOC "The full path to lib${dep_name_lower}.so from the CUDA Toolkit." + DOC "The import library for ${dep_name_lower} from the CUDA Toolkit." HINTS "${nvbench_cupti_root}/lib64" ) mark_as_advanced(NVBench_${dep_name_upper}_LIBRARY) - set_target_properties(nvbench::${dep_name_lower} PROPERTIES - IMPORTED_LOCATION "${NVBench_${dep_name_upper}_LIBRARY}" - ) + if (WIN32) + set_target_properties(nvbench::${dep_name_lower} PROPERTIES + IMPORTED_IMPLIB "${NVBench_${dep_name_upper}_LIBRARY}" + ) + else() + set_target_properties(nvbench::${dep_name_lower} PROPERTIES + IMPORTED_LOCATION "${NVBench_${dep_name_upper}_LIBRARY}" + ) + endif() endfunction() nvbench_add_cupti_dep(cupti) diff --git a/cmake/NVBenchConfigTarget.cmake b/cmake/NVBenchConfigTarget.cmake index 7c8a4b93..536e9663 100644 --- a/cmake/NVBenchConfigTarget.cmake +++ b/cmake/NVBenchConfigTarget.cmake @@ -91,11 +91,25 @@ endif() if (CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") # fmtlib uses llvm's _BitInt internally, which is not available when compiling through nvcc: target_compile_definitions(nvbench.build_interface INTERFACE "FMT_USE_BITINT=0") + if (MSVC) + # cudafe cannot evaluate fmtlib's UTF-8 literal check even when /utf-8 is passed to the host compiler: + target_compile_definitions(nvbench.build_interface INTERFACE + $<$:FMT_UNICODE=0> + ) + endif() endif() 
target_compile_options(nvbench.build_interface INTERFACE $<$:-Xcudafe=--display_error_number> $<$:-Wno-deprecated-gpu-targets> + $<$,$>:-Xcompiler=/utf-8> + # Suppress cudafe diagnostics triggered by fmtlib headers when compiled through MSVC+nvcc: + # 27: character value is out of range (char32_t sentinel values in lookup tables) + # 128: loop is not reachable (dead code in constexpr string comparison) + # 2417: constexpr constructor calls non-constexpr function (bigint default ctor) + $<$,$>:-Xcudafe=--diag_suppress=27> + $<$,$>:-Xcudafe=--diag_suppress=128> + $<$,$>:-Xcudafe=--diag_suppress=2417> ) if (NVBench_ENABLE_WERROR) target_compile_options(nvbench.build_interface INTERFACE @@ -115,8 +129,8 @@ function(nvbench_config_target target_name) # the library path, other times they're in a subdirectory that isn't added to # the library path... # To simplify installed nvbench usage, add the CUPTI libraries path to the - # installed nvbench rpath: - if (NVBench_ENABLE_CUPTI AND nvbench_cupti_root) + # installed nvbench rpath (Unix only; Windows uses PATH for DLL lookup): + if (NVBench_ENABLE_CUPTI AND nvbench_cupti_root AND NOT WIN32) set_target_properties(${target_name} PROPERTIES INSTALL_RPATH "${nvbench_cupti_root}/lib64" ) diff --git a/nvbench/config.cuh.in b/nvbench/config.cuh.in index d151c130..2f89f4cc 100644 --- a/nvbench/config.cuh.in +++ b/nvbench/config.cuh.in @@ -24,7 +24,11 @@ // Defined if NVBench has been built with CUPTI support. 
#cmakedefine NVBENCH_HAS_CUPTI +#if defined(_MSVC_LANG) +#define NVBENCH_CPLUSPLUS _MSVC_LANG +#else #define NVBENCH_CPLUSPLUS __cplusplus +#endif // Detect current dialect: #if NVBENCH_CPLUSPLUS < 201703L diff --git a/testing/axes_metadata.cu b/testing/axes_metadata.cu index 9e546602..d6cc441d 100644 --- a/testing/axes_metadata.cu +++ b/testing/axes_metadata.cu @@ -24,6 +24,7 @@ #include #include +#include #include #include "test_asserts.cuh" diff --git a/testing/cmake/CMakeLists.txt b/testing/cmake/CMakeLists.txt index c4e4eb77..506de5a7 100644 --- a/testing/cmake/CMakeLists.txt +++ b/testing/cmake/CMakeLists.txt @@ -9,6 +9,14 @@ set(cmake_opts -D "CMAKE_CUDA_FLAGS=${CMAKE_CUDA_FLAGS}" -D "CMAKE_CUDA_ARCHITECTURES=${arches}" ) +if (WIN32) + list(APPEND cmake_opts + -D "CMAKE_CUDA_HOST_COMPILER=${CMAKE_CXX_COMPILER}" + -D "CMAKE_LINKER=${CMAKE_LINKER}" + -D "CMAKE_RC_COMPILER=${CMAKE_RC_COMPILER}" + -D "CMAKE_MT=${CMAKE_MT}" + ) +endif() # Temporary installation prefix for tests against installed nvbench: set(tmp_install_prefix "${CMAKE_CURRENT_BINARY_DIR}/test_nvbench_install") @@ -32,6 +40,13 @@ function(nvbench_add_compile_test full_test_name_var subdir test_id) ${ARGN} --test-command "${CMAKE_CTEST_COMMAND}" --output-on-failure ) + if (WIN32 AND NVBench_ENABLE_CUPTI AND nvbench_cupti_root) + cmake_path(NATIVE_PATH nvbench_cupti_root cupti_native) + cmake_path(NATIVE_PATH NVBench_EXECUTABLE_OUTPUT_DIR bin_native) + set_tests_properties(${test_name} PROPERTIES + ENVIRONMENT "PATH=${bin_native}\\;${cupti_native}\\lib64\\;$ENV{PATH}" + ) + endif() set(${full_test_name_var} ${test_name} PARENT_SCOPE) endfunction() diff --git a/testing/cmake/test_export/CMakeLists.txt b/testing/cmake/test_export/CMakeLists.txt index e3d7d33c..21faa30a 100644 --- a/testing/cmake/test_export/CMakeLists.txt +++ b/testing/cmake/test_export/CMakeLists.txt @@ -10,45 +10,53 @@ enable_testing() add_test(NAME test_bench COMMAND "$" --timeout 1) add_test(NAME nvbench_ctl COMMAND "$") -# Setup 
LD_LIBRARY_PATH for testing -if (UNIX) - set(ctl_lib_path "") - set(cupti_lib_path "") - - # Need to find installed libnvbench.so for installed nvbench-ctl. - # Not needed for build_tree test because of RUNPATH. - if (TEST_TYPE STREQUAL "INSTALL_TREE") - get_property(nvbench_config TARGET nvbench::nvbench - PROPERTY IMPORTED_CONFIGURATIONS - ) - - list(LENGTH nvbench_config num_configs) - if (num_configs GREATER 1) - message(WARNING - "Multiple IMPORTED_CONFIGURATIONS for nvbench::nvbench. " - "Picking the first one. This may cause issues." - ) - list(GET nvbench_config 0 nvbench_config) - endif() +# Setup runtime library paths for testing. +# Unix uses LD_LIBRARY_PATH; Windows uses PATH for DLL lookup. +get_property(nvbench_config TARGET nvbench::nvbench + PROPERTY IMPORTED_CONFIGURATIONS +) +list(LENGTH nvbench_config num_configs) +if (num_configs GREATER 1) + message(WARNING + "Multiple IMPORTED_CONFIGURATIONS for nvbench::nvbench. " + "Picking the first one. This may cause issues." + ) + list(GET nvbench_config 0 nvbench_config) +endif() - get_property(ctl_lib_path TARGET nvbench::nvbench - PROPERTY IMPORTED_LOCATION_${nvbench_config} - ) - cmake_path(GET ctl_lib_path PARENT_PATH ctl_lib_path) - endif() +set(nvbench_lib_dir "") +# On Unix the build tree uses RUNPATH so only the install tree needs the path. +# On Windows there is no RUNPATH so we always need the DLL directory. 
+if (WIN32 OR TEST_TYPE STREQUAL "INSTALL_TREE") + get_property(nvbench_lib TARGET nvbench::nvbench + PROPERTY IMPORTED_LOCATION_${nvbench_config} + ) + cmake_path(GET nvbench_lib PARENT_PATH nvbench_lib_dir) +endif() - # Need to add the CUPTI path to LD_LIBRARY_PATH to make sure CUPTI libraries - # are found at runtime: - if (TARGET nvbench::cupti) - get_property(cupti_lib_path TARGET nvbench::cupti PROPERTY IMPORTED_LOCATION) - cmake_path(GET cupti_lib_path PARENT_PATH cupti_lib_path) +set(cupti_lib_dir "") +if (TARGET nvbench::cupti) + if (WIN32) + get_property(cupti_lib TARGET nvbench::cupti PROPERTY IMPORTED_IMPLIB) + else() + get_property(cupti_lib TARGET nvbench::cupti PROPERTY IMPORTED_LOCATION) endif() + cmake_path(GET cupti_lib PARENT_PATH cupti_lib_dir) +endif() +if (WIN32) + set(lib_dirs "${nvbench_lib_dir}\\;${cupti_lib_dir}") set_property(TEST test_bench PROPERTY - ENVIRONMENT "LD_LIBRARY_PATH=${cupti_lib_path}" + ENVIRONMENT "PATH=${lib_dirs}\\;$ENV{PATH}" ) set_property(TEST nvbench_ctl PROPERTY - ENVIRONMENT "LD_LIBRARY_PATH=${ctl_lib_path}:${cupti_lib_path}" + ENVIRONMENT "PATH=${lib_dirs}\\;$ENV{PATH}" + ) +else() + set_property(TEST test_bench PROPERTY + ENVIRONMENT "LD_LIBRARY_PATH=${cupti_lib_dir}" + ) + set_property(TEST nvbench_ctl PROPERTY + ENVIRONMENT "LD_LIBRARY_PATH=${nvbench_lib_dir}:${cupti_lib_dir}" ) - endif() From 787e435e6d5201905e798a23ab067e680d94f3b1 Mon Sep 17 00:00:00 2001 From: Marco Franzreb Salgado Date: Wed, 13 May 2026 10:38:55 -0700 Subject: [PATCH 02/56] Windows support: revert CI specific changes --- ci/build_common.sh | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/ci/build_common.sh b/ci/build_common.sh index 718e22fa..2c30414a 100755 --- a/ci/build_common.sh +++ b/ci/build_common.sh @@ -12,7 +12,6 @@ CUDA_COMPILER=${CUDACXX:-nvcc} # $CUDACXX if set, otherwise `nvcc` CUDA_ARCHS= # Empty, use presets by default. 
GLOBAL_CMAKE_OPTIONS=() DISABLE_CUB_BENCHMARKS= # Enable to force-disable building CUB benchmarks. -HOST_OS="linux" # "linux" or "windows" # Check if the correct number of arguments has been provided function usage { @@ -22,7 +21,6 @@ function usage { echo echo "Options:" echo " -v/--verbose: enable shell echo for debugging" - echo " -os: Target OS, \"linux\" or \"windows\" (Defaults to linux)" echo " -cuda: CUDA compiler (Defaults to \$CUDACXX if set, otherwise nvcc)" echo " -cxx: Host compiler (Defaults to \$CXX if set, otherwise g++)" echo " -std: CUDA/C++ standard (Defaults to 17)" @@ -34,7 +32,6 @@ function usage { echo " $ PARALLEL_LEVEL=8 $0 -cxx g++-9" echo " $ $0 -cxx clang++-8" echo " $ $0 -cxx g++-8 -std 20 -arch 80-real -v -cuda /usr/local/bin/nvcc" - echo " $ $0 -os windows -cxx cl.exe -arch native" echo " $ $0 -cmake-options \"-DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS=-Wfatal-errors\"" exit 1 } @@ -47,7 +44,6 @@ args=("$@") while [ "${#args[@]}" -ne 0 ]; do case "${args[0]}" in -v | --verbose) VERBOSE=1; args=("${args[@]:1}");; - -os) HOST_OS="${args[1]}"; args=("${args[@]:2}");; -cxx) HOST_COMPILER="${args[1]}"; args=("${args[@]:2}");; -std) CXX_STANDARD="${args[1]}"; args=("${args[@]:2}");; -cuda) CUDA_COMPILER="${args[1]}"; args=("${args[@]:2}");; @@ -70,8 +66,8 @@ while [ "${#args[@]}" -ne 0 ]; do done # Convert to full paths: -HOST_COMPILER=$(which "${HOST_COMPILER}") -CUDA_COMPILER=$(which "${CUDA_COMPILER}") +HOST_COMPILER=$(which ${HOST_COMPILER}) +CUDA_COMPILER=$(which ${CUDA_COMPILER}) if [[ -n "${CUDA_ARCHS}" ]]; then GLOBAL_CMAKE_OPTIONS+=("-DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHS}") @@ -95,15 +91,11 @@ BUILD_DIR="../build/${CCCL_BUILD_INFIX}" # The most recent build will always be symlinked to cccl/build/latest mkdir -p $BUILD_DIR -if [[ "${HOST_OS}" == "windows" ]]; then - # Git Bash on Windows cannot create directory symlinks without elevated privileges - BUILD_DIR=$(cd "${BUILD_DIR}" && pwd) -else - rm -f ../build/latest - ln -sf 
$BUILD_DIR ../build/latest - # Now that BUILD_DIR exists, use readlink to canonicalize the path: - BUILD_DIR=$(readlink -f "${BUILD_DIR}") -fi +rm -f ../build/latest +ln -sf $BUILD_DIR ../build/latest + +# Now that BUILD_DIR exists, use readlink to canonicalize the path: +BUILD_DIR=$(readlink -f "${BUILD_DIR}") # Prepare environment for CMake: export CMAKE_BUILD_PARALLEL_LEVEL="${PARALLEL_LEVEL}" From 78b674bf2e125e3c34f65ab221c0001adfd96535 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Fri, 15 May 2026 15:27:37 -0500 Subject: [PATCH 03/56] Re-enable NVBench Windows build job Remove gate that disables Windows NVBench build job in pr.yml --- .github/workflows/pr.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 7ea85397..40427264 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -79,8 +79,6 @@ jobs: nvbench-windows: name: NVBench Windows CUDA${{ matrix.config.cuda }} ${{ matrix.config.host }} C++${{ matrix.config.std }} - # TODO: Re-enable after https://github.com/NVIDIA/nvbench/pull/354 fixes the Windows build. 
- if: false permissions: id-token: write contents: read From 460e14f9b7d250685ad5bda30b94ca96f4cfe0b3 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Sat, 16 May 2026 08:43:08 -0500 Subject: [PATCH 04/56] Install CUDA Profiler API into toolkit --- .github/workflows/build-windows.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index 777d2665..4676bfed 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -119,6 +119,7 @@ jobs: - name: Build NVBench env: + NVBENCH_WINDOWS_CUDA: ${{ inputs.cuda }} NVBENCH_WINDOWS_STD: ${{ inputs.std }} NVBENCH_WINDOWS_ARCH: ${{ inputs.arch }} run: | @@ -131,6 +132,7 @@ jobs: @" `$ErrorActionPreference = 'Stop' git config --global --add safe.directory '$containerRepo' + & '$containerRepo/ci/windows/install_cuda_profiler_api.ps1' -cudaVersion '$env:NVBENCH_WINDOWS_CUDA' & '$containerRepo/ci/windows/build_nvbench.ps1' -std '$env:NVBENCH_WINDOWS_STD' -arch '$env:NVBENCH_WINDOWS_ARCH' exit `$LASTEXITCODE "@ | Set-Content -Path $script -Encoding UTF8 From c6cd09756858407a7d1f7a5e32d8e5e1ccf2c95f Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Sat, 16 May 2026 09:02:26 -0500 Subject: [PATCH 05/56] Add install_cuda_profiler_api.ps1 --- ci/windows/install_cuda_profiler_api.ps1 | 61 ++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 ci/windows/install_cuda_profiler_api.ps1 diff --git a/ci/windows/install_cuda_profiler_api.ps1 b/ci/windows/install_cuda_profiler_api.ps1 new file mode 100644 index 00000000..005631e4 --- /dev/null +++ b/ci/windows/install_cuda_profiler_api.ps1 @@ -0,0 +1,61 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +Param( + [Parameter(Mandatory = $false)] + [Alias("cudaVersion")] + [string]$CUDA_VERSION = "" +) + +$ErrorActionPreference = "Stop" + +if (-not $CUDA_VERSION) { + if ($env:CUDA_PATH -and ($env:CUDA_PATH -match "v(?\d+\.\d+)$")) { + $CUDA_VERSION = $Matches.version + } else { + throw "Could not determine CUDA version. Provide -cudaVersion or set CUDA_PATH." + } +} + +$version = [Version]$CUDA_VERSION +$major = $version.Major +$minor = $version.Minor +$build = $version.Build + +if ($build -lt 0) { + $build = 0 +} + +$mmbVersionTag = "${major}.${minor}.${build}" +$mmVersionTag = "${major}.${minor}" +$cudaRoot = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$mmVersionTag" +$profilerHeader = "$cudaRoot\include\cuda_profiler_api.h" + +if (Test-Path $profilerHeader) { + Write-Host "CUDA Profiler API is already installed: $profilerHeader" + return +} + +$component = "cuda_profiler_api_$mmVersionTag" +$cudaMajorUri = "${mmbVersionTag}/network_installers/cuda_${mmbVersionTag}_windows_network.exe" +$cudaVersionUrl = "https://developer.download.nvidia.com/compute/cuda/$cudaMajorUri" +$installer = Join-Path $env:TEMP "cuda_${mmbVersionTag}_windows_network.exe" + +Write-Host "Installing CUDA component: $component" +Write-Host "Downloading CUDA network installer: $cudaVersionUrl" +Invoke-WebRequest -Uri $cudaVersionUrl -OutFile $installer -UseBasicParsing + +try { + $process = Start-Process -Wait -PassThru -FilePath $installer -ArgumentList @("-s", $component) + if ($process.ExitCode -ne 0) { + throw "CUDA network installer failed with exit code $($process.ExitCode)." 
+ } +} finally { + Remove-Item $installer -ErrorAction SilentlyContinue +} + +if (-not (Test-Path $profilerHeader)) { + throw "CUDA Profiler API installation completed, but header was not found: $profilerHeader" +} + +Write-Host "CUDA Profiler API installed: $profilerHeader" From f8c0554f0e6aa3641cfd4509c2ba3aa017fa0c0a Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Sat, 16 May 2026 16:51:14 -0500 Subject: [PATCH 06/56] Inform MSVC that static library export main Attempt to fix "LINK : fatal error LNK1561: entry point must be defined" when building benchmarks which need main function provided by static library libnvbench_main after #350 --- nvbench/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nvbench/CMakeLists.txt b/nvbench/CMakeLists.txt index 7466dcd7..27cb62bf 100644 --- a/nvbench/CMakeLists.txt +++ b/nvbench/CMakeLists.txt @@ -134,6 +134,10 @@ nvbench_config_target(nvbench.main) target_compile_definitions(nvbench.main PRIVATE NVBENCH_NO_IMPLICIT_SYSTEM_HEADER) # Propagate `nvbench` to consumers but keep NVBench's own build warning-visible. target_link_libraries(nvbench.main PUBLIC nvbench) +if (MSVC) + # inform MSVC that library provides main + target_link_options(nvbench.main INTERFACE "/INCLUDE:main") +endif() # Ensure CUDA/CUPTI/NVML include dirs are visible for nvbench.main's build. target_link_libraries(nvbench.main PRIVATE ${ctk_libraries}) # Add NVBench's headers privately so the main library itself sees warnings. 
From f402b57e50918f2bfeb2f5e66e83f23d9438b753 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Sun, 17 May 2026 06:21:08 -0500 Subject: [PATCH 07/56] Review feedback to PowerShell script --- ci/windows/install_cuda_profiler_api.ps1 | 27 +++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/ci/windows/install_cuda_profiler_api.ps1 b/ci/windows/install_cuda_profiler_api.ps1 index 005631e4..dc133ff7 100644 --- a/ci/windows/install_cuda_profiler_api.ps1 +++ b/ci/windows/install_cuda_profiler_api.ps1 @@ -9,6 +9,26 @@ Param( $ErrorActionPreference = "Stop" +function Assert-NvidiaAuthenticodeSignature { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$Path + ) + + $signature = Get-AuthenticodeSignature -FilePath $Path + if ($signature.Status -ne "Valid") { + throw "Invalid Authenticode signature for '$Path': $($signature.Status) $($signature.StatusMessage)" + } + + $subject = $signature.SignerCertificate.Subject + if ($subject -notmatch "NVIDIA") { + throw "Unexpected signer for '$Path': $subject" + } + + Write-Host "Validated Authenticode signature for '$Path': $subject" +} + if (-not $CUDA_VERSION) { if ($env:CUDA_PATH -and ($env:CUDA_PATH -match "v(?\d+\.\d+)$")) { $CUDA_VERSION = $Matches.version @@ -28,7 +48,11 @@ if ($build -lt 0) { $mmbVersionTag = "${major}.${minor}.${build}" $mmVersionTag = "${major}.${minor}" -$cudaRoot = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$mmVersionTag" +$cudaRoot = if ($env:CUDA_PATH) { + $env:CUDA_PATH +} else { + "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$mmVersionTag" +} $profilerHeader = "$cudaRoot\include\cuda_profiler_api.h" if (Test-Path $profilerHeader) { @@ -44,6 +68,7 @@ $installer = Join-Path $env:TEMP "cuda_${mmbVersionTag}_windows_network.exe" Write-Host "Installing CUDA component: $component" Write-Host "Downloading CUDA network installer: $cudaVersionUrl" Invoke-WebRequest -Uri 
$cudaVersionUrl -OutFile $installer -UseBasicParsing +Assert-NvidiaAuthenticodeSignature -Path $installer try { $process = Start-Process -Wait -PassThru -FilePath $installer -ArgumentList @("-s", $component) From 71eacdc9701f2f924a83554d1fac13ab9827b283 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Sun, 17 May 2026 06:22:37 -0500 Subject: [PATCH 08/56] Fix how CMAKE_CUDA_HOST_COMPILER is set in call to cmake --- testing/cmake/CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/testing/cmake/CMakeLists.txt b/testing/cmake/CMakeLists.txt index 506de5a7..2c8d174b 100644 --- a/testing/cmake/CMakeLists.txt +++ b/testing/cmake/CMakeLists.txt @@ -10,8 +10,12 @@ set(cmake_opts -D "CMAKE_CUDA_ARCHITECTURES=${arches}" ) if (WIN32) + set(cuda_host_compiler "${CMAKE_CUDA_HOST_COMPILER}") + if (NOT cuda_host_compiler) + set(cuda_host_compiler "${CMAKE_CXX_COMPILER}") + endif() list(APPEND cmake_opts - -D "CMAKE_CUDA_HOST_COMPILER=${CMAKE_CXX_COMPILER}" + -D "CMAKE_CUDA_HOST_COMPILER=${cuda_host_compiler}" -D "CMAKE_LINKER=${CMAKE_LINKER}" -D "CMAKE_RC_COMPILER=${CMAKE_RC_COMPILER}" -D "CMAKE_MT=${CMAKE_MT}" From cc209bb103845a5155be43b4ddb125231d4b5f75 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Sun, 17 May 2026 06:23:28 -0500 Subject: [PATCH 09/56] Filter out empty directories from LD_LIBRARY_PATH/PATH Act on review feedback regarding corner cases where testing may depend on the directory it is performed from --- testing/cmake/test_export/CMakeLists.txt | 28 +++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/testing/cmake/test_export/CMakeLists.txt b/testing/cmake/test_export/CMakeLists.txt index 21faa30a..f8b03b4b 100644 --- a/testing/cmake/test_export/CMakeLists.txt +++ b/testing/cmake/test_export/CMakeLists.txt @@ -44,19 +44,37 @@ if (TARGET nvbench::cupti) cmake_path(GET cupti_lib 
PARENT_PATH cupti_lib_dir) endif() +function(join_nonempty_paths out_var separator) + set(paths "") + foreach(path IN LISTS ARGN) + if (path) + list(APPEND paths "${path}") + endif() + endforeach() + list(JOIN paths "${separator}" joined_paths) + set(${out_var} "${joined_paths}" PARENT_SCOPE) +endfunction() + if (WIN32) - set(lib_dirs "${nvbench_lib_dir}\\;${cupti_lib_dir}") + join_nonempty_paths(lib_dirs "\\;" "${nvbench_lib_dir}" "${cupti_lib_dir}") + if (lib_dirs) + set(path_env "PATH=${lib_dirs}\\;$ENV{PATH}") + else() + set(path_env "PATH=$ENV{PATH}") + endif() set_property(TEST test_bench PROPERTY - ENVIRONMENT "PATH=${lib_dirs}\\;$ENV{PATH}" + ENVIRONMENT "${path_env}" ) set_property(TEST nvbench_ctl PROPERTY - ENVIRONMENT "PATH=${lib_dirs}\\;$ENV{PATH}" + ENVIRONMENT "${path_env}" ) else() + join_nonempty_paths(cupti_ld_library_path ":" "${cupti_lib_dir}") + join_nonempty_paths(nvbench_ld_library_path ":" "${nvbench_lib_dir}" "${cupti_lib_dir}") set_property(TEST test_bench PROPERTY - ENVIRONMENT "LD_LIBRARY_PATH=${cupti_lib_dir}" + ENVIRONMENT "LD_LIBRARY_PATH=${cupti_ld_library_path}" ) set_property(TEST nvbench_ctl PROPERTY - ENVIRONMENT "LD_LIBRARY_PATH=${nvbench_lib_dir}:${cupti_lib_dir}" + ENVIRONMENT "LD_LIBRARY_PATH=${nvbench_ld_library_path}" ) endif() From d980c2a7cfcb508fec39f0f1a2b8e9c262067cb3 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Sun, 17 May 2026 06:50:58 -0500 Subject: [PATCH 10/56] Check that cudaVersion and :CUDA_PATH are consistent --- ci/windows/install_cuda_profiler_api.ps1 | 34 +++++++++++++++++++----- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/ci/windows/install_cuda_profiler_api.ps1 b/ci/windows/install_cuda_profiler_api.ps1 index dc133ff7..b518a85e 100644 --- a/ci/windows/install_cuda_profiler_api.ps1 +++ b/ci/windows/install_cuda_profiler_api.ps1 @@ -9,6 +9,19 @@ Param( $ErrorActionPreference = "Stop" +function Get-CudaVersionFromPath { + Param( 
+ [Parameter(Mandatory = $false)] + [string]$Path = "" + ) + + if ($Path -and $Path -match "v(?\d+\.\d+)[\\/]?$") { + return $Matches.version + } + + return "" +} + function Assert-NvidiaAuthenticodeSignature { Param( [Parameter(Mandatory = $true)] @@ -30,10 +43,9 @@ function Assert-NvidiaAuthenticodeSignature { } if (-not $CUDA_VERSION) { - if ($env:CUDA_PATH -and ($env:CUDA_PATH -match "v(?\d+\.\d+)$")) { - $CUDA_VERSION = $Matches.version - } else { - throw "Could not determine CUDA version. Provide -cudaVersion or set CUDA_PATH." + $CUDA_VERSION = Get-CudaVersionFromPath -Path $env:CUDA_PATH + if (-not $CUDA_VERSION) { + throw "Could not determine CUDA version. Provide -cudaVersion or set CUDA_PATH to a path ending in v.." } } @@ -48,10 +60,18 @@ if ($build -lt 0) { $mmbVersionTag = "${major}.${minor}.${build}" $mmVersionTag = "${major}.${minor}" -$cudaRoot = if ($env:CUDA_PATH) { - $env:CUDA_PATH + +if ($env:CUDA_PATH) { + $cudaPathVersion = Get-CudaVersionFromPath -Path $env:CUDA_PATH + if (-not $cudaPathVersion) { + throw "CUDA_PATH is set but does not end in v.: $env:CUDA_PATH" + } + if ($cudaPathVersion -ne $mmVersionTag) { + throw "CUDA_PATH points to CUDA $cudaPathVersion, but CUDA $mmVersionTag was requested." 
+ } + $cudaRoot = $env:CUDA_PATH } else { - "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$mmVersionTag" + $cudaRoot = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$mmVersionTag" } $profilerHeader = "$cudaRoot\include\cuda_profiler_api.h" From 6c44ec695869cef474285f8347b874a4a860515f Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Sun, 17 May 2026 06:51:30 -0500 Subject: [PATCH 11/56] Do not overwrite ENVIRONMENT property with empty values --- testing/cmake/test_export/CMakeLists.txt | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/testing/cmake/test_export/CMakeLists.txt b/testing/cmake/test_export/CMakeLists.txt index f8b03b4b..78c9eb73 100644 --- a/testing/cmake/test_export/CMakeLists.txt +++ b/testing/cmake/test_export/CMakeLists.txt @@ -71,10 +71,14 @@ if (WIN32) else() join_nonempty_paths(cupti_ld_library_path ":" "${cupti_lib_dir}") join_nonempty_paths(nvbench_ld_library_path ":" "${nvbench_lib_dir}" "${cupti_lib_dir}") - set_property(TEST test_bench PROPERTY - ENVIRONMENT "LD_LIBRARY_PATH=${cupti_ld_library_path}" - ) - set_property(TEST nvbench_ctl PROPERTY - ENVIRONMENT "LD_LIBRARY_PATH=${nvbench_ld_library_path}" - ) + if (cupti_ld_library_path) + set_property(TEST test_bench PROPERTY + ENVIRONMENT "LD_LIBRARY_PATH=${cupti_ld_library_path}" + ) + endif() + if (nvbench_ld_library_path) + set_property(TEST nvbench_ctl PROPERTY + ENVIRONMENT "LD_LIBRARY_PATH=${nvbench_ld_library_path}" + ) + endif() endif() From 7f2a6dc1129986c1a08c4819cb7e4151ea64c871 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Sun, 17 May 2026 07:15:04 -0500 Subject: [PATCH 12/56] Implement retry logic in downloading of CUDA Profiler API --- ci/windows/install_cuda_profiler_api.ps1 | 34 +++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/ci/windows/install_cuda_profiler_api.ps1 
b/ci/windows/install_cuda_profiler_api.ps1 index b518a85e..6a88ac3d 100644 --- a/ci/windows/install_cuda_profiler_api.ps1 +++ b/ci/windows/install_cuda_profiler_api.ps1 @@ -42,6 +42,38 @@ function Assert-NvidiaAuthenticodeSignature { Write-Host "Validated Authenticode signature for '$Path': $subject" } +function Invoke-WebRequestWithRetry { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$Uri, + + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$OutFile, + + [Parameter(Mandatory = $false)] + [ValidateRange(1, 10)] + [int]$MaxAttempts = 3 + ) + + for ($attempt = 1; $attempt -le $MaxAttempts; $attempt++) { + try { + Remove-Item $OutFile -ErrorAction SilentlyContinue + Invoke-WebRequest -Uri $Uri -OutFile $OutFile -UseBasicParsing + return + } catch { + if ($attempt -eq $MaxAttempts) { + throw + } + + $delaySeconds = 5 * $attempt + Write-Warning "Download failed on attempt $attempt of $MaxAttempts. Retrying in $delaySeconds seconds. $_" + Start-Sleep -Seconds $delaySeconds + } + } +} + if (-not $CUDA_VERSION) { $CUDA_VERSION = Get-CudaVersionFromPath -Path $env:CUDA_PATH if (-not $CUDA_VERSION) { @@ -87,7 +119,7 @@ $installer = Join-Path $env:TEMP "cuda_${mmbVersionTag}_windows_network.exe" Write-Host "Installing CUDA component: $component" Write-Host "Downloading CUDA network installer: $cudaVersionUrl" -Invoke-WebRequest -Uri $cudaVersionUrl -OutFile $installer -UseBasicParsing +Invoke-WebRequestWithRetry -Uri $cudaVersionUrl -OutFile $installer Assert-NvidiaAuthenticodeSignature -Path $installer try { From 287d041d15bfa35f2cef4bbd372c7c68bd575d01 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Sun, 17 May 2026 07:30:43 -0500 Subject: [PATCH 13/56] Strengthen publisher verification of downloaded artifact --- ci/windows/install_cuda_profiler_api.ps1 | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git 
a/ci/windows/install_cuda_profiler_api.ps1 b/ci/windows/install_cuda_profiler_api.ps1 index 6a88ac3d..8ce13ff5 100644 --- a/ci/windows/install_cuda_profiler_api.ps1 +++ b/ci/windows/install_cuda_profiler_api.ps1 @@ -34,12 +34,16 @@ function Assert-NvidiaAuthenticodeSignature { throw "Invalid Authenticode signature for '$Path': $($signature.Status) $($signature.StatusMessage)" } - $subject = $signature.SignerCertificate.Subject - if ($subject -notmatch "NVIDIA") { - throw "Unexpected signer for '$Path': $subject" + $expectedPublisher = "NVIDIA Corporation" + $publisher = $signature.SignerCertificate.GetNameInfo( + [System.Security.Cryptography.X509Certificates.X509NameType]::SimpleName, + $false + ) + if ($publisher -ne $expectedPublisher) { + throw "Unexpected signer for '$Path': $publisher" } - Write-Host "Validated Authenticode signature for '$Path': $subject" + Write-Host "Validated Authenticode signature for '$Path': $publisher" } function Invoke-WebRequestWithRetry { From 9f8bf81ac1762687f1850cba56f95ef0370e1b70 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Sun, 17 May 2026 07:40:42 -0500 Subject: [PATCH 14/56] Prepend new folders to LD_LIBRARY_PATH, do not overwrite --- testing/cmake/test_export/CMakeLists.txt | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/testing/cmake/test_export/CMakeLists.txt b/testing/cmake/test_export/CMakeLists.txt index 78c9eb73..27fd5088 100644 --- a/testing/cmake/test_export/CMakeLists.txt +++ b/testing/cmake/test_export/CMakeLists.txt @@ -55,6 +55,14 @@ function(join_nonempty_paths out_var separator) set(${out_var} "${joined_paths}" PARENT_SCOPE) endfunction() +function(prepend_runtime_path out_var path_var separator env_var) + if (DEFINED ENV{${env_var}} AND NOT "$ENV{${env_var}}" STREQUAL "") + set(${out_var} "${${path_var}}${separator}$ENV{${env_var}}" PARENT_SCOPE) + else() + set(${out_var} "${${path_var}}" PARENT_SCOPE) + endif() 
+endfunction() + if (WIN32) join_nonempty_paths(lib_dirs "\\;" "${nvbench_lib_dir}" "${cupti_lib_dir}") if (lib_dirs) @@ -72,13 +80,15 @@ else() join_nonempty_paths(cupti_ld_library_path ":" "${cupti_lib_dir}") join_nonempty_paths(nvbench_ld_library_path ":" "${nvbench_lib_dir}" "${cupti_lib_dir}") if (cupti_ld_library_path) + prepend_runtime_path(test_bench_ld_library_path cupti_ld_library_path ":" LD_LIBRARY_PATH) set_property(TEST test_bench PROPERTY - ENVIRONMENT "LD_LIBRARY_PATH=${cupti_ld_library_path}" + ENVIRONMENT "LD_LIBRARY_PATH=${test_bench_ld_library_path}" ) endif() if (nvbench_ld_library_path) + prepend_runtime_path(nvbench_ctl_ld_library_path nvbench_ld_library_path ":" LD_LIBRARY_PATH) set_property(TEST nvbench_ctl PROPERTY - ENVIRONMENT "LD_LIBRARY_PATH=${nvbench_ld_library_path}" + ENVIRONMENT "LD_LIBRARY_PATH=${nvbench_ctl_ld_library_path}" ) endif() endif() From adabe4afa55117de52709268c48aa4ae53b3b3b9 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Sun, 17 May 2026 07:42:36 -0500 Subject: [PATCH 15/56] Implement timeout, fail on 40x HTTP response code 4xx responses now fail immediately, and the installer is bounded to 15 minutes before being killed and reported as a timeout. 
--- ci/windows/install_cuda_profiler_api.ps1 | 41 +++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/ci/windows/install_cuda_profiler_api.ps1 b/ci/windows/install_cuda_profiler_api.ps1 index 8ce13ff5..6ae738d7 100644 --- a/ci/windows/install_cuda_profiler_api.ps1 +++ b/ci/windows/install_cuda_profiler_api.ps1 @@ -46,6 +46,30 @@ function Assert-NvidiaAuthenticodeSignature { Write-Host "Validated Authenticode signature for '$Path': $publisher" } +function Get-HttpStatusCodeFromError { + Param( + [Parameter(Mandatory = $true)] + $ErrorRecord + ) + + $responseProperty = $ErrorRecord.Exception.PSObject.Properties["Response"] + if (-not $responseProperty) { + return $null + } + + $response = $responseProperty.Value + if ($null -eq $response) { + return $null + } + + $statusCodeProperty = $response.PSObject.Properties["StatusCode"] + if (-not $statusCodeProperty) { + return $null + } + + return [int]$statusCodeProperty.Value +} + function Invoke-WebRequestWithRetry { Param( [Parameter(Mandatory = $true)] @@ -67,6 +91,11 @@ function Invoke-WebRequestWithRetry { Invoke-WebRequest -Uri $Uri -OutFile $OutFile -UseBasicParsing return } catch { + $statusCode = Get-HttpStatusCodeFromError -ErrorRecord $_ + if ($statusCode -ge 400 -and $statusCode -lt 500) { + throw "Download failed with non-retryable HTTP status $statusCode from '$Uri'. 
$_" + } + if ($attempt -eq $MaxAttempts) { throw } @@ -126,12 +155,22 @@ Write-Host "Downloading CUDA network installer: $cudaVersionUrl" Invoke-WebRequestWithRetry -Uri $cudaVersionUrl -OutFile $installer Assert-NvidiaAuthenticodeSignature -Path $installer +$installerTimeoutSeconds = 900 +$process = $null try { - $process = Start-Process -Wait -PassThru -FilePath $installer -ArgumentList @("-s", $component) + $process = Start-Process -PassThru -FilePath $installer -ArgumentList @("-s", $component) + if (-not $process.WaitForExit($installerTimeoutSeconds * 1000)) { + Stop-Process -Id $process.Id -Force -ErrorAction SilentlyContinue + throw "CUDA network installer timed out after $installerTimeoutSeconds seconds." + } + if ($process.ExitCode -ne 0) { throw "CUDA network installer failed with exit code $($process.ExitCode)." } } finally { + if ($process) { + $process.Dispose() + } Remove-Item $installer -ErrorAction SilentlyContinue } From a5b3e97129de7988248c26a4c9eaca34be506edf Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Sun, 17 May 2026 11:53:50 -0500 Subject: [PATCH 16/56] USE ENVIRONMENT_MODIFICATION property, not ENVIRONMENT --- testing/cmake/CMakeLists.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/testing/cmake/CMakeLists.txt b/testing/cmake/CMakeLists.txt index 2c8d174b..9c78fa0a 100644 --- a/testing/cmake/CMakeLists.txt +++ b/testing/cmake/CMakeLists.txt @@ -45,10 +45,11 @@ function(nvbench_add_compile_test full_test_name_var subdir test_id) --test-command "${CMAKE_CTEST_COMMAND}" --output-on-failure ) if (WIN32 AND NVBench_ENABLE_CUPTI AND nvbench_cupti_root) - cmake_path(NATIVE_PATH nvbench_cupti_root cupti_native) - cmake_path(NATIVE_PATH NVBench_EXECUTABLE_OUTPUT_DIR bin_native) + set(cupti_lib_dir "${nvbench_cupti_root}/lib64") set_tests_properties(${test_name} PROPERTIES - ENVIRONMENT "PATH=${bin_native}\\;${cupti_native}\\lib64\\;$ENV{PATH}" + 
ENVIRONMENT_MODIFICATION + "PATH=path_list_prepend:$" + "PATH=path_list_prepend:$" ) endif() set(${full_test_name_var} ${test_name} PARENT_SCOPE) From c6347b5e9d7c15fbfcb8f935efb1efa69a07112b Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Sun, 17 May 2026 11:56:04 -0500 Subject: [PATCH 17/56] escape environment modification values --- testing/cmake/test_export/CMakeLists.txt | 25 +++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/testing/cmake/test_export/CMakeLists.txt b/testing/cmake/test_export/CMakeLists.txt index 27fd5088..f9e3c324 100644 --- a/testing/cmake/test_export/CMakeLists.txt +++ b/testing/cmake/test_export/CMakeLists.txt @@ -64,18 +64,21 @@ function(prepend_runtime_path out_var path_var separator env_var) endfunction() if (WIN32) - join_nonempty_paths(lib_dirs "\\;" "${nvbench_lib_dir}" "${cupti_lib_dir}") - if (lib_dirs) - set(path_env "PATH=${lib_dirs}\\;$ENV{PATH}") - else() - set(path_env "PATH=$ENV{PATH}") + set(path_modifications "") + if (cupti_lib_dir) + list(APPEND path_modifications "PATH=path_list_prepend:$") + endif() + if (nvbench_lib_dir) + list(APPEND path_modifications "PATH=path_list_prepend:$") + endif() + if (path_modifications) + set_property(TEST test_bench PROPERTY + ENVIRONMENT_MODIFICATION ${path_modifications} + ) + set_property(TEST nvbench_ctl PROPERTY + ENVIRONMENT_MODIFICATION ${path_modifications} + ) endif() - set_property(TEST test_bench PROPERTY - ENVIRONMENT "${path_env}" - ) - set_property(TEST nvbench_ctl PROPERTY - ENVIRONMENT "${path_env}" - ) else() join_nonempty_paths(cupti_ld_library_path ":" "${cupti_lib_dir}") join_nonempty_paths(nvbench_ld_library_path ":" "${nvbench_lib_dir}" "${cupti_lib_dir}") From 919468f40f5561cbac5d2626a9dc5aa684dd57b6 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Sun, 17 May 2026 14:43:13 -0500 Subject: [PATCH 18/56] Fix cmake 
script error breaking the build --- testing/cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/cmake/CMakeLists.txt b/testing/cmake/CMakeLists.txt index 9c78fa0a..676f9952 100644 --- a/testing/cmake/CMakeLists.txt +++ b/testing/cmake/CMakeLists.txt @@ -46,7 +46,7 @@ function(nvbench_add_compile_test full_test_name_var subdir test_id) ) if (WIN32 AND NVBench_ENABLE_CUPTI AND nvbench_cupti_root) set(cupti_lib_dir "${nvbench_cupti_root}/lib64") - set_tests_properties(${test_name} PROPERTIES + set_property(TEST ${test_name} PROPERTY ENVIRONMENT_MODIFICATION "PATH=path_list_prepend:$" "PATH=path_list_prepend:$" From 177c7b054887c465e56a6313023f62395df17af2 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Sun, 17 May 2026 14:46:40 -0500 Subject: [PATCH 19/56] Added recommended timeout to Invoke-WebRequest --- ci/windows/install_cuda_profiler_api.ps1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/windows/install_cuda_profiler_api.ps1 b/ci/windows/install_cuda_profiler_api.ps1 index 6ae738d7..c0a2fec8 100644 --- a/ci/windows/install_cuda_profiler_api.ps1 +++ b/ci/windows/install_cuda_profiler_api.ps1 @@ -88,7 +88,7 @@ function Invoke-WebRequestWithRetry { for ($attempt = 1; $attempt -le $MaxAttempts; $attempt++) { try { Remove-Item $OutFile -ErrorAction SilentlyContinue - Invoke-WebRequest -Uri $Uri -OutFile $OutFile -UseBasicParsing + Invoke-WebRequest -Uri $Uri -OutFile $OutFile -UseBasicParsing -TimeoutSec 300 return } catch { $statusCode = Get-HttpStatusCodeFromError -ErrorRecord $_ From f83429a734845e902e1baf6791eddde6f7db3c80 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Sun, 17 May 2026 15:56:12 -0500 Subject: [PATCH 20/56] Set cmake_minimum_required version to 3.30.4, consistent with main project --- testing/cmake/test_export/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/testing/cmake/test_export/CMakeLists.txt b/testing/cmake/test_export/CMakeLists.txt index f9e3c324..15e2206f 100644 --- a/testing/cmake/test_export/CMakeLists.txt +++ b/testing/cmake/test_export/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.20.1) +cmake_minimum_required(VERSION 3.30.4) project(NVBenchTestExport CUDA CXX) message(STATUS "NVBench_DIR=${NVBench_DIR}") From ccfa1b5802f18bf147be226edf3086f35470d154 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Sun, 17 May 2026 15:59:25 -0500 Subject: [PATCH 21/56] Pass NVBENCH environment variables through docker for Windows build --- .github/workflows/build-windows.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index 4676bfed..b0cdb653 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -132,8 +132,8 @@ jobs: @" `$ErrorActionPreference = 'Stop' git config --global --add safe.directory '$containerRepo' - & '$containerRepo/ci/windows/install_cuda_profiler_api.ps1' -cudaVersion '$env:NVBENCH_WINDOWS_CUDA' - & '$containerRepo/ci/windows/build_nvbench.ps1' -std '$env:NVBENCH_WINDOWS_STD' -arch '$env:NVBENCH_WINDOWS_ARCH' + & '$containerRepo/ci/windows/install_cuda_profiler_api.ps1' -cudaVersion "`$env:NVBENCH_WINDOWS_CUDA" + & '$containerRepo/ci/windows/build_nvbench.ps1' -std "`$env:NVBENCH_WINDOWS_STD" -arch "`$env:NVBENCH_WINDOWS_ARCH" exit `$LASTEXITCODE "@ | Set-Content -Path $script -Encoding UTF8 @@ -154,6 +154,9 @@ jobs: "--env", "GITHUB_REPOSITORY=$env:GITHUB_REPOSITORY", "--env", "GITHUB_RUN_ID=$env:GITHUB_RUN_ID", "--env", "GITHUB_SHA=$env:GITHUB_SHA", + "--env", "NVBENCH_WINDOWS_ARCH=$env:NVBENCH_WINDOWS_ARCH", + "--env", "NVBENCH_WINDOWS_CUDA=$env:NVBENCH_WINDOWS_CUDA", + "--env", "NVBENCH_WINDOWS_STD=$env:NVBENCH_WINDOWS_STD", "--env", 
"SCCACHE_BUCKET=$env:SCCACHE_BUCKET", "--env", "SCCACHE_IDLE_TIMEOUT=$env:SCCACHE_IDLE_TIMEOUT", "--env", "SCCACHE_REGION=$env:SCCACHE_REGION", From cb00417c62445662bbe409a59111821778dbc314 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 06:41:22 -0500 Subject: [PATCH 22/56] Export IMPORTLIB_LOCATION for CUPTI on Windows and use in testing projects --- cmake/NVBenchCUPTI.cmake | 57 ++++++++++++++++++++++++ testing/cmake/CMakeLists.txt | 5 ++- testing/cmake/test_export/CMakeLists.txt | 6 +-- 3 files changed, 61 insertions(+), 7 deletions(-) diff --git a/cmake/NVBenchCUPTI.cmake b/cmake/NVBenchCUPTI.cmake index 789f4af1..acff09ff 100644 --- a/cmake/NVBenchCUPTI.cmake +++ b/cmake/NVBenchCUPTI.cmake @@ -16,6 +16,57 @@ endif() # - The dll locations are not specified # - Dependent libraries nvperf_* are not linked. # So we create our own targets: +function(nvbench_find_windows_cupti_runtime_library out_var dep_name library_path) + cmake_path(GET library_path PARENT_PATH library_dir) + set(runtime_search_dirs "${library_dir}") + + if ("${library_dir}" MATCHES "/Library/lib/x64$") + cmake_path(GET library_dir PARENT_PATH conda_lib_dir) + cmake_path(GET conda_lib_dir PARENT_PATH conda_library_dir) + list(APPEND runtime_search_dirs "${conda_library_dir}/bin") + elseif ("${library_dir}" MATCHES "/Library/lib$") + cmake_path(GET library_dir PARENT_PATH conda_library_dir) + list(APPEND runtime_search_dirs "${conda_library_dir}/bin") + endif() + + list(REMOVE_DUPLICATES runtime_search_dirs) + + foreach(runtime_search_dir IN LISTS runtime_search_dirs) + if ("${dep_name}" STREQUAL "cupti") + file(GLOB runtime_libraries LIST_DIRECTORIES false + "${runtime_search_dir}/cupti64_*.dll" + ) + if (NOT runtime_libraries) + file(GLOB runtime_libraries LIST_DIRECTORIES false + "${runtime_search_dir}/cupti.dll" + ) + endif() + else() + file(GLOB runtime_libraries LIST_DIRECTORIES false + 
"${runtime_search_dir}/${dep_name}.dll" + ) + endif() + + if (runtime_libraries) + list(LENGTH runtime_libraries num_runtime_libraries) + if (num_runtime_libraries GREATER 1) + message(FATAL_ERROR + "Found multiple runtime DLLs for ${dep_name}: ${runtime_libraries}" + ) + endif() + + list(GET runtime_libraries 0 runtime_library) + set(${out_var} "${runtime_library}" PARENT_SCOPE) + return() + endif() + endforeach() + + message(FATAL_ERROR + "Could not find the runtime DLL for ${dep_name}. " + "Searched these directories: ${runtime_search_dirs}" + ) +endfunction() + function(nvbench_add_cupti_dep dep_name) string(TOLOWER ${dep_name} dep_name_lower) string(TOUPPER ${dep_name} dep_name_upper) @@ -29,8 +80,14 @@ function(nvbench_add_cupti_dep dep_name) mark_as_advanced(NVBench_${dep_name_upper}_LIBRARY) if (WIN32) + nvbench_find_windows_cupti_runtime_library( + NVBench_${dep_name_upper}_DLL + ${dep_name_lower} + "${NVBench_${dep_name_upper}_LIBRARY}" + ) set_target_properties(nvbench::${dep_name_lower} PROPERTIES IMPORTED_IMPLIB "${NVBench_${dep_name_upper}_LIBRARY}" + IMPORTED_LOCATION "${NVBench_${dep_name_upper}_DLL}" ) else() set_target_properties(nvbench::${dep_name_lower} PROPERTIES diff --git a/testing/cmake/CMakeLists.txt b/testing/cmake/CMakeLists.txt index 676f9952..af6d10b5 100644 --- a/testing/cmake/CMakeLists.txt +++ b/testing/cmake/CMakeLists.txt @@ -44,8 +44,9 @@ function(nvbench_add_compile_test full_test_name_var subdir test_id) ${ARGN} --test-command "${CMAKE_CTEST_COMMAND}" --output-on-failure ) - if (WIN32 AND NVBench_ENABLE_CUPTI AND nvbench_cupti_root) - set(cupti_lib_dir "${nvbench_cupti_root}/lib64") + if (WIN32 AND TARGET nvbench::cupti) + get_property(cupti_runtime_lib TARGET nvbench::cupti PROPERTY IMPORTED_LOCATION) + cmake_path(GET cupti_runtime_lib PARENT_PATH cupti_lib_dir) set_property(TEST ${test_name} PROPERTY ENVIRONMENT_MODIFICATION "PATH=path_list_prepend:$" diff --git a/testing/cmake/test_export/CMakeLists.txt 
b/testing/cmake/test_export/CMakeLists.txt index 15e2206f..746b21eb 100644 --- a/testing/cmake/test_export/CMakeLists.txt +++ b/testing/cmake/test_export/CMakeLists.txt @@ -36,11 +36,7 @@ endif() set(cupti_lib_dir "") if (TARGET nvbench::cupti) - if (WIN32) - get_property(cupti_lib TARGET nvbench::cupti PROPERTY IMPORTED_IMPLIB) - else() - get_property(cupti_lib TARGET nvbench::cupti PROPERTY IMPORTED_LOCATION) - endif() + get_property(cupti_lib TARGET nvbench::cupti PROPERTY IMPORTED_LOCATION) cmake_path(GET cupti_lib PARENT_PATH cupti_lib_dir) endif() From 28376eb2e922e477a2cd850b9c62225b90694838 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 10:06:24 -0500 Subject: [PATCH 23/56] Add Zc:preprocessor to host compiler on Windows. Configure runtime env for tests to find CUPTI library --- .github/workflows/build-windows.yml | 86 ++++++++++++++++++++++++++++- ci/windows/build_common.psm1 | 21 ++++++- ci/windows/build_nvbench.ps1 | 39 ++++++++++++- cmake/NVBenchConfigTarget.cmake | 31 +++++++++++ examples/CMakeLists.txt | 1 + exec/CMakeLists.txt | 42 ++++++++++---- testing/CMakeLists.txt | 1 + testing/device/CMakeLists.txt | 1 + 8 files changed, 208 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index b0cdb653..42a40231 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -35,6 +35,22 @@ on: type: string required: false default: "" + gpu: + type: boolean + required: false + default: false + gpu_args: + type: string + required: false + default: "--gpus all" + run_tests: + type: boolean + required: false + default: false + device_testing: + type: boolean + required: false + default: false workflow_dispatch: inputs: cuda: @@ -73,6 +89,26 @@ on: type: string required: false default: "" + gpu: + description: "Expose host GPUs to the Windows devcontainer" + type: boolean + required: false 
+ default: false + gpu_args: + description: "Docker arguments used when gpu is true" + type: string + required: false + default: "--gpus all" + run_tests: + description: "Run CTest after building" + type: boolean + required: false + default: false + device_testing: + description: "Enable tests that require a CUDA-capable device" + type: boolean + required: false + default: false permissions: contents: read @@ -113,6 +149,34 @@ jobs: aws-region: us-east-2 role-duration-seconds: 43200 + - name: Validate Windows build inputs + env: + NVBENCH_WINDOWS_CUDA: ${{ inputs.cuda }} + NVBENCH_WINDOWS_STD: ${{ inputs.std }} + NVBENCH_WINDOWS_ARCH: ${{ inputs.arch }} + run: | + $ErrorActionPreference = "Stop" + + if ($env:NVBENCH_WINDOWS_CUDA -notmatch '^\d+\.\d+$') { + throw "Invalid CUDA version '$env:NVBENCH_WINDOWS_CUDA'. Expected '.', for example '13.0'." + } + + if (@("17", "20") -notcontains $env:NVBENCH_WINDOWS_STD) { + throw "Invalid C++ standard '$env:NVBENCH_WINDOWS_STD'. Expected '17' or '20'." + } + + $arch = "$env:NVBENCH_WINDOWS_ARCH".Trim() + if ($arch) { + if (@("all", "all-major", "native") -notcontains $arch) { + $archItems = @($arch -split '[;,]' | Where-Object { $_ }) + foreach ($archItem in $archItems) { + if ($archItem -notmatch '^\d{2,3}(-real|-virtual)?$') { + throw "Invalid CMAKE_CUDA_ARCHITECTURES value '$arch'. Expected empty, 'all', 'all-major', 'native', or a list like '80;90-real'." 
+ } + } + } + } + - name: Fetch Windows devcontainer image run: | docker pull "$env:WINDOWS_CI_IMAGE" @@ -122,6 +186,10 @@ jobs: NVBENCH_WINDOWS_CUDA: ${{ inputs.cuda }} NVBENCH_WINDOWS_STD: ${{ inputs.std }} NVBENCH_WINDOWS_ARCH: ${{ inputs.arch }} + NVBENCH_WINDOWS_GPU: ${{ inputs.gpu }} + NVBENCH_WINDOWS_GPU_ARGS: ${{ inputs.gpu_args }} + NVBENCH_WINDOWS_RUN_TESTS: ${{ inputs.run_tests }} + NVBENCH_WINDOWS_DEVICE_TESTING: ${{ inputs.device_testing }} run: | $ErrorActionPreference = "Stop" @@ -133,7 +201,7 @@ jobs: `$ErrorActionPreference = 'Stop' git config --global --add safe.directory '$containerRepo' & '$containerRepo/ci/windows/install_cuda_profiler_api.ps1' -cudaVersion "`$env:NVBENCH_WINDOWS_CUDA" - & '$containerRepo/ci/windows/build_nvbench.ps1' -std "`$env:NVBENCH_WINDOWS_STD" -arch "`$env:NVBENCH_WINDOWS_ARCH" + & '$containerRepo/ci/windows/build_nvbench.ps1' -std "`$env:NVBENCH_WINDOWS_STD" -arch "`$env:NVBENCH_WINDOWS_ARCH" -run-tests "`$env:NVBENCH_WINDOWS_RUN_TESTS" -device-testing "`$env:NVBENCH_WINDOWS_DEVICE_TESTING" exit `$LASTEXITCODE "@ | Set-Content -Path $script -Encoding UTF8 @@ -156,6 +224,9 @@ jobs: "--env", "GITHUB_SHA=$env:GITHUB_SHA", "--env", "NVBENCH_WINDOWS_ARCH=$env:NVBENCH_WINDOWS_ARCH", "--env", "NVBENCH_WINDOWS_CUDA=$env:NVBENCH_WINDOWS_CUDA", + "--env", "NVBENCH_WINDOWS_DEVICE_TESTING=$env:NVBENCH_WINDOWS_DEVICE_TESTING", + "--env", "NVBENCH_WINDOWS_GPU=$env:NVBENCH_WINDOWS_GPU", + "--env", "NVBENCH_WINDOWS_RUN_TESTS=$env:NVBENCH_WINDOWS_RUN_TESTS", "--env", "NVBENCH_WINDOWS_STD=$env:NVBENCH_WINDOWS_STD", "--env", "SCCACHE_BUCKET=$env:SCCACHE_BUCKET", "--env", "SCCACHE_IDLE_TIMEOUT=$env:SCCACHE_IDLE_TIMEOUT", @@ -163,7 +234,18 @@ jobs: "--env", "SCCACHE_S3_NO_CREDENTIALS=$env:SCCACHE_S3_NO_CREDENTIALS", "--env", "SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX=$env:SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX", "--env", "SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE=$env:SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE", - "--env", 
"SCCACHE_S3_USE_SSL=$env:SCCACHE_S3_USE_SSL", + "--env", "SCCACHE_S3_USE_SSL=$env:SCCACHE_S3_USE_SSL" + ) + + if ($env:NVBENCH_WINDOWS_GPU -eq "true") { + $dockerGpuArgs = @($env:NVBENCH_WINDOWS_GPU_ARGS -split '\s+' | Where-Object { $_ }) + if ($dockerGpuArgs.Length -eq 0) { + throw "NVBENCH_WINDOWS_GPU_ARGS must not be empty when GPU support is enabled." + } + $dockerArgs += $dockerGpuArgs + } + + $dockerArgs += @( "$env:WINDOWS_CI_IMAGE", "powershell", "-NoLogo", "-NoProfile", "-ExecutionPolicy", "Bypass", "-File", $containerScript diff --git a/ci/windows/build_common.psm1 b/ci/windows/build_common.psm1 index f2a32655..99217dc6 100644 --- a/ci/windows/build_common.psm1 +++ b/ci/windows/build_common.psm1 @@ -178,4 +178,23 @@ function Configure-And-Build-Preset { Build-Preset $BUILD_NAME $PRESET } -Export-ModuleMember -Function Print-EnvironmentDetails, Configure-Preset, Build-Preset, Configure-And-Build-Preset +function Test-Preset { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$BUILD_NAME, + + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$PRESET + ) + + Push-Location ".." 
+ try { + Invoke-NativeCommand "$BUILD_NAME test" "ctest" @("--preset=$PRESET", "--output-on-failure") + } finally { + Pop-Location + } +} + +Export-ModuleMember -Function Print-EnvironmentDetails, Configure-Preset, Build-Preset, Configure-And-Build-Preset, Test-Preset diff --git a/ci/windows/build_nvbench.ps1 b/ci/windows/build_nvbench.ps1 index a39b0b34..e16f57e3 100644 --- a/ci/windows/build_nvbench.ps1 +++ b/ci/windows/build_nvbench.ps1 @@ -11,11 +11,37 @@ Param( [Parameter(Mandatory = $false)] [Alias("cmake-options")] - [string]$CMAKE_OPTIONS = "" + [string]$CMAKE_OPTIONS = "", + + [Parameter(Mandatory = $false)] + [Alias("run-tests")] + [string]$RUN_TESTS = "false", + + [Parameter(Mandatory = $false)] + [Alias("device-testing")] + [string]$DEVICE_TESTING = "false" ) $ErrorActionPreference = "Stop" +function ConvertTo-Bool { + Param( + [Parameter(Mandatory = $false)] + [AllowNull()] + [string]$Value = "" + ) + + $normalized = if ($null -eq $Value) { "" } else { $Value.Trim().ToLowerInvariant() } + if (@("1", "true", "yes", "on") -contains $normalized) { + return $true + } + if (@("0", "false", "no", "off", "") -contains $normalized) { + return $false + } + + throw "Expected a boolean-like value, got '$Value'." 
+} + $initialPath = Get-Location $pushed = $false @@ -27,15 +53,26 @@ if ((Split-Path $pwd -Leaf) -ne "ci") { try { Import-Module "$PSScriptRoot/build_common.psm1" -ArgumentList @($CXX_STANDARD, $CUDA_ARCH, $CMAKE_OPTIONS) -Force + $runTests = ConvertTo-Bool $RUN_TESTS + $deviceTesting = ConvertTo-Bool $DEVICE_TESTING + Print-EnvironmentDetails + Write-Host "RUN_TESTS=$runTests" + Write-Host "DEVICE_TESTING=$deviceTesting" $preset = "nvbench-ci" $localOptions = @( "-DCMAKE_CXX_STANDARD=$CXX_STANDARD", "-DCMAKE_CUDA_STANDARD=$CXX_STANDARD" ) + if ($deviceTesting) { + $localOptions += "-DNVBench_ENABLE_DEVICE_TESTING=ON" + } Configure-And-Build-Preset "NVBench" $preset $localOptions + if ($runTests) { + Test-Preset "NVBench" $preset + } } finally { if ($pushed) { Set-Location $initialPath diff --git a/cmake/NVBenchConfigTarget.cmake b/cmake/NVBenchConfigTarget.cmake index 536e9663..63a382b6 100644 --- a/cmake/NVBenchConfigTarget.cmake +++ b/cmake/NVBenchConfigTarget.cmake @@ -66,6 +66,12 @@ if (NVBench_TOPLEVEL_PROJECT AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") endif() endif() +if (MSVC) + # CCCL requires MSVC's conforming preprocessor when compiling CUDA sources + # with cl.exe as the host compiler. 
+ nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "/Zc:preprocessor") +endif() + nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wall") nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wextra") nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wconversion") @@ -136,3 +142,28 @@ function(nvbench_config_target target_name) ) endif() endfunction() + +function(nvbench_config_test_runtime_environment test_name) + if (NOT WIN32) + return() + endif() + + set(path_modifications "") + if (TARGET nvbench) + list(APPEND path_modifications "PATH=path_list_prepend:$") + endif() + + if (TARGET nvbench::cupti) + get_property(cupti_runtime_lib TARGET nvbench::cupti PROPERTY IMPORTED_LOCATION) + if (cupti_runtime_lib) + cmake_path(GET cupti_runtime_lib PARENT_PATH cupti_runtime_dir) + list(APPEND path_modifications "PATH=path_list_prepend:$") + endif() + endif() + + if (path_modifications) + set_property(TEST ${test_name} + APPEND PROPERTY ENVIRONMENT_MODIFICATION ${path_modifications} + ) + endif() +endfunction() diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 061f8eb5..2abe3c7d 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -37,6 +37,7 @@ function (nvbench_add_examples_target target_prefix cuda_std) add_test(NAME ${example_name} COMMAND "$" ${example_args}) + nvbench_config_test_runtime_environment(${example_name}) # These should not deadlock. If they do, it may be that the CUDA context was created before # setting CUDA_MODULE_LOAD=EAGER in main, see NVIDIA/nvbench#136. 
diff --git a/exec/CMakeLists.txt b/exec/CMakeLists.txt index 775dccc9..7a9e88ec 100644 --- a/exec/CMakeLists.txt +++ b/exec/CMakeLists.txt @@ -9,35 +9,57 @@ add_dependencies(nvbench.all nvbench.ctl) nvbench_install_executables(nvbench.ctl) if (NVBench_ENABLE_TESTING) + set(ctl_test_names) + # Test: nvbench - add_test(NAME nvbench.ctl.no_args COMMAND "$") + set(test_name nvbench.ctl.no_args) + add_test(NAME ${test_name} COMMAND "$") + list(APPEND ctl_test_names ${test_name}) # Should print the version without any args: - set_property(TEST nvbench.ctl.no_args + set_property(TEST ${test_name} PROPERTY PASS_REGULAR_EXPRESSION "NVBench v" ) # Test: nvbench --version - add_test(NAME nvbench.ctl.version COMMAND "$" --version) + set(test_name nvbench.ctl.version) + add_test(NAME ${test_name} COMMAND "$" --version) + list(APPEND ctl_test_names ${test_name}) # Should print the version without any args: - set_property(TEST nvbench.ctl.version + set_property(TEST ${test_name} PROPERTY PASS_REGULAR_EXPRESSION "NVBench v" ) # Test: nvbench --list - add_test(NAME nvbench.ctl.list COMMAND "$" --list) + set(test_name nvbench.ctl.list) + add_test(NAME ${test_name} COMMAND "$" --list) + list(APPEND ctl_test_names ${test_name}) # Test: nvbench -l - add_test(NAME nvbench.ctl.l COMMAND "$" -l) + set(test_name nvbench.ctl.l) + add_test(NAME ${test_name} COMMAND "$" -l) + list(APPEND ctl_test_names ${test_name}) # Test: nvbench --help - add_test(NAME nvbench.ctl.help COMMAND "$" --help) + set(test_name nvbench.ctl.help) + add_test(NAME ${test_name} COMMAND "$" --help) + list(APPEND ctl_test_names ${test_name}) # Test: nvbench -h - add_test(NAME nvbench.ctl.h COMMAND "$" -h) + set(test_name nvbench.ctl.h) + add_test(NAME ${test_name} COMMAND "$" -h) + list(APPEND ctl_test_names ${test_name}) # Test: nvbench --help-axes - add_test(NAME nvbench.ctl.help_axes COMMAND "$" --help-axes) + set(test_name nvbench.ctl.help_axes) + add_test(NAME ${test_name} COMMAND "$" --help-axes) + list(APPEND 
ctl_test_names ${test_name}) # Test: nvbench --help-axis - add_test(NAME nvbench.ctl.help_axis COMMAND "$" --help-axis) + set(test_name nvbench.ctl.help_axis) + add_test(NAME ${test_name} COMMAND "$" --help-axis) + list(APPEND ctl_test_names ${test_name}) + + foreach(test_name IN LISTS ctl_test_names) + nvbench_config_test_runtime_environment(${test_name}) + endforeach() endif() diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index bbf3e190..4f160923 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -47,6 +47,7 @@ foreach(test_src IN LISTS test_srcs) set_target_properties(${test_name} PROPERTIES COMPILE_FEATURES cuda_std_17) nvbench_config_target(${test_name}) add_test(NAME ${test_name} COMMAND "$" ${NVBench_TEST_ARGS_${test_name}}) + nvbench_config_test_runtime_environment(${test_name}) add_dependencies(nvbench.test.all ${test_name}) endforeach() diff --git a/testing/device/CMakeLists.txt b/testing/device/CMakeLists.txt index b7272ee1..918b5f1a 100644 --- a/testing/device/CMakeLists.txt +++ b/testing/device/CMakeLists.txt @@ -7,6 +7,7 @@ add_dependencies(nvbench.test.all ${test_name}) if (NVBench_ENABLE_DEVICE_TESTING) add_test(NAME ${test_name} COMMAND "$") + nvbench_config_test_runtime_environment(${test_name}) set_tests_properties(${test_name} PROPERTIES # Any timeouts/warnings are hard failures for this test. 
FAIL_REGULAR_EXPRESSION "Warn;timed out" From 03b8f025a4ee5b46311fb35854b4013cd1b39af8 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 10:23:45 -0500 Subject: [PATCH 24/56] Better fix to add /Zc:preprocessor that also propagates to header testing target --- cmake/NVBenchConfigTarget.cmake | 6 ------ nvbench/CMakeLists.txt | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cmake/NVBenchConfigTarget.cmake b/cmake/NVBenchConfigTarget.cmake index 63a382b6..b1c5f758 100644 --- a/cmake/NVBenchConfigTarget.cmake +++ b/cmake/NVBenchConfigTarget.cmake @@ -66,12 +66,6 @@ if (NVBench_TOPLEVEL_PROJECT AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") endif() endif() -if (MSVC) - # CCCL requires MSVC's conforming preprocessor when compiling CUDA sources - # with cl.exe as the host compiler. - nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "/Zc:preprocessor") -endif() - nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wall") nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wextra") nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wconversion") diff --git a/nvbench/CMakeLists.txt b/nvbench/CMakeLists.txt index 27cb62bf..6a436e73 100644 --- a/nvbench/CMakeLists.txt +++ b/nvbench/CMakeLists.txt @@ -101,6 +101,12 @@ target_link_libraries(nvbench fmt::fmt nvbench_json ) +target_compile_options(nvbench PUBLIC + # CCCL requires MSVC's conforming preprocessor when compiling CUDA sources + # with cl.exe as the host compiler. 
+ $<$:/Zc:preprocessor> + $<$,$>:-Xcompiler=/Zc:preprocessor> +) # ################################################################################################## From 5ddc8477ab4a332c2dd2c3769c65113fb3529088 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 10:50:00 -0500 Subject: [PATCH 25/56] Address code rabbit concern --- testing/cmake/test_export/CMakeLists.txt | 42 ++++++++++++++++-------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/testing/cmake/test_export/CMakeLists.txt b/testing/cmake/test_export/CMakeLists.txt index 746b21eb..62828781 100644 --- a/testing/cmake/test_export/CMakeLists.txt +++ b/testing/cmake/test_export/CMakeLists.txt @@ -12,26 +12,40 @@ add_test(NAME nvbench_ctl COMMAND "$") # Setup runtime library paths for testing. # Unix uses LD_LIBRARY_PATH; Windows uses PATH for DLL lookup. -get_property(nvbench_config TARGET nvbench::nvbench - PROPERTY IMPORTED_CONFIGURATIONS -) -list(LENGTH nvbench_config num_configs) -if (num_configs GREATER 1) - message(WARNING - "Multiple IMPORTED_CONFIGURATIONS for nvbench::nvbench. " - "Picking the first one. This may cause issues." +function(get_imported_location out_var target_name) + get_property(imported_configs TARGET ${target_name} + PROPERTY IMPORTED_CONFIGURATIONS ) - list(GET nvbench_config 0 nvbench_config) -endif() + list(LENGTH imported_configs num_configs) + if (num_configs GREATER 1) + message(WARNING + "Multiple IMPORTED_CONFIGURATIONS for ${target_name}. " + "Picking the first one. This may cause issues." 
+ ) + endif() + + if (num_configs GREATER 0) + list(GET imported_configs 0 imported_config) + get_property(imported_location TARGET ${target_name} + PROPERTY IMPORTED_LOCATION_${imported_config} + ) + else() + get_property(imported_location TARGET ${target_name} + PROPERTY IMPORTED_LOCATION + ) + endif() + + set(${out_var} "${imported_location}" PARENT_SCOPE) +endfunction() set(nvbench_lib_dir "") # On Unix the build tree uses RUNPATH so only the install tree needs the path. # On Windows there is no RUNPATH so we always need the DLL directory. if (WIN32 OR TEST_TYPE STREQUAL "INSTALL_TREE") - get_property(nvbench_lib TARGET nvbench::nvbench - PROPERTY IMPORTED_LOCATION_${nvbench_config} - ) - cmake_path(GET nvbench_lib PARENT_PATH nvbench_lib_dir) + get_imported_location(nvbench_lib nvbench::nvbench) + if (nvbench_lib) + cmake_path(GET nvbench_lib PARENT_PATH nvbench_lib_dir) + endif() endif() set(cupti_lib_dir "") From 76626e4390acbe6517e805af0ffe29ff903d7402 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 10:54:57 -0500 Subject: [PATCH 26/56] Validate before casting in PowerShell script --- ci/windows/install_cuda_profiler_api.ps1 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ci/windows/install_cuda_profiler_api.ps1 b/ci/windows/install_cuda_profiler_api.ps1 index c0a2fec8..b5697b1f 100644 --- a/ci/windows/install_cuda_profiler_api.ps1 +++ b/ci/windows/install_cuda_profiler_api.ps1 @@ -114,6 +114,10 @@ if (-not $CUDA_VERSION) { } } +if ($CUDA_VERSION -notmatch '^\d+\.\d+(\.\d+)?$') { + throw "Invalid CUDA version '$CUDA_VERSION'. Expected '.' or '..', for example '13.0' or '13.0.2'." 
+} + $version = [Version]$CUDA_VERSION $major = $version.Major $minor = $version.Minor From 3944851b4c5ce6c3c3d8a089896f2b696c98c523 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 11:11:46 -0500 Subject: [PATCH 27/56] decouple nvbench runtime path setup from cupti target detection --- testing/cmake/CMakeLists.txt | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/testing/cmake/CMakeLists.txt b/testing/cmake/CMakeLists.txt index af6d10b5..22e05b9b 100644 --- a/testing/cmake/CMakeLists.txt +++ b/testing/cmake/CMakeLists.txt @@ -44,13 +44,17 @@ function(nvbench_add_compile_test full_test_name_var subdir test_id) ${ARGN} --test-command "${CMAKE_CTEST_COMMAND}" --output-on-failure ) - if (WIN32 AND TARGET nvbench::cupti) - get_property(cupti_runtime_lib TARGET nvbench::cupti PROPERTY IMPORTED_LOCATION) - cmake_path(GET cupti_runtime_lib PARENT_PATH cupti_lib_dir) + if (WIN32) + set(path_mods "PATH=path_list_prepend:$") + if (TARGET nvbench::cupti) + get_property(cupti_runtime_lib TARGET nvbench::cupti PROPERTY IMPORTED_LOCATION) + cmake_path(GET cupti_runtime_lib PARENT_PATH cupti_lib_dir) + if (cupti_lib_dir) + list(PREPEND path_mods "PATH=path_list_prepend:$") + endif() + endif() set_property(TEST ${test_name} PROPERTY - ENVIRONMENT_MODIFICATION - "PATH=path_list_prepend:$" - "PATH=path_list_prepend:$" + ENVIRONMENT_MODIFICATION ${path_mods} ) endif() set(${full_test_name_var} ${test_name} PARENT_SCOPE) From 0c297053a98612c401cb2db1e9689876816a849a Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 11:12:36 -0500 Subject: [PATCH 28/56] Normalize multiple ARCH args --- .github/workflows/build-windows.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index 42a40231..103e3b0d 100644 --- 
a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -150,6 +150,7 @@ jobs: role-duration-seconds: 43200 - name: Validate Windows build inputs + id: validate_windows_build_inputs env: NVBENCH_WINDOWS_CUDA: ${{ inputs.cuda }} NVBENCH_WINDOWS_STD: ${{ inputs.std }} @@ -166,16 +167,22 @@ jobs: } $arch = "$env:NVBENCH_WINDOWS_ARCH".Trim() + $normalizedArch = $arch if ($arch) { if (@("all", "all-major", "native") -notcontains $arch) { - $archItems = @($arch -split '[;,]' | Where-Object { $_ }) + $archItems = @($arch -split '[;,]' | ForEach-Object { $_.Trim() } | Where-Object { $_ }) + if ($archItems.Length -eq 0) { + throw "Invalid CMAKE_CUDA_ARCHITECTURES value '$arch'. Expected empty, 'all', 'all-major', 'native', or a list like '80;90-real'." + } foreach ($archItem in $archItems) { if ($archItem -notmatch '^\d{2,3}(-real|-virtual)?$') { throw "Invalid CMAKE_CUDA_ARCHITECTURES value '$arch'. Expected empty, 'all', 'all-major', 'native', or a list like '80;90-real'." 
} } + $normalizedArch = $archItems -join ';' } } + Add-Content -Path $env:GITHUB_OUTPUT -Value "arch=$normalizedArch" - name: Fetch Windows devcontainer image run: | @@ -185,7 +192,7 @@ jobs: env: NVBENCH_WINDOWS_CUDA: ${{ inputs.cuda }} NVBENCH_WINDOWS_STD: ${{ inputs.std }} - NVBENCH_WINDOWS_ARCH: ${{ inputs.arch }} + NVBENCH_WINDOWS_ARCH: ${{ steps.validate_windows_build_inputs.outputs.arch }} NVBENCH_WINDOWS_GPU: ${{ inputs.gpu }} NVBENCH_WINDOWS_GPU_ARGS: ${{ inputs.gpu_args }} NVBENCH_WINDOWS_RUN_TESTS: ${{ inputs.run_tests }} From 806f1638e45aaf3e08dc1f6e298ff8ca39b46cfa Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 11:25:02 -0500 Subject: [PATCH 29/56] Better validation of gpu_args parameter --- .github/workflows/build-windows.yml | 32 ++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index 103e3b0d..8f9e3c56 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -39,10 +39,10 @@ on: type: boolean required: false default: false - gpu_args: + gpu_devices: type: string required: false - default: "--gpus all" + default: "all" run_tests: type: boolean required: false @@ -94,11 +94,11 @@ on: type: boolean required: false default: false - gpu_args: - description: "Docker arguments used when gpu is true" + gpu_devices: + description: "GPU devices to expose when gpu is true: 'all' or 'device=0,1'" type: string required: false - default: "--gpus all" + default: "all" run_tests: description: "Run CTest after building" type: boolean @@ -155,6 +155,8 @@ jobs: NVBENCH_WINDOWS_CUDA: ${{ inputs.cuda }} NVBENCH_WINDOWS_STD: ${{ inputs.std }} NVBENCH_WINDOWS_ARCH: ${{ inputs.arch }} + NVBENCH_WINDOWS_GPU: ${{ inputs.gpu }} + NVBENCH_WINDOWS_GPU_DEVICES: ${{ inputs.gpu_devices }} run: | $ErrorActionPreference = "Stop" @@ -184,6 +186,17 @@ jobs: 
} Add-Content -Path $env:GITHUB_OUTPUT -Value "arch=$normalizedArch" + $gpuDevices = "$env:NVBENCH_WINDOWS_GPU_DEVICES".Trim() + if ($env:NVBENCH_WINDOWS_GPU -eq "true") { + if (-not $gpuDevices) { + throw "GPU devices must not be empty when GPU support is enabled." + } + if ($gpuDevices -ne "all" -and $gpuDevices -notmatch '^device=\d+(,\d+)*$') { + throw "Invalid GPU devices value '$gpuDevices'. Expected 'all' or a device list like 'device=0,1'." + } + } + Add-Content -Path $env:GITHUB_OUTPUT -Value "gpu_devices=$gpuDevices" + - name: Fetch Windows devcontainer image run: | docker pull "$env:WINDOWS_CI_IMAGE" @@ -194,7 +207,7 @@ jobs: NVBENCH_WINDOWS_STD: ${{ inputs.std }} NVBENCH_WINDOWS_ARCH: ${{ steps.validate_windows_build_inputs.outputs.arch }} NVBENCH_WINDOWS_GPU: ${{ inputs.gpu }} - NVBENCH_WINDOWS_GPU_ARGS: ${{ inputs.gpu_args }} + NVBENCH_WINDOWS_GPU_DEVICES: ${{ steps.validate_windows_build_inputs.outputs.gpu_devices }} NVBENCH_WINDOWS_RUN_TESTS: ${{ inputs.run_tests }} NVBENCH_WINDOWS_DEVICE_TESTING: ${{ inputs.device_testing }} run: | @@ -245,11 +258,10 @@ jobs: ) if ($env:NVBENCH_WINDOWS_GPU -eq "true") { - $dockerGpuArgs = @($env:NVBENCH_WINDOWS_GPU_ARGS -split '\s+' | Where-Object { $_ }) - if ($dockerGpuArgs.Length -eq 0) { - throw "NVBENCH_WINDOWS_GPU_ARGS must not be empty when GPU support is enabled." + if (-not $env:NVBENCH_WINDOWS_GPU_DEVICES) { + throw "NVBENCH_WINDOWS_GPU_DEVICES must not be empty when GPU support is enabled." 
} - $dockerArgs += $dockerGpuArgs + $dockerArgs += @("--gpus", $env:NVBENCH_WINDOWS_GPU_DEVICES) } $dockerArgs += @( From 90c2386447fd4a1fbaa7137a0b9d9b9be909f425 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 11:26:36 -0500 Subject: [PATCH 30/56] use get_imported_location to get CUPTI library to improve multi-config support --- testing/cmake/test_export/CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/testing/cmake/test_export/CMakeLists.txt b/testing/cmake/test_export/CMakeLists.txt index 62828781..dd2afce4 100644 --- a/testing/cmake/test_export/CMakeLists.txt +++ b/testing/cmake/test_export/CMakeLists.txt @@ -50,8 +50,10 @@ endif() set(cupti_lib_dir "") if (TARGET nvbench::cupti) - get_property(cupti_lib TARGET nvbench::cupti PROPERTY IMPORTED_LOCATION) - cmake_path(GET cupti_lib PARENT_PATH cupti_lib_dir) + get_imported_location(cupti_lib nvbench::cupti) + if (cupti_lib) + cmake_path(GET cupti_lib PARENT_PATH cupti_lib_dir) + endif() endif() function(join_nonempty_paths out_var separator) From 737bb5828c271f6957d50d82986316a514f170f3 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 11:40:54 -0500 Subject: [PATCH 31/56] Validation of combinations of gpu, run_tests and device_testing --- .github/workflows/build-windows.yml | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index 8f9e3c56..305fd5f2 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -157,6 +157,8 @@ jobs: NVBENCH_WINDOWS_ARCH: ${{ inputs.arch }} NVBENCH_WINDOWS_GPU: ${{ inputs.gpu }} NVBENCH_WINDOWS_GPU_DEVICES: ${{ inputs.gpu_devices }} + NVBENCH_WINDOWS_RUN_TESTS: ${{ inputs.run_tests }} + NVBENCH_WINDOWS_DEVICE_TESTING: ${{ inputs.device_testing }} run: 
| $ErrorActionPreference = "Stop" @@ -168,6 +170,14 @@ jobs: throw "Invalid C++ standard '$env:NVBENCH_WINDOWS_STD'. Expected '17' or '20'." } + $gpuEnabled = $env:NVBENCH_WINDOWS_GPU -eq "true" + $runTests = $env:NVBENCH_WINDOWS_RUN_TESTS -eq "true" + $deviceTesting = $env:NVBENCH_WINDOWS_DEVICE_TESTING -eq "true" + + if ($runTests -and $deviceTesting -and -not $gpuEnabled) { + throw "Invalid Windows build inputs: run_tests=true and device_testing=true require gpu=true." + } + $arch = "$env:NVBENCH_WINDOWS_ARCH".Trim() $normalizedArch = $arch if ($arch) { @@ -187,13 +197,14 @@ jobs: Add-Content -Path $env:GITHUB_OUTPUT -Value "arch=$normalizedArch" $gpuDevices = "$env:NVBENCH_WINDOWS_GPU_DEVICES".Trim() - if ($env:NVBENCH_WINDOWS_GPU -eq "true") { - if (-not $gpuDevices) { - throw "GPU devices must not be empty when GPU support is enabled." - } - if ($gpuDevices -ne "all" -and $gpuDevices -notmatch '^device=\d+(,\d+)*$') { - throw "Invalid GPU devices value '$gpuDevices'. Expected 'all' or a device list like 'device=0,1'." - } + if ($gpuDevices -and $gpuDevices -ne "all" -and $gpuDevices -notmatch '^device=\d+(,\d+)*$') { + throw "Invalid GPU devices value '$gpuDevices'. Expected 'all' or a device list like 'device=0,1'." + } + if ($gpuEnabled -and -not $gpuDevices) { + throw "GPU devices must not be empty when GPU support is enabled." + } + if (-not $gpuEnabled -and $gpuDevices -and $gpuDevices -ne "all") { + throw "Invalid Windows build inputs: gpu_devices may only select specific devices when gpu=true." 
} Add-Content -Path $env:GITHUB_OUTPUT -Value "gpu_devices=$gpuDevices" From fcfb9b65d4533a1fe63d7af9fe7d366e9f7c2784 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 11:45:39 -0500 Subject: [PATCH 32/56] Resolve code-rabbit concern in handling multiple imported configurations to match build type, if set --- testing/cmake/test_export/CMakeLists.txt | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/testing/cmake/test_export/CMakeLists.txt b/testing/cmake/test_export/CMakeLists.txt index dd2afce4..6328e34f 100644 --- a/testing/cmake/test_export/CMakeLists.txt +++ b/testing/cmake/test_export/CMakeLists.txt @@ -20,12 +20,22 @@ function(get_imported_location out_var target_name) if (num_configs GREATER 1) message(WARNING "Multiple IMPORTED_CONFIGURATIONS for ${target_name}. " - "Picking the first one. This may cause issues." + "Picking CMAKE_BUILD_TYPE if present, otherwise the first one." 
) endif() if (num_configs GREATER 0) - list(GET imported_configs 0 imported_config) + if (CMAKE_BUILD_TYPE) + string(TOUPPER "${CMAKE_BUILD_TYPE}" build_type) + list(FIND imported_configs "${build_type}" imported_config_index) + else() + set(imported_config_index -1) + endif() + if (imported_config_index GREATER_EQUAL 0) + list(GET imported_configs ${imported_config_index} imported_config) + else() + list(GET imported_configs 0 imported_config) + endif() get_property(imported_location TARGET ${target_name} PROPERTY IMPORTED_LOCATION_${imported_config} ) From 35a0d59d7afa701fd2c2ca4a4e3860d23c0b0817 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 12:03:49 -0500 Subject: [PATCH 33/56] Reject GPU requests for forks --- .github/workflows/build-windows.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index 305fd5f2..2c1d9cbe 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -159,6 +159,7 @@ jobs: NVBENCH_WINDOWS_GPU_DEVICES: ${{ inputs.gpu_devices }} NVBENCH_WINDOWS_RUN_TESTS: ${{ inputs.run_tests }} NVBENCH_WINDOWS_DEVICE_TESTING: ${{ inputs.device_testing }} + NVBENCH_GITHUB_REPOSITORY: ${{ github.repository }} run: | $ErrorActionPreference = "Stop" @@ -174,6 +175,10 @@ jobs: $runTests = $env:NVBENCH_WINDOWS_RUN_TESTS -eq "true" $deviceTesting = $env:NVBENCH_WINDOWS_DEVICE_TESTING -eq "true" + if ($gpuEnabled -and $env:NVBENCH_GITHUB_REPOSITORY -ne "NVIDIA/nvbench") { + throw "Invalid Windows build inputs: gpu=true is only supported in NVIDIA/nvbench, where the configured runner can be selected." + } + if ($runTests -and $deviceTesting -and -not $gpuEnabled) { throw "Invalid Windows build inputs: run_tests=true and device_testing=true require gpu=true." 
} From 5a08b812f9488ad3ec9f3d98f981afa49b1502fb Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 12:10:53 -0500 Subject: [PATCH 34/56] Prevents installing cuda_profiler_api.h into one toolkit while CMake builds with another. --- ci/windows/install_cuda_profiler_api.ps1 | 56 ++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/ci/windows/install_cuda_profiler_api.ps1 b/ci/windows/install_cuda_profiler_api.ps1 index b5697b1f..0f3b288c 100644 --- a/ci/windows/install_cuda_profiler_api.ps1 +++ b/ci/windows/install_cuda_profiler_api.ps1 @@ -22,6 +22,43 @@ function Get-CudaVersionFromPath { return "" } +function Get-CudaRootFromNvcc { + $nvccCommand = Get-Command "nvcc.exe" -ErrorAction SilentlyContinue + if (-not $nvccCommand) { + return "" + } + + $nvccPath = $nvccCommand.Source + $binDir = Split-Path -Parent $nvccPath + if ((Split-Path -Leaf $binDir) -ne "bin") { + throw "Could not derive CUDA root from nvcc.exe path: $nvccPath" + } + + return Split-Path -Parent $binDir +} + +function Assert-SamePath { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$Left, + + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$Right, + + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$Message + ) + + $leftFullPath = [System.IO.Path]::GetFullPath($Left).TrimEnd('\', '/') + $rightFullPath = [System.IO.Path]::GetFullPath($Right).TrimEnd('\', '/') + if ($leftFullPath -ne $rightFullPath) { + throw "$Message Left='$leftFullPath' Right='$rightFullPath'" + } +} + function Assert-NvidiaAuthenticodeSignature { Param( [Parameter(Mandatory = $true)] @@ -130,6 +167,17 @@ if ($build -lt 0) { $mmbVersionTag = "${major}.${minor}.${build}" $mmVersionTag = "${major}.${minor}" +$nvccCudaRoot = Get-CudaRootFromNvcc +if ($nvccCudaRoot) { + $nvccCudaVersion = Get-CudaVersionFromPath -Path $nvccCudaRoot + if (-not $nvccCudaVersion) { + throw 
"Could not determine CUDA version from active nvcc.exe root: $nvccCudaRoot" + } + if ($nvccCudaVersion -ne $mmVersionTag) { + throw "Active nvcc.exe is from CUDA $nvccCudaVersion, but CUDA $mmVersionTag was requested." + } +} + if ($env:CUDA_PATH) { $cudaPathVersion = Get-CudaVersionFromPath -Path $env:CUDA_PATH if (-not $cudaPathVersion) { @@ -138,7 +186,15 @@ if ($env:CUDA_PATH) { if ($cudaPathVersion -ne $mmVersionTag) { throw "CUDA_PATH points to CUDA $cudaPathVersion, but CUDA $mmVersionTag was requested." } + if ($nvccCudaRoot) { + Assert-SamePath ` + -Left $env:CUDA_PATH ` + -Right $nvccCudaRoot ` + -Message "CUDA_PATH and active nvcc.exe point to different CUDA Toolkit roots." + } $cudaRoot = $env:CUDA_PATH +} elseif ($nvccCudaRoot) { + $cudaRoot = $nvccCudaRoot } else { $cudaRoot = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$mmVersionTag" } From 0be5c9cce8f21ee0e9275d94ff9309840520c925 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 12:18:51 -0500 Subject: [PATCH 35/56] Fail fast for deterministic client errors returned by download request --- ci/windows/install_cuda_profiler_api.ps1 | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/windows/install_cuda_profiler_api.ps1 b/ci/windows/install_cuda_profiler_api.ps1 index 0f3b288c..c98fd379 100644 --- a/ci/windows/install_cuda_profiler_api.ps1 +++ b/ci/windows/install_cuda_profiler_api.ps1 @@ -129,7 +129,10 @@ function Invoke-WebRequestWithRetry { return } catch { $statusCode = Get-HttpStatusCodeFromError -ErrorRecord $_ - if ($statusCode -ge 400 -and $statusCode -lt 500) { + # Fail fast for deterministic client errors that indicate a bad URL, + # missing installer, or unsupported method. Keep 408/429 and 5xx on + # the retry path because they are commonly transient in CI. 
+ if (@(400, 401, 403, 404, 405, 410, 414) -contains $statusCode) { throw "Download failed with non-retryable HTTP status $statusCode from '$Uri'. $_" } From 2dba74d3091865f15e4083103f81163e336787cc Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 12:23:54 -0500 Subject: [PATCH 36/56] more robust imported_location computation --- testing/cmake/test_export/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/testing/cmake/test_export/CMakeLists.txt b/testing/cmake/test_export/CMakeLists.txt index 6328e34f..5e6c8b93 100644 --- a/testing/cmake/test_export/CMakeLists.txt +++ b/testing/cmake/test_export/CMakeLists.txt @@ -39,7 +39,9 @@ function(get_imported_location out_var target_name) get_property(imported_location TARGET ${target_name} PROPERTY IMPORTED_LOCATION_${imported_config} ) - else() + endif() + + if (NOT imported_location) get_property(imported_location TARGET ${target_name} PROPERTY IMPORTED_LOCATION ) From fedba46a2569af558baf9b4d6e85e07b1e6654f1 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 12:36:05 -0500 Subject: [PATCH 37/56] Make Linux also use ENVIRONMENT_MODIFICATION to simplify code --- testing/cmake/test_export/CMakeLists.txt | 49 +++++++++++------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/testing/cmake/test_export/CMakeLists.txt b/testing/cmake/test_export/CMakeLists.txt index 5e6c8b93..5bfbe540 100644 --- a/testing/cmake/test_export/CMakeLists.txt +++ b/testing/cmake/test_export/CMakeLists.txt @@ -68,25 +68,6 @@ if (TARGET nvbench::cupti) endif() endif() -function(join_nonempty_paths out_var separator) - set(paths "") - foreach(path IN LISTS ARGN) - if (path) - list(APPEND paths "${path}") - endif() - endforeach() - list(JOIN paths "${separator}" joined_paths) - set(${out_var} "${joined_paths}" PARENT_SCOPE) -endfunction() - 
-function(prepend_runtime_path out_var path_var separator env_var) - if (DEFINED ENV{${env_var}} AND NOT "$ENV{${env_var}}" STREQUAL "") - set(${out_var} "${${path_var}}${separator}$ENV{${env_var}}" PARENT_SCOPE) - else() - set(${out_var} "${${path_var}}" PARENT_SCOPE) - endif() -endfunction() - if (WIN32) set(path_modifications "") if (cupti_lib_dir) @@ -104,18 +85,32 @@ if (WIN32) ) endif() else() - join_nonempty_paths(cupti_ld_library_path ":" "${cupti_lib_dir}") - join_nonempty_paths(nvbench_ld_library_path ":" "${nvbench_lib_dir}" "${cupti_lib_dir}") - if (cupti_ld_library_path) - prepend_runtime_path(test_bench_ld_library_path cupti_ld_library_path ":" LD_LIBRARY_PATH) + set(test_bench_ld_modifications "") + if (cupti_lib_dir) + list(APPEND test_bench_ld_modifications + "LD_LIBRARY_PATH=path_list_prepend:$" + ) + endif() + if (test_bench_ld_modifications) set_property(TEST test_bench PROPERTY - ENVIRONMENT "LD_LIBRARY_PATH=${test_bench_ld_library_path}" + ENVIRONMENT_MODIFICATION ${test_bench_ld_modifications} + ) + endif() + + set(nvbench_ctl_ld_modifications "") + if (cupti_lib_dir) + list(APPEND nvbench_ctl_ld_modifications + "LD_LIBRARY_PATH=path_list_prepend:$" + ) + endif() + if (nvbench_lib_dir) + list(APPEND nvbench_ctl_ld_modifications + "LD_LIBRARY_PATH=path_list_prepend:$" ) endif() - if (nvbench_ld_library_path) - prepend_runtime_path(nvbench_ctl_ld_library_path nvbench_ld_library_path ":" LD_LIBRARY_PATH) + if (nvbench_ctl_ld_modifications) set_property(TEST nvbench_ctl PROPERTY - ENVIRONMENT "LD_LIBRARY_PATH=${nvbench_ctl_ld_library_path}" + ENVIRONMENT_MODIFICATION ${nvbench_ctl_ld_modifications} ) endif() endif() From 02a4a7e4dd3dae71f629f511c6f767e24920bbae Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 12:45:23 -0500 Subject: [PATCH 38/56] run_tests=false is not allows when device_testing=true --- .github/workflows/build-windows.yml | 4 ++++ 1 file changed, 4 
insertions(+) diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index 2c1d9cbe..b5b8099e 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -179,6 +179,10 @@ jobs: throw "Invalid Windows build inputs: gpu=true is only supported in NVIDIA/nvbench, where the configured runner can be selected." } + if ($deviceTesting -and -not $runTests) { + throw "Invalid Windows build inputs: device_testing=true requires run_tests=true." + } + if ($runTests -and $deviceTesting -and -not $gpuEnabled) { throw "Invalid Windows build inputs: run_tests=true and device_testing=true require gpu=true." } From 2a904bbc4a4e8307c7986db746798acb15e05de0 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 13:11:49 -0500 Subject: [PATCH 39/56] Specify Windows CUDA toolkit version major.minor.patch, derive devcontainer tag from full spec --- .github/workflows/build-windows.yml | 17 +++++++++++------ ci/matrix.yaml | 3 ++- ci/windows/install_cuda_profiler_api.ps1 | 13 +++---------- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index b5b8099e..2f7bf809 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -10,7 +10,7 @@ on: cuda: type: string required: false - default: "13.0" + default: "13.0.2" host: type: string required: false @@ -54,10 +54,10 @@ on: workflow_dispatch: inputs: cuda: - description: "CUDA Toolkit version" + description: "Full CUDA Toolkit version" type: string required: false - default: "13.0" + default: "13.0.2" host: description: "Host compiler tag" type: string @@ -121,7 +121,6 @@ jobs: id-token: write contents: read env: - WINDOWS_CI_IMAGE: ${{ inputs.image != '' && inputs.image || format('rapidsai/devcontainers:{0}-cuda{1}-{2}', inputs.devcontainer_version, inputs.cuda, inputs.host) }} 
SCCACHE_BUCKET: rapids-sccache-devs SCCACHE_REGION: us-east-2 SCCACHE_IDLE_TIMEOUT: "0" @@ -163,9 +162,12 @@ jobs: run: | $ErrorActionPreference = "Stop" - if ($env:NVBENCH_WINDOWS_CUDA -notmatch '^\d+\.\d+$') { - throw "Invalid CUDA version '$env:NVBENCH_WINDOWS_CUDA'. Expected '.', for example '13.0'." + if ($env:NVBENCH_WINDOWS_CUDA -notmatch '^\d+\.\d+\.\d+$') { + throw "Invalid CUDA version '$env:NVBENCH_WINDOWS_CUDA'. Expected '..', for example '13.0.2'." } + $cudaVersionParts = $env:NVBENCH_WINDOWS_CUDA.Split(".") + $cudaImageVersion = "$($cudaVersionParts[0]).$($cudaVersionParts[1])" + Add-Content -Path $env:GITHUB_OUTPUT -Value "cuda_image_version=$cudaImageVersion" if (@("17", "20") -notcontains $env:NVBENCH_WINDOWS_STD) { throw "Invalid C++ standard '$env:NVBENCH_WINDOWS_STD'. Expected '17' or '20'." @@ -218,6 +220,8 @@ jobs: Add-Content -Path $env:GITHUB_OUTPUT -Value "gpu_devices=$gpuDevices" - name: Fetch Windows devcontainer image + env: + WINDOWS_CI_IMAGE: ${{ inputs.image != '' && inputs.image || format('rapidsai/devcontainers:{0}-cuda{1}-{2}', inputs.devcontainer_version, steps.validate_windows_build_inputs.outputs.cuda_image_version, inputs.host) }} run: | docker pull "$env:WINDOWS_CI_IMAGE" @@ -230,6 +234,7 @@ jobs: NVBENCH_WINDOWS_GPU_DEVICES: ${{ steps.validate_windows_build_inputs.outputs.gpu_devices }} NVBENCH_WINDOWS_RUN_TESTS: ${{ inputs.run_tests }} NVBENCH_WINDOWS_DEVICE_TESTING: ${{ inputs.device_testing }} + WINDOWS_CI_IMAGE: ${{ inputs.image != '' && inputs.image || format('rapidsai/devcontainers:{0}-cuda{1}-{2}', inputs.devcontainer_version, steps.validate_windows_build_inputs.outputs.cuda_image_version, inputs.host) }} run: | $ErrorActionPreference = "Stop" diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 8b1d0db0..5b607edc 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -1,6 +1,7 @@ cuda_curr_min: &cuda_prev_min '12.0' cuda_curr_max: &cuda_prev_max '12.9' cuda_curr_max: &cuda_curr_max '13.0' +cuda_curr_max_windows: 
&cuda_curr_max_windows '13.0.2' # The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers devcontainer_version: '25.12' @@ -72,7 +73,7 @@ pull_request: - {cuda: *cuda_curr_max, compiler: *llvm19, cpu: 'amd64'} - {cuda: *cuda_curr_max, compiler: *llvm20, cpu: 'amd64'} windows: - - {cuda: *cuda_curr_max, compiler: *msvc2022, cpu: 'amd64', std: '17'} + - {cuda: *cuda_curr_max_windows, compiler: *msvc2022, cpu: 'amd64', std: '17'} # Python wheel builds python_wheels: diff --git a/ci/windows/install_cuda_profiler_api.ps1 b/ci/windows/install_cuda_profiler_api.ps1 index c98fd379..909089d8 100644 --- a/ci/windows/install_cuda_profiler_api.ps1 +++ b/ci/windows/install_cuda_profiler_api.ps1 @@ -148,14 +148,11 @@ function Invoke-WebRequestWithRetry { } if (-not $CUDA_VERSION) { - $CUDA_VERSION = Get-CudaVersionFromPath -Path $env:CUDA_PATH - if (-not $CUDA_VERSION) { - throw "Could not determine CUDA version. Provide -cudaVersion or set CUDA_PATH to a path ending in v.." - } + throw "CUDA installer version is required. Provide -cudaVersion .., for example '13.0.2'." } -if ($CUDA_VERSION -notmatch '^\d+\.\d+(\.\d+)?$') { - throw "Invalid CUDA version '$CUDA_VERSION'. Expected '.' or '..', for example '13.0' or '13.0.2'." +if ($CUDA_VERSION -notmatch '^\d+\.\d+\.\d+$') { + throw "Invalid CUDA installer version '$CUDA_VERSION'. Expected '..', for example '13.0.2'." 
} $version = [Version]$CUDA_VERSION @@ -163,10 +160,6 @@ $major = $version.Major $minor = $version.Minor $build = $version.Build -if ($build -lt 0) { - $build = 0 -} - $mmbVersionTag = "${major}.${minor}.${build}" $mmVersionTag = "${major}.${minor}" From 48a77bb5a9506776197b25f86d2fa0e7fd414981 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 13:32:14 -0500 Subject: [PATCH 40/56] Handle edge case when multiple CUPTI dlls exist, pick up, warn, do not fail --- cmake/NVBenchCUPTI.cmake | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cmake/NVBenchCUPTI.cmake b/cmake/NVBenchCUPTI.cmake index acff09ff..3cce8d7f 100644 --- a/cmake/NVBenchCUPTI.cmake +++ b/cmake/NVBenchCUPTI.cmake @@ -48,14 +48,18 @@ function(nvbench_find_windows_cupti_runtime_library out_var dep_name library_pat endif() if (runtime_libraries) + list(SORT runtime_libraries COMPARE NATURAL ORDER DESCENDING) list(LENGTH runtime_libraries num_runtime_libraries) if (num_runtime_libraries GREATER 1) - message(FATAL_ERROR - "Found multiple runtime DLLs for ${dep_name}: ${runtime_libraries}" + list(GET runtime_libraries 0 runtime_library) + message(WARNING + "Found multiple runtime DLLs for ${dep_name}; selecting " + "${runtime_library}. 
Candidates: ${runtime_libraries}" ) + else() + list(GET runtime_libraries 0 runtime_library) endif() - list(GET runtime_libraries 0 runtime_library) set(${out_var} "${runtime_library}" PARENT_SCOPE) return() endif() From 639e9ae8543c30d21e18a89d9b8165e30e2523d6 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 13:42:45 -0500 Subject: [PATCH 41/56] Always specify -DNVBench_ENABLE_DEVICE_TESTING=VAL per value of --- ci/windows/build_nvbench.ps1 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/windows/build_nvbench.ps1 b/ci/windows/build_nvbench.ps1 index e16f57e3..dcc6950f 100644 --- a/ci/windows/build_nvbench.ps1 +++ b/ci/windows/build_nvbench.ps1 @@ -67,6 +67,8 @@ try { ) if ($deviceTesting) { $localOptions += "-DNVBench_ENABLE_DEVICE_TESTING=ON" + } else { + $localOptions += "-DNVBench_ENABLE_DEVICE_TESTING=OFF" } Configure-And-Build-Preset "NVBench" $preset $localOptions From ccde292f052209612f10536ba8293c3d1c39c625 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 14:15:18 -0500 Subject: [PATCH 42/56] Back to cuda major.minor being input What CUDA Profiler API to install is determined from redist information stored in version.json stored at root of CUDA Toolkit. 
If version.json is not found, an error occurs --- .github/workflows/build-windows.yml | 17 +- ci/matrix.yaml | 3 +- ci/windows/install_cuda_profiler_api.ps1 | 412 +++++++++++++++++++---- 3 files changed, 355 insertions(+), 77 deletions(-) diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index 2f7bf809..afb4e743 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -10,7 +10,7 @@ on: cuda: type: string required: false - default: "13.0.2" + default: "13.0" host: type: string required: false @@ -54,10 +54,10 @@ on: workflow_dispatch: inputs: cuda: - description: "Full CUDA Toolkit version" + description: "CUDA Toolkit major.minor version" type: string required: false - default: "13.0.2" + default: "13.0" host: description: "Host compiler tag" type: string @@ -162,12 +162,9 @@ jobs: run: | $ErrorActionPreference = "Stop" - if ($env:NVBENCH_WINDOWS_CUDA -notmatch '^\d+\.\d+\.\d+$') { - throw "Invalid CUDA version '$env:NVBENCH_WINDOWS_CUDA'. Expected '..', for example '13.0.2'." + if ($env:NVBENCH_WINDOWS_CUDA -notmatch '^\d+\.\d+$') { + throw "Invalid CUDA version '$env:NVBENCH_WINDOWS_CUDA'. Expected '.', for example '13.0'." } - $cudaVersionParts = $env:NVBENCH_WINDOWS_CUDA.Split(".") - $cudaImageVersion = "$($cudaVersionParts[0]).$($cudaVersionParts[1])" - Add-Content -Path $env:GITHUB_OUTPUT -Value "cuda_image_version=$cudaImageVersion" if (@("17", "20") -notcontains $env:NVBENCH_WINDOWS_STD) { throw "Invalid C++ standard '$env:NVBENCH_WINDOWS_STD'. Expected '17' or '20'." 
@@ -221,7 +218,7 @@ jobs: - name: Fetch Windows devcontainer image env: - WINDOWS_CI_IMAGE: ${{ inputs.image != '' && inputs.image || format('rapidsai/devcontainers:{0}-cuda{1}-{2}', inputs.devcontainer_version, steps.validate_windows_build_inputs.outputs.cuda_image_version, inputs.host) }} + WINDOWS_CI_IMAGE: ${{ inputs.image != '' && inputs.image || format('rapidsai/devcontainers:{0}-cuda{1}-{2}', inputs.devcontainer_version, inputs.cuda, inputs.host) }} run: | docker pull "$env:WINDOWS_CI_IMAGE" @@ -234,7 +231,7 @@ jobs: NVBENCH_WINDOWS_GPU_DEVICES: ${{ steps.validate_windows_build_inputs.outputs.gpu_devices }} NVBENCH_WINDOWS_RUN_TESTS: ${{ inputs.run_tests }} NVBENCH_WINDOWS_DEVICE_TESTING: ${{ inputs.device_testing }} - WINDOWS_CI_IMAGE: ${{ inputs.image != '' && inputs.image || format('rapidsai/devcontainers:{0}-cuda{1}-{2}', inputs.devcontainer_version, steps.validate_windows_build_inputs.outputs.cuda_image_version, inputs.host) }} + WINDOWS_CI_IMAGE: ${{ inputs.image != '' && inputs.image || format('rapidsai/devcontainers:{0}-cuda{1}-{2}', inputs.devcontainer_version, inputs.cuda, inputs.host) }} run: | $ErrorActionPreference = "Stop" diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 5b607edc..8b1d0db0 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -1,7 +1,6 @@ cuda_curr_min: &cuda_prev_min '12.0' cuda_curr_max: &cuda_prev_max '12.9' cuda_curr_max: &cuda_curr_max '13.0' -cuda_curr_max_windows: &cuda_curr_max_windows '13.0.2' # The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers devcontainer_version: '25.12' @@ -73,7 +72,7 @@ pull_request: - {cuda: *cuda_curr_max, compiler: *llvm19, cpu: 'amd64'} - {cuda: *cuda_curr_max, compiler: *llvm20, cpu: 'amd64'} windows: - - {cuda: *cuda_curr_max_windows, compiler: *msvc2022, cpu: 'amd64', std: '17'} + - {cuda: *cuda_curr_max, compiler: *msvc2022, cpu: 'amd64', std: '17'} # Python wheel builds python_wheels: diff --git a/ci/windows/install_cuda_profiler_api.ps1 
b/ci/windows/install_cuda_profiler_api.ps1 index 909089d8..5f11de2a 100644 --- a/ci/windows/install_cuda_profiler_api.ps1 +++ b/ci/windows/install_cuda_profiler_api.ps1 @@ -9,6 +9,8 @@ Param( $ErrorActionPreference = "Stop" +$RedistRootUri = "https://developer.download.nvidia.com/compute/cuda/redist" + function Get-CudaVersionFromPath { Param( [Parameter(Mandatory = $false)] @@ -59,30 +61,6 @@ function Assert-SamePath { } } -function Assert-NvidiaAuthenticodeSignature { - Param( - [Parameter(Mandatory = $true)] - [ValidateNotNullOrEmpty()] - [string]$Path - ) - - $signature = Get-AuthenticodeSignature -FilePath $Path - if ($signature.Status -ne "Valid") { - throw "Invalid Authenticode signature for '$Path': $($signature.Status) $($signature.StatusMessage)" - } - - $expectedPublisher = "NVIDIA Corporation" - $publisher = $signature.SignerCertificate.GetNameInfo( - [System.Security.Cryptography.X509Certificates.X509NameType]::SimpleName, - $false - ) - if ($publisher -ne $expectedPublisher) { - throw "Unexpected signer for '$Path': $publisher" - } - - Write-Host "Validated Authenticode signature for '$Path': $publisher" -} - function Get-HttpStatusCodeFromError { Param( [Parameter(Mandatory = $true)] @@ -130,7 +108,7 @@ function Invoke-WebRequestWithRetry { } catch { $statusCode = Get-HttpStatusCodeFromError -ErrorRecord $_ # Fail fast for deterministic client errors that indicate a bad URL, - # missing installer, or unsupported method. Keep 408/429 and 5xx on + # missing package, or unsupported method. Keep 408/429 and 5xx on # the retry path because they are commonly transient in CI. if (@(400, 401, 403, 404, 405, 410, 414) -contains $statusCode) { throw "Download failed with non-retryable HTTP status $statusCode from '$Uri'. 
$_" @@ -147,25 +125,345 @@ function Invoke-WebRequestWithRetry { } } +function Read-JsonFile { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$Path + ) + + try { + $content = Get-Content -LiteralPath $Path -Raw + $json = $content | ConvertFrom-Json + return $json + } catch { + throw "Failed to parse JSON file '$Path'. $_" + } +} + +function Get-JsonPropertyValue { + Param( + [Parameter(Mandatory = $true)] + $Object, + + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$Name + ) + + if ($null -eq $Object) { + return $null + } + + $property = $Object.PSObject.Properties[$Name] + if (-not $property) { + return $null + } + + return $property.Value +} + +function Get-ComponentVersion { + Param( + [Parameter(Mandatory = $true)] + $JsonObject, + + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$ComponentName + ) + + $component = Get-JsonPropertyValue -Object $JsonObject -Name $ComponentName + if ($null -eq $component) { + return "" + } + + $version = Get-JsonPropertyValue -Object $component -Name "version" + if ($null -eq $version) { + return "" + } + + return [string]$version +} + +function Get-CudaVersionFromRoot { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$CudaRoot + ) + + $pathVersion = Get-CudaVersionFromPath -Path $CudaRoot + if ($pathVersion) { + return $pathVersion + } + + $versionJson = Join-Path $CudaRoot "version.json" + if (Test-Path $versionJson) { + $versionData = Read-JsonFile -Path $versionJson + $cudaVersion = Get-ComponentVersion -JsonObject $versionData -ComponentName "cuda" + if ($cudaVersion -match '^(?\d+\.\d+)(\.|$)') { + return $Matches.version + } + } + + return "" +} + +function Assert-Sha256 { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$Path, + + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$ExpectedSha256 + ) + + $actualSha256 = (Get-FileHash -LiteralPath $Path 
-Algorithm SHA256).Hash.ToLowerInvariant() + $expectedSha256 = $ExpectedSha256.ToLowerInvariant() + if ($actualSha256 -ne $expectedSha256) { + throw "SHA256 mismatch for '$Path'. Expected '$expectedSha256', got '$actualSha256'." + } + + Write-Host "Validated SHA256 for '$Path': $actualSha256" +} + +function Get-RedistribManifestNames { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$CudaVersionTag + ) + + $indexFile = Join-Path $env:TEMP "cuda_redist_index_$PID.html" + try { + Invoke-WebRequestWithRetry -Uri "$RedistRootUri/" -OutFile $indexFile + $indexContent = Get-Content -LiteralPath $indexFile -Raw + } finally { + Remove-Item $indexFile -ErrorAction SilentlyContinue + } + + $pattern = "redistrib_$([regex]::Escape($CudaVersionTag))\.\d+\.json" + $manifestNames = @( + [regex]::Matches($indexContent, $pattern) | + ForEach-Object { $_.Value } | + Sort-Object -Unique + ) + + if ($manifestNames.Count -eq 0) { + throw "No CUDA $CudaVersionTag redistrib manifests were found at $RedistRootUri." 
+ } + + return @( + $manifestNames | + ForEach-Object { + [PSCustomObject]@{ + Name = $_ + Version = [Version](($_ -replace '^redistrib_', '') -replace '\.json$', '') + } + } | + Sort-Object -Property Version -Descending | + ForEach-Object { $_.Name } + ) +} + +function Read-RedistManifest { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$ManifestName + ) + + $manifestFile = Join-Path $env:TEMP $ManifestName + try { + Invoke-WebRequestWithRetry -Uri "$RedistRootUri/$ManifestName" -OutFile $manifestFile + return Read-JsonFile -Path $manifestFile + } finally { + Remove-Item $manifestFile -ErrorAction SilentlyContinue + } +} + +function Select-ProfilerApiManifest { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$CudaVersionTag, + + [Parameter(Mandatory = $true)] + $VersionData + ) + + $localProfilerApiVersion = Get-ComponentVersion ` + -JsonObject $VersionData ` + -ComponentName "cuda_profiler_api" + $manifestNames = Get-RedistribManifestNames -CudaVersionTag $CudaVersionTag + + if ($localProfilerApiVersion) { + Write-Host "CUDA version metadata reports cuda_profiler_api $localProfilerApiVersion." + } else { + Write-Host "CUDA version metadata does not report cuda_profiler_api; matching by installed core components." + } + + $matchComponents = @("cuda_cupti", "cuda_cudart", "cuda_nvcc", "cuda_cccl") + $bestCandidate = $null + + foreach ($manifestName in $manifestNames) { + $manifest = Read-RedistManifest -ManifestName $manifestName + $manifestProfilerApiVersion = Get-ComponentVersion ` + -JsonObject $manifest ` + -ComponentName "cuda_profiler_api" + + if (-not $manifestProfilerApiVersion) { + continue + } + + if ($localProfilerApiVersion) { + if ($manifestProfilerApiVersion -eq $localProfilerApiVersion) { + Write-Host "Selected CUDA redist manifest $manifestName." 
+ return [PSCustomObject]@{ + Name = $manifestName + Manifest = $manifest + } + } + continue + } + + $matches = 0 + $mismatches = @() + foreach ($componentName in $matchComponents) { + $localVersion = Get-ComponentVersion ` + -JsonObject $VersionData ` + -ComponentName $componentName + $manifestVersion = Get-ComponentVersion ` + -JsonObject $manifest ` + -ComponentName $componentName + + if (-not $localVersion -or -not $manifestVersion) { + continue + } + + if ($localVersion -eq $manifestVersion) { + $matches++ + } else { + $mismatches += "$componentName local=$localVersion manifest=$manifestVersion" + } + } + + if ($matches -gt 0 -and $mismatches.Count -eq 0) { + if ($null -eq $bestCandidate -or $matches -gt $bestCandidate.MatchCount) { + $bestCandidate = [PSCustomObject]@{ + Name = $manifestName + Manifest = $manifest + MatchCount = $matches + } + } + } + } + + if ($localProfilerApiVersion) { + throw "Could not find a CUDA $CudaVersionTag redistrib manifest with cuda_profiler_api $localProfilerApiVersion." + } + + if ($null -eq $bestCandidate) { + throw "Could not match installed CUDA Toolkit component versions to a CUDA $CudaVersionTag redistrib manifest." + } + + Write-Host "Selected CUDA redist manifest $($bestCandidate.Name) using $($bestCandidate.MatchCount) component version match(es)." 
+ return [PSCustomObject]@{ + Name = $bestCandidate.Name + Manifest = $bestCandidate.Manifest + } +} + +function Get-PayloadRoot { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$ExtractDir + ) + + $directories = @(Get-ChildItem -LiteralPath $ExtractDir -Directory) + $files = @(Get-ChildItem -LiteralPath $ExtractDir -File) + if ($directories.Count -eq 1 -and $files.Count -eq 0) { + return $directories[0].FullName + } + + return $ExtractDir +} + +function Install-ProfilerApiPackage { + Param( + [Parameter(Mandatory = $true)] + $ManifestSelection, + + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$CudaRoot + ) + + $component = Get-JsonPropertyValue ` + -Object $ManifestSelection.Manifest ` + -Name "cuda_profiler_api" + if ($null -eq $component) { + throw "Manifest $($ManifestSelection.Name) does not contain cuda_profiler_api." + } + + $package = Get-JsonPropertyValue -Object $component -Name "windows-x86_64" + if ($null -eq $package) { + throw "Manifest $($ManifestSelection.Name) does not contain cuda_profiler_api for windows-x86_64." + } + + $relativePath = Get-JsonPropertyValue -Object $package -Name "relative_path" + $expectedSha256 = Get-JsonPropertyValue -Object $package -Name "sha256" + if (-not $relativePath -or -not $expectedSha256) { + throw "Manifest $($ManifestSelection.Name) is missing cuda_profiler_api relative_path or sha256." 
+ } + if ($relativePath -notmatch '^cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-[^/]+-archive\.zip$') { + throw "Unexpected cuda_profiler_api package path in $($ManifestSelection.Name): $relativePath" + } + + $pathParts = $relativePath -split '/' + $archiveName = $pathParts[$pathParts.Length - 1] + $archive = Join-Path $env:TEMP $archiveName + $extractDir = Join-Path $env:TEMP "cuda_profiler_api_$([Guid]::NewGuid().ToString('N'))" + $archiveUri = "$RedistRootUri/$relativePath" + + try { + Write-Host "Downloading CUDA Profiler API redist package: $archiveUri" + Invoke-WebRequestWithRetry -Uri $archiveUri -OutFile $archive + Assert-Sha256 -Path $archive -ExpectedSha256 $expectedSha256 + + Expand-Archive -LiteralPath $archive -DestinationPath $extractDir -Force + $payloadRoot = Get-PayloadRoot -ExtractDir $extractDir + $payloadHeader = Join-Path $payloadRoot "include\cuda_profiler_api.h" + if (-not (Test-Path $payloadHeader)) { + throw "CUDA Profiler API archive did not contain expected header: $payloadHeader" + } + + Write-Host "Installing CUDA Profiler API package into: $CudaRoot" + Copy-Item -Path (Join-Path $payloadRoot "*") -Destination $CudaRoot -Recurse -Force + } finally { + Remove-Item $archive -ErrorAction SilentlyContinue + Remove-Item $extractDir -Recurse -Force -ErrorAction SilentlyContinue + } +} + if (-not $CUDA_VERSION) { - throw "CUDA installer version is required. Provide -cudaVersion .., for example '13.0.2'." + throw "CUDA Toolkit version is required. Provide -cudaVersion ., for example '13.0'." } -if ($CUDA_VERSION -notmatch '^\d+\.\d+\.\d+$') { - throw "Invalid CUDA installer version '$CUDA_VERSION'. Expected '..', for example '13.0.2'." +if ($CUDA_VERSION -notmatch '^\d+\.\d+$') { + throw "Invalid CUDA Toolkit version '$CUDA_VERSION'. Expected '.', for example '13.0'." 
} $version = [Version]$CUDA_VERSION -$major = $version.Major -$minor = $version.Minor -$build = $version.Build - -$mmbVersionTag = "${major}.${minor}.${build}" -$mmVersionTag = "${major}.${minor}" +$mmVersionTag = "$($version.Major).$($version.Minor)" $nvccCudaRoot = Get-CudaRootFromNvcc if ($nvccCudaRoot) { - $nvccCudaVersion = Get-CudaVersionFromPath -Path $nvccCudaRoot + $nvccCudaVersion = Get-CudaVersionFromRoot -CudaRoot $nvccCudaRoot if (-not $nvccCudaVersion) { throw "Could not determine CUDA version from active nvcc.exe root: $nvccCudaRoot" } @@ -175,9 +473,9 @@ if ($nvccCudaRoot) { } if ($env:CUDA_PATH) { - $cudaPathVersion = Get-CudaVersionFromPath -Path $env:CUDA_PATH + $cudaPathVersion = Get-CudaVersionFromRoot -CudaRoot $env:CUDA_PATH if (-not $cudaPathVersion) { - throw "CUDA_PATH is set but does not end in v.: $env:CUDA_PATH" + throw "Could not determine CUDA version from CUDA_PATH: $env:CUDA_PATH" } if ($cudaPathVersion -ne $mmVersionTag) { throw "CUDA_PATH points to CUDA $cudaPathVersion, but CUDA $mmVersionTag was requested." 
@@ -194,42 +492,26 @@ if ($env:CUDA_PATH) { } else { $cudaRoot = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$mmVersionTag" } -$profilerHeader = "$cudaRoot\include\cuda_profiler_api.h" +$profilerHeader = Join-Path $cudaRoot "include\cuda_profiler_api.h" if (Test-Path $profilerHeader) { Write-Host "CUDA Profiler API is already installed: $profilerHeader" return } -$component = "cuda_profiler_api_$mmVersionTag" -$cudaMajorUri = "${mmbVersionTag}/network_installers/cuda_${mmbVersionTag}_windows_network.exe" -$cudaVersionUrl = "https://developer.download.nvidia.com/compute/cuda/$cudaMajorUri" -$installer = Join-Path $env:TEMP "cuda_${mmbVersionTag}_windows_network.exe" - -Write-Host "Installing CUDA component: $component" -Write-Host "Downloading CUDA network installer: $cudaVersionUrl" -Invoke-WebRequestWithRetry -Uri $cudaVersionUrl -OutFile $installer -Assert-NvidiaAuthenticodeSignature -Path $installer - -$installerTimeoutSeconds = 900 -$process = $null -try { - $process = Start-Process -PassThru -FilePath $installer -ArgumentList @("-s", $component) - if (-not $process.WaitForExit($installerTimeoutSeconds * 1000)) { - Stop-Process -Id $process.Id -Force -ErrorAction SilentlyContinue - throw "CUDA network installer timed out after $installerTimeoutSeconds seconds." - } - - if ($process.ExitCode -ne 0) { - throw "CUDA network installer failed with exit code $($process.ExitCode)." - } -} finally { - if ($process) { - $process.Dispose() - } - Remove-Item $installer -ErrorAction SilentlyContinue +$versionJson = Join-Path $cudaRoot "version.json" +if (-not (Test-Path $versionJson)) { + throw "CUDA Toolkit version metadata was not found: $versionJson. Cannot determine the matching cuda_profiler_api redist package." 
} +$versionData = Read-JsonFile -Path $versionJson +$manifestSelection = Select-ProfilerApiManifest ` + -CudaVersionTag $mmVersionTag ` + -VersionData $versionData +Install-ProfilerApiPackage ` + -ManifestSelection $manifestSelection ` + -CudaRoot $cudaRoot + if (-not (Test-Path $profilerHeader)) { throw "CUDA Profiler API installation completed, but header was not found: $profilerHeader" } From 0ab9320abb8b1291b594f34d1eb699aa1a042c4d Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 14:25:21 -0500 Subject: [PATCH 43/56] Remove parameters intended to enable testing builds on Windows. Deferred for future work --- .github/workflows/build-windows.yml | 85 +---------------------------- ci/windows/build_nvbench.ps1 | 44 +-------------- 2 files changed, 4 insertions(+), 125 deletions(-) diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index afb4e743..078240fc 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -35,22 +35,6 @@ on: type: string required: false default: "" - gpu: - type: boolean - required: false - default: false - gpu_devices: - type: string - required: false - default: "all" - run_tests: - type: boolean - required: false - default: false - device_testing: - type: boolean - required: false - default: false workflow_dispatch: inputs: cuda: @@ -89,26 +73,6 @@ on: type: string required: false default: "" - gpu: - description: "Expose host GPUs to the Windows devcontainer" - type: boolean - required: false - default: false - gpu_devices: - description: "GPU devices to expose when gpu is true: 'all' or 'device=0,1'" - type: string - required: false - default: "all" - run_tests: - description: "Run CTest after building" - type: boolean - required: false - default: false - device_testing: - description: "Enable tests that require a CUDA-capable device" - type: boolean - required: false - default: false 
permissions: contents: read @@ -154,11 +118,6 @@ jobs: NVBENCH_WINDOWS_CUDA: ${{ inputs.cuda }} NVBENCH_WINDOWS_STD: ${{ inputs.std }} NVBENCH_WINDOWS_ARCH: ${{ inputs.arch }} - NVBENCH_WINDOWS_GPU: ${{ inputs.gpu }} - NVBENCH_WINDOWS_GPU_DEVICES: ${{ inputs.gpu_devices }} - NVBENCH_WINDOWS_RUN_TESTS: ${{ inputs.run_tests }} - NVBENCH_WINDOWS_DEVICE_TESTING: ${{ inputs.device_testing }} - NVBENCH_GITHUB_REPOSITORY: ${{ github.repository }} run: | $ErrorActionPreference = "Stop" @@ -170,22 +129,6 @@ jobs: throw "Invalid C++ standard '$env:NVBENCH_WINDOWS_STD'. Expected '17' or '20'." } - $gpuEnabled = $env:NVBENCH_WINDOWS_GPU -eq "true" - $runTests = $env:NVBENCH_WINDOWS_RUN_TESTS -eq "true" - $deviceTesting = $env:NVBENCH_WINDOWS_DEVICE_TESTING -eq "true" - - if ($gpuEnabled -and $env:NVBENCH_GITHUB_REPOSITORY -ne "NVIDIA/nvbench") { - throw "Invalid Windows build inputs: gpu=true is only supported in NVIDIA/nvbench, where the configured runner can be selected." - } - - if ($deviceTesting -and -not $runTests) { - throw "Invalid Windows build inputs: device_testing=true requires run_tests=true." - } - - if ($runTests -and $deviceTesting -and -not $gpuEnabled) { - throw "Invalid Windows build inputs: run_tests=true and device_testing=true require gpu=true." - } - $arch = "$env:NVBENCH_WINDOWS_ARCH".Trim() $normalizedArch = $arch if ($arch) { @@ -204,18 +147,6 @@ jobs: } Add-Content -Path $env:GITHUB_OUTPUT -Value "arch=$normalizedArch" - $gpuDevices = "$env:NVBENCH_WINDOWS_GPU_DEVICES".Trim() - if ($gpuDevices -and $gpuDevices -ne "all" -and $gpuDevices -notmatch '^device=\d+(,\d+)*$') { - throw "Invalid GPU devices value '$gpuDevices'. Expected 'all' or a device list like 'device=0,1'." - } - if ($gpuEnabled -and -not $gpuDevices) { - throw "GPU devices must not be empty when GPU support is enabled." 
- } - if (-not $gpuEnabled -and $gpuDevices -and $gpuDevices -ne "all") { - throw "Invalid Windows build inputs: gpu_devices may only select specific devices when gpu=true." - } - Add-Content -Path $env:GITHUB_OUTPUT -Value "gpu_devices=$gpuDevices" - - name: Fetch Windows devcontainer image env: WINDOWS_CI_IMAGE: ${{ inputs.image != '' && inputs.image || format('rapidsai/devcontainers:{0}-cuda{1}-{2}', inputs.devcontainer_version, inputs.cuda, inputs.host) }} @@ -227,10 +158,6 @@ jobs: NVBENCH_WINDOWS_CUDA: ${{ inputs.cuda }} NVBENCH_WINDOWS_STD: ${{ inputs.std }} NVBENCH_WINDOWS_ARCH: ${{ steps.validate_windows_build_inputs.outputs.arch }} - NVBENCH_WINDOWS_GPU: ${{ inputs.gpu }} - NVBENCH_WINDOWS_GPU_DEVICES: ${{ steps.validate_windows_build_inputs.outputs.gpu_devices }} - NVBENCH_WINDOWS_RUN_TESTS: ${{ inputs.run_tests }} - NVBENCH_WINDOWS_DEVICE_TESTING: ${{ inputs.device_testing }} WINDOWS_CI_IMAGE: ${{ inputs.image != '' && inputs.image || format('rapidsai/devcontainers:{0}-cuda{1}-{2}', inputs.devcontainer_version, inputs.cuda, inputs.host) }} run: | $ErrorActionPreference = "Stop" @@ -243,7 +170,7 @@ jobs: `$ErrorActionPreference = 'Stop' git config --global --add safe.directory '$containerRepo' & '$containerRepo/ci/windows/install_cuda_profiler_api.ps1' -cudaVersion "`$env:NVBENCH_WINDOWS_CUDA" - & '$containerRepo/ci/windows/build_nvbench.ps1' -std "`$env:NVBENCH_WINDOWS_STD" -arch "`$env:NVBENCH_WINDOWS_ARCH" -run-tests "`$env:NVBENCH_WINDOWS_RUN_TESTS" -device-testing "`$env:NVBENCH_WINDOWS_DEVICE_TESTING" + & '$containerRepo/ci/windows/build_nvbench.ps1' -std "`$env:NVBENCH_WINDOWS_STD" -arch "`$env:NVBENCH_WINDOWS_ARCH" exit `$LASTEXITCODE "@ | Set-Content -Path $script -Encoding UTF8 @@ -266,9 +193,6 @@ jobs: "--env", "GITHUB_SHA=$env:GITHUB_SHA", "--env", "NVBENCH_WINDOWS_ARCH=$env:NVBENCH_WINDOWS_ARCH", "--env", "NVBENCH_WINDOWS_CUDA=$env:NVBENCH_WINDOWS_CUDA", - "--env", "NVBENCH_WINDOWS_DEVICE_TESTING=$env:NVBENCH_WINDOWS_DEVICE_TESTING", - 
"--env", "NVBENCH_WINDOWS_GPU=$env:NVBENCH_WINDOWS_GPU", - "--env", "NVBENCH_WINDOWS_RUN_TESTS=$env:NVBENCH_WINDOWS_RUN_TESTS", "--env", "NVBENCH_WINDOWS_STD=$env:NVBENCH_WINDOWS_STD", "--env", "SCCACHE_BUCKET=$env:SCCACHE_BUCKET", "--env", "SCCACHE_IDLE_TIMEOUT=$env:SCCACHE_IDLE_TIMEOUT", @@ -279,13 +203,6 @@ jobs: "--env", "SCCACHE_S3_USE_SSL=$env:SCCACHE_S3_USE_SSL" ) - if ($env:NVBENCH_WINDOWS_GPU -eq "true") { - if (-not $env:NVBENCH_WINDOWS_GPU_DEVICES) { - throw "NVBENCH_WINDOWS_GPU_DEVICES must not be empty when GPU support is enabled." - } - $dockerArgs += @("--gpus", $env:NVBENCH_WINDOWS_GPU_DEVICES) - } - $dockerArgs += @( "$env:WINDOWS_CI_IMAGE", "powershell", "-NoLogo", "-NoProfile", "-ExecutionPolicy", "Bypass", diff --git a/ci/windows/build_nvbench.ps1 b/ci/windows/build_nvbench.ps1 index dcc6950f..dac5955b 100644 --- a/ci/windows/build_nvbench.ps1 +++ b/ci/windows/build_nvbench.ps1 @@ -11,37 +11,11 @@ Param( [Parameter(Mandatory = $false)] [Alias("cmake-options")] - [string]$CMAKE_OPTIONS = "", - - [Parameter(Mandatory = $false)] - [Alias("run-tests")] - [string]$RUN_TESTS = "false", - - [Parameter(Mandatory = $false)] - [Alias("device-testing")] - [string]$DEVICE_TESTING = "false" + [string]$CMAKE_OPTIONS = "" ) $ErrorActionPreference = "Stop" -function ConvertTo-Bool { - Param( - [Parameter(Mandatory = $false)] - [AllowNull()] - [string]$Value = "" - ) - - $normalized = if ($null -eq $Value) { "" } else { $Value.Trim().ToLowerInvariant() } - if (@("1", "true", "yes", "on") -contains $normalized) { - return $true - } - if (@("0", "false", "no", "off", "") -contains $normalized) { - return $false - } - - throw "Expected a boolean-like value, got '$Value'." 
-} - $initialPath = Get-Location $pushed = $false @@ -53,28 +27,16 @@ if ((Split-Path $pwd -Leaf) -ne "ci") { try { Import-Module "$PSScriptRoot/build_common.psm1" -ArgumentList @($CXX_STANDARD, $CUDA_ARCH, $CMAKE_OPTIONS) -Force - $runTests = ConvertTo-Bool $RUN_TESTS - $deviceTesting = ConvertTo-Bool $DEVICE_TESTING - Print-EnvironmentDetails - Write-Host "RUN_TESTS=$runTests" - Write-Host "DEVICE_TESTING=$deviceTesting" $preset = "nvbench-ci" $localOptions = @( "-DCMAKE_CXX_STANDARD=$CXX_STANDARD", - "-DCMAKE_CUDA_STANDARD=$CXX_STANDARD" + "-DCMAKE_CUDA_STANDARD=$CXX_STANDARD", + "-DNVBench_ENABLE_DEVICE_TESTING=ON" ) - if ($deviceTesting) { - $localOptions += "-DNVBench_ENABLE_DEVICE_TESTING=ON" - } else { - $localOptions += "-DNVBench_ENABLE_DEVICE_TESTING=OFF" - } Configure-And-Build-Preset "NVBench" $preset $localOptions - if ($runTests) { - Test-Preset "NVBench" $preset - } } finally { if ($pushed) { Set-Location $initialPath From c55f7f240d360bf0a4cf010b8a51938f6e0d92ad Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 14:34:22 -0500 Subject: [PATCH 44/56] Handle import nvbench::nvbench the same as nvbench target in NVBenchConfigTarget --- cmake/NVBenchConfigTarget.cmake | 75 +++++++++++++++++++++++++++++---- 1 file changed, 67 insertions(+), 8 deletions(-) diff --git a/cmake/NVBenchConfigTarget.cmake b/cmake/NVBenchConfigTarget.cmake index b1c5f758..b194c480 100644 --- a/cmake/NVBenchConfigTarget.cmake +++ b/cmake/NVBenchConfigTarget.cmake @@ -137,6 +137,66 @@ function(nvbench_config_target target_name) endif() endfunction() +function(nvbench_get_imported_location out_var target_name) + get_property(imported_configs TARGET ${target_name} + PROPERTY IMPORTED_CONFIGURATIONS + ) + list(LENGTH imported_configs num_configs) + + if (num_configs GREATER 0) + if (CMAKE_BUILD_TYPE) + string(TOUPPER "${CMAKE_BUILD_TYPE}" build_type) + list(FIND imported_configs "${build_type}" 
imported_config_index) + else() + set(imported_config_index -1) + endif() + + if (imported_config_index GREATER_EQUAL 0) + list(GET imported_configs ${imported_config_index} imported_config) + else() + list(GET imported_configs 0 imported_config) + endif() + + get_property(imported_location TARGET ${target_name} + PROPERTY IMPORTED_LOCATION_${imported_config} + ) + endif() + + if (NOT imported_location) + get_property(imported_location TARGET ${target_name} + PROPERTY IMPORTED_LOCATION + ) + endif() + + set(${out_var} "${imported_location}" PARENT_SCOPE) +endfunction() + +function(nvbench_append_test_runtime_path path_modifications_var target_name) + if (NOT TARGET ${target_name}) + return() + endif() + + get_property(is_imported TARGET ${target_name} PROPERTY IMPORTED) + if (is_imported) + nvbench_get_imported_location(runtime_artifact ${target_name}) + if (runtime_artifact) + cmake_path(GET runtime_artifact PARENT_PATH runtime_dir) + list(APPEND ${path_modifications_var} + "PATH=path_list_prepend:$" + ) + endif() + else() + list(APPEND ${path_modifications_var} + "PATH=path_list_prepend:$" + ) + endif() + + set(${path_modifications_var} + "${${path_modifications_var}}" + PARENT_SCOPE + ) +endfunction() + function(nvbench_config_test_runtime_environment test_name) if (NOT WIN32) return() @@ -144,18 +204,17 @@ function(nvbench_config_test_runtime_environment test_name) set(path_modifications "") if (TARGET nvbench) - list(APPEND path_modifications "PATH=path_list_prepend:$") + nvbench_append_test_runtime_path(path_modifications nvbench) + else() + nvbench_append_test_runtime_path(path_modifications nvbench::nvbench) endif() - if (TARGET nvbench::cupti) - get_property(cupti_runtime_lib TARGET nvbench::cupti PROPERTY IMPORTED_LOCATION) - if (cupti_runtime_lib) - cmake_path(GET cupti_runtime_lib PARENT_PATH cupti_runtime_dir) - list(APPEND path_modifications "PATH=path_list_prepend:$") - endif() - endif() + nvbench_append_test_runtime_path(path_modifications 
nvbench::cupti) + nvbench_append_test_runtime_path(path_modifications nvbench::nvperf_target) + nvbench_append_test_runtime_path(path_modifications nvbench::nvperf_host) if (path_modifications) + list(REMOVE_DUPLICATES path_modifications) set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT_MODIFICATION ${path_modifications} ) From dd0a9b075a280001329e78601deddf97aa65bcef Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 14:35:48 -0500 Subject: [PATCH 45/56] Forward cmake variables only if set --- testing/cmake/CMakeLists.txt | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/testing/cmake/CMakeLists.txt b/testing/cmake/CMakeLists.txt index 22e05b9b..a9a5ea44 100644 --- a/testing/cmake/CMakeLists.txt +++ b/testing/cmake/CMakeLists.txt @@ -16,10 +16,16 @@ if (WIN32) endif() list(APPEND cmake_opts -D "CMAKE_CUDA_HOST_COMPILER=${cuda_host_compiler}" - -D "CMAKE_LINKER=${CMAKE_LINKER}" - -D "CMAKE_RC_COMPILER=${CMAKE_RC_COMPILER}" - -D "CMAKE_MT=${CMAKE_MT}" ) + if (CMAKE_LINKER) + list(APPEND cmake_opts -D "CMAKE_LINKER=${CMAKE_LINKER}") + endif() + if (CMAKE_RC_COMPILER) + list(APPEND cmake_opts -D "CMAKE_RC_COMPILER=${CMAKE_RC_COMPILER}") + endif() + if (CMAKE_MT) + list(APPEND cmake_opts -D "CMAKE_MT=${CMAKE_MT}") + endif() endif() # Temporary installation prefix for tests against installed nvbench: From d1a50a545b790d5ae0abeaab3c129352e0fee5fd Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 14:50:01 -0500 Subject: [PATCH 46/56] Use UTF-8 encoding when appending to GITHUB_OUTPUT --- .github/workflows/build-windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index 078240fc..ffb8a90c 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -145,7 
+145,7 @@ jobs: $normalizedArch = $archItems -join ';' } } - Add-Content -Path $env:GITHUB_OUTPUT -Value "arch=$normalizedArch" + "arch=$normalizedArch" | Out-File -FilePath $env:GITHUB_OUTPUT -Encoding utf8 -Append - name: Fetch Windows devcontainer image env: From 697f38745dd940d7c766afa3cb1d2cc88555ad70 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 14:52:50 -0500 Subject: [PATCH 47/56] Avoid PowerShell footgun where local variable shadows built-in variable due to case insensitivity --- ci/windows/install_cuda_profiler_api.ps1 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ci/windows/install_cuda_profiler_api.ps1 b/ci/windows/install_cuda_profiler_api.ps1 index 5f11de2a..556a32ee 100644 --- a/ci/windows/install_cuda_profiler_api.ps1 +++ b/ci/windows/install_cuda_profiler_api.ps1 @@ -330,7 +330,7 @@ function Select-ProfilerApiManifest { continue } - $matches = 0 + $componentMatches = 0 $mismatches = @() foreach ($componentName in $matchComponents) { $localVersion = Get-ComponentVersion ` @@ -345,18 +345,18 @@ function Select-ProfilerApiManifest { } if ($localVersion -eq $manifestVersion) { - $matches++ + $componentMatches++ } else { $mismatches += "$componentName local=$localVersion manifest=$manifestVersion" } } - if ($matches -gt 0 -and $mismatches.Count -eq 0) { - if ($null -eq $bestCandidate -or $matches -gt $bestCandidate.MatchCount) { + if ($componentMatches -gt 0 -and $mismatches.Count -eq 0) { + if ($null -eq $bestCandidate -or $componentMatches -gt $bestCandidate.MatchCount) { $bestCandidate = [PSCustomObject]@{ Name = $manifestName Manifest = $manifest - MatchCount = $matches + MatchCount = $componentMatches } } } From 307ecf5eae88dc842eb435688e32e77f4a016ffe Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 15:05:19 -0500 Subject: [PATCH 48/56] enable device testing 
parameter in build_nvbench, passed as True by workflow --- .github/workflows/build-windows.yml | 2 +- ci/windows/build_nvbench.ps1 | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index ffb8a90c..a51e40aa 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -170,7 +170,7 @@ jobs: `$ErrorActionPreference = 'Stop' git config --global --add safe.directory '$containerRepo' & '$containerRepo/ci/windows/install_cuda_profiler_api.ps1' -cudaVersion "`$env:NVBENCH_WINDOWS_CUDA" - & '$containerRepo/ci/windows/build_nvbench.ps1' -std "`$env:NVBENCH_WINDOWS_STD" -arch "`$env:NVBENCH_WINDOWS_ARCH" + & '$containerRepo/ci/windows/build_nvbench.ps1' -std "`$env:NVBENCH_WINDOWS_STD" -arch "`$env:NVBENCH_WINDOWS_ARCH" -device-testing `$true exit `$LASTEXITCODE "@ | Set-Content -Path $script -Encoding UTF8 diff --git a/ci/windows/build_nvbench.ps1 b/ci/windows/build_nvbench.ps1 index dac5955b..c1805267 100644 --- a/ci/windows/build_nvbench.ps1 +++ b/ci/windows/build_nvbench.ps1 @@ -11,7 +11,11 @@ Param( [Parameter(Mandatory = $false)] [Alias("cmake-options")] - [string]$CMAKE_OPTIONS = "" + [string]$CMAKE_OPTIONS = "", + + [Parameter(Mandatory = $false)] + [Alias("device-testing")] + [bool]$DEVICE_TESTING = $false ) $ErrorActionPreference = "Stop" @@ -30,10 +34,11 @@ try { Print-EnvironmentDetails $preset = "nvbench-ci" + $deviceTestingOption = if ($DEVICE_TESTING) { "ON" } else { "OFF" } $localOptions = @( "-DCMAKE_CXX_STANDARD=$CXX_STANDARD", "-DCMAKE_CUDA_STANDARD=$CXX_STANDARD", - "-DNVBench_ENABLE_DEVICE_TESTING=ON" + "-DNVBench_ENABLE_DEVICE_TESTING=$deviceTestingOption" ) Configure-And-Build-Preset "NVBench" $preset $localOptions From 9cd2c0f40a281aabfd0f6c5750613cb64b85c5b4 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 15:05:43 -0500 Subject: [PATCH 49/56] 
Lower CMake version required as much as possible --- testing/cmake/test_export/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/cmake/test_export/CMakeLists.txt b/testing/cmake/test_export/CMakeLists.txt index 5bfbe540..d1e72ff8 100644 --- a/testing/cmake/test_export/CMakeLists.txt +++ b/testing/cmake/test_export/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.30.4) +cmake_minimum_required(VERSION 3.21.0) project(NVBenchTestExport CUDA CXX) message(STATUS "NVBench_DIR=${NVBench_DIR}") From 9bfe117be5f1a21bba75651a8f9e702c1c66cc1d Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 15:06:27 -0500 Subject: [PATCH 50/56] LINKER:/INCLUDE:main for proper CUDA link driver routing --- nvbench/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nvbench/CMakeLists.txt b/nvbench/CMakeLists.txt index 6a436e73..ba505bf2 100644 --- a/nvbench/CMakeLists.txt +++ b/nvbench/CMakeLists.txt @@ -142,7 +142,7 @@ target_compile_definitions(nvbench.main PRIVATE NVBENCH_NO_IMPLICIT_SYSTEM_HEADE target_link_libraries(nvbench.main PUBLIC nvbench) if (MSVC) # inform MSVC that library provides main - target_link_options(nvbench.main INTERFACE "/INCLUDE:main") + target_link_options(nvbench.main INTERFACE "LINKER:/INCLUDE:main") endif() # Ensure CUDA/CUPTI/NVML include dirs are visible for nvbench.main's build. 
target_link_libraries(nvbench.main PRIVATE ${ctk_libraries}) From 2d004ec18c902001dd0da48c9c9b1d2b96bfee81 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 15:12:53 -0500 Subject: [PATCH 51/56] Add conda-specific hints for find_library call to find CUPTI --- cmake/NVBenchCUPTI.cmake | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/cmake/NVBenchCUPTI.cmake b/cmake/NVBenchCUPTI.cmake index 3cce8d7f..18a25bd1 100644 --- a/cmake/NVBenchCUPTI.cmake +++ b/cmake/NVBenchCUPTI.cmake @@ -12,6 +12,14 @@ else() set(nvbench_cupti_root "${CUDAToolkit_LIBRARY_ROOT}") endif() +set(nvbench_cupti_library_hints "${nvbench_cupti_root}/lib64") +if (WIN32) + list(APPEND nvbench_cupti_library_hints + "${nvbench_cupti_root}/lib/x64" + "${nvbench_cupti_root}/lib" + ) +endif() + # The CUPTI targets in FindCUDAToolkit are broken: # - The dll locations are not specified # - Dependent libraries nvperf_* are not linked. @@ -78,8 +86,8 @@ function(nvbench_add_cupti_dep dep_name) add_library(nvbench::${dep_name_lower} SHARED IMPORTED) find_library(NVBench_${dep_name_upper}_LIBRARY ${dep_name_lower} REQUIRED - DOC "The import library for ${dep_name_lower} from the CUDA Toolkit." - HINTS "${nvbench_cupti_root}/lib64" + DOC "The library for ${dep_name_lower} from the CUDA Toolkit." 
+ HINTS ${nvbench_cupti_library_hints} ) mark_as_advanced(NVBench_${dep_name_upper}_LIBRARY) From 25ab2176f58f319e1c6891c5daff0f035d616ec6 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 15:20:09 -0500 Subject: [PATCH 52/56] test_export must require 3.22 version ENVIRONMENT_MODIFICATION feature was added in 3.22.0 https://cmake.org/cmake/help/latest/prop_test/ENVIRONMENT_MODIFICATION.html --- testing/cmake/test_export/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/cmake/test_export/CMakeLists.txt b/testing/cmake/test_export/CMakeLists.txt index d1e72ff8..f0aae8b2 100644 --- a/testing/cmake/test_export/CMakeLists.txt +++ b/testing/cmake/test_export/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.21.0) +cmake_minimum_required(VERSION 3.22.0) project(NVBenchTestExport CUDA CXX) message(STATUS "NVBench_DIR=${NVBench_DIR}") From 4ece868978412a5fa65bd37cab486300de6b1849 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 15:21:41 -0500 Subject: [PATCH 53/56] Delete unused function Test-Preset --- ci/windows/build_common.psm1 | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/ci/windows/build_common.psm1 b/ci/windows/build_common.psm1 index 99217dc6..f2a32655 100644 --- a/ci/windows/build_common.psm1 +++ b/ci/windows/build_common.psm1 @@ -178,23 +178,4 @@ function Configure-And-Build-Preset { Build-Preset $BUILD_NAME $PRESET } -function Test-Preset { - Param( - [Parameter(Mandatory = $true)] - [ValidateNotNullOrEmpty()] - [string]$BUILD_NAME, - - [Parameter(Mandatory = $true)] - [ValidateNotNullOrEmpty()] - [string]$PRESET - ) - - Push-Location ".." 
- try { - Invoke-NativeCommand "$BUILD_NAME test" "ctest" @("--preset=$PRESET", "--output-on-failure") - } finally { - Pop-Location - } -} - -Export-ModuleMember -Function Print-EnvironmentDetails, Configure-Preset, Build-Preset, Configure-And-Build-Preset, Test-Preset +Export-ModuleMember -Function Print-EnvironmentDetails, Configure-Preset, Build-Preset, Configure-And-Build-Preset From 3cd661a5deccab75eb53183f7697e5f7dfd1c278 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 15:22:51 -0500 Subject: [PATCH 54/56] Guard the CUPTI runtime path extraction Check before executing cmake_path() in testing/cmake/CMakeLists.txt Also, use nvbench_get_imported_location to extract imported location --- testing/cmake/CMakeLists.txt | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/testing/cmake/CMakeLists.txt b/testing/cmake/CMakeLists.txt index a9a5ea44..69bd59cc 100644 --- a/testing/cmake/CMakeLists.txt +++ b/testing/cmake/CMakeLists.txt @@ -53,10 +53,12 @@ function(nvbench_add_compile_test full_test_name_var subdir test_id) if (WIN32) set(path_mods "PATH=path_list_prepend:$") if (TARGET nvbench::cupti) - get_property(cupti_runtime_lib TARGET nvbench::cupti PROPERTY IMPORTED_LOCATION) - cmake_path(GET cupti_runtime_lib PARENT_PATH cupti_lib_dir) - if (cupti_lib_dir) - list(PREPEND path_mods "PATH=path_list_prepend:$") + nvbench_get_imported_location(cupti_runtime_lib nvbench::cupti) + if (cupti_runtime_lib) + cmake_path(GET cupti_runtime_lib PARENT_PATH cupti_lib_dir) + if (cupti_lib_dir) + list(PREPEND path_mods "PATH=path_list_prepend:$") + endif() endif() endif() set_property(TEST ${test_name} PROPERTY From 1cc8d01fd9ac6c4107ae30bd872267da104c3fd8 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 15:31:45 -0500 Subject: [PATCH 55/56] use the config-aware generator expression for all runtime targets 
Remove the configure-time imported-location helper entirely. --- cmake/NVBenchConfigTarget.cmake | 51 ++------------------------------- testing/cmake/CMakeLists.txt | 8 +----- 2 files changed, 4 insertions(+), 55 deletions(-) diff --git a/cmake/NVBenchConfigTarget.cmake b/cmake/NVBenchConfigTarget.cmake index b194c480..74652df1 100644 --- a/cmake/NVBenchConfigTarget.cmake +++ b/cmake/NVBenchConfigTarget.cmake @@ -137,59 +137,14 @@ function(nvbench_config_target target_name) endif() endfunction() -function(nvbench_get_imported_location out_var target_name) - get_property(imported_configs TARGET ${target_name} - PROPERTY IMPORTED_CONFIGURATIONS - ) - list(LENGTH imported_configs num_configs) - - if (num_configs GREATER 0) - if (CMAKE_BUILD_TYPE) - string(TOUPPER "${CMAKE_BUILD_TYPE}" build_type) - list(FIND imported_configs "${build_type}" imported_config_index) - else() - set(imported_config_index -1) - endif() - - if (imported_config_index GREATER_EQUAL 0) - list(GET imported_configs ${imported_config_index} imported_config) - else() - list(GET imported_configs 0 imported_config) - endif() - - get_property(imported_location TARGET ${target_name} - PROPERTY IMPORTED_LOCATION_${imported_config} - ) - endif() - - if (NOT imported_location) - get_property(imported_location TARGET ${target_name} - PROPERTY IMPORTED_LOCATION - ) - endif() - - set(${out_var} "${imported_location}" PARENT_SCOPE) -endfunction() - function(nvbench_append_test_runtime_path path_modifications_var target_name) if (NOT TARGET ${target_name}) return() endif() - get_property(is_imported TARGET ${target_name} PROPERTY IMPORTED) - if (is_imported) - nvbench_get_imported_location(runtime_artifact ${target_name}) - if (runtime_artifact) - cmake_path(GET runtime_artifact PARENT_PATH runtime_dir) - list(APPEND ${path_modifications_var} - "PATH=path_list_prepend:$" - ) - endif() - else() - list(APPEND ${path_modifications_var} - "PATH=path_list_prepend:$" - ) - endif() + list(APPEND 
${path_modifications_var} + "PATH=path_list_prepend:$" + ) set(${path_modifications_var} "${${path_modifications_var}}" diff --git a/testing/cmake/CMakeLists.txt b/testing/cmake/CMakeLists.txt index 69bd59cc..d2082575 100644 --- a/testing/cmake/CMakeLists.txt +++ b/testing/cmake/CMakeLists.txt @@ -53,13 +53,7 @@ function(nvbench_add_compile_test full_test_name_var subdir test_id) if (WIN32) set(path_mods "PATH=path_list_prepend:$") if (TARGET nvbench::cupti) - nvbench_get_imported_location(cupti_runtime_lib nvbench::cupti) - if (cupti_runtime_lib) - cmake_path(GET cupti_runtime_lib PARENT_PATH cupti_lib_dir) - if (cupti_lib_dir) - list(PREPEND path_mods "PATH=path_list_prepend:$") - endif() - endif() + list(PREPEND path_mods "PATH=path_list_prepend:$") endif() set_property(TEST ${test_name} PROPERTY ENVIRONMENT_MODIFICATION ${path_mods} From 9486121d15031c6608679aebf142aacaf86afd37 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 18 May 2026 15:34:26 -0500 Subject: [PATCH 56/56] Deduplicate WINDOWS_CI_IMAGE construction --- .github/workflows/build-windows.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index a51e40aa..a61c4fb5 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -85,6 +85,7 @@ jobs: id-token: write contents: read env: + WINDOWS_CI_IMAGE: ${{ inputs.image != '' && inputs.image || format('rapidsai/devcontainers:{0}-cuda{1}-{2}', inputs.devcontainer_version, inputs.cuda, inputs.host) }} SCCACHE_BUCKET: rapids-sccache-devs SCCACHE_REGION: us-east-2 SCCACHE_IDLE_TIMEOUT: "0" @@ -148,8 +149,6 @@ jobs: "arch=$normalizedArch" | Out-File -FilePath $env:GITHUB_OUTPUT -Encoding utf8 -Append - name: Fetch Windows devcontainer image - env: - WINDOWS_CI_IMAGE: ${{ inputs.image != '' && inputs.image || format('rapidsai/devcontainers:{0}-cuda{1}-{2}', 
inputs.devcontainer_version, inputs.cuda, inputs.host) }} run: | docker pull "$env:WINDOWS_CI_IMAGE" @@ -158,7 +157,6 @@ jobs: NVBENCH_WINDOWS_CUDA: ${{ inputs.cuda }} NVBENCH_WINDOWS_STD: ${{ inputs.std }} NVBENCH_WINDOWS_ARCH: ${{ steps.validate_windows_build_inputs.outputs.arch }} - WINDOWS_CI_IMAGE: ${{ inputs.image != '' && inputs.image || format('rapidsai/devcontainers:{0}-cuda{1}-{2}', inputs.devcontainer_version, inputs.cuda, inputs.host) }} run: | $ErrorActionPreference = "Stop"