diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index 777d2665..a61c4fb5 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -38,7 +38,7 @@ on: workflow_dispatch: inputs: cuda: - description: "CUDA Toolkit version" + description: "CUDA Toolkit major.minor version" type: string required: false default: "13.0" @@ -113,14 +113,50 @@ jobs: aws-region: us-east-2 role-duration-seconds: 43200 + - name: Validate Windows build inputs + id: validate_windows_build_inputs + env: + NVBENCH_WINDOWS_CUDA: ${{ inputs.cuda }} + NVBENCH_WINDOWS_STD: ${{ inputs.std }} + NVBENCH_WINDOWS_ARCH: ${{ inputs.arch }} + run: | + $ErrorActionPreference = "Stop" + + if ($env:NVBENCH_WINDOWS_CUDA -notmatch '^\d+\.\d+$') { + throw "Invalid CUDA version '$env:NVBENCH_WINDOWS_CUDA'. Expected '<major>.<minor>', for example '13.0'." + } + + if (@("17", "20") -notcontains $env:NVBENCH_WINDOWS_STD) { + throw "Invalid C++ standard '$env:NVBENCH_WINDOWS_STD'. Expected '17' or '20'." + } + + $arch = "$env:NVBENCH_WINDOWS_ARCH".Trim() + $normalizedArch = $arch + if ($arch) { + if (@("all", "all-major", "native") -notcontains $arch) { + $archItems = @($arch -split '[;,]' | ForEach-Object { $_.Trim() } | Where-Object { $_ }) + if ($archItems.Length -eq 0) { + throw "Invalid CMAKE_CUDA_ARCHITECTURES value '$arch'. Expected empty, 'all', 'all-major', 'native', or a list like '80;90-real'." + } + foreach ($archItem in $archItems) { + if ($archItem -notmatch '^\d{2,3}(-real|-virtual)?$') { + throw "Invalid CMAKE_CUDA_ARCHITECTURES value '$arch'. Expected empty, 'all', 'all-major', 'native', or a list like '80;90-real'." 
+ } + } + $normalizedArch = $archItems -join ';' + } + } + "arch=$normalizedArch" | Out-File -FilePath $env:GITHUB_OUTPUT -Encoding utf8 -Append + - name: Fetch Windows devcontainer image run: | docker pull "$env:WINDOWS_CI_IMAGE" - name: Build NVBench env: + NVBENCH_WINDOWS_CUDA: ${{ inputs.cuda }} NVBENCH_WINDOWS_STD: ${{ inputs.std }} - NVBENCH_WINDOWS_ARCH: ${{ inputs.arch }} + NVBENCH_WINDOWS_ARCH: ${{ steps.validate_windows_build_inputs.outputs.arch }} run: | $ErrorActionPreference = "Stop" @@ -131,7 +167,8 @@ jobs: @" `$ErrorActionPreference = 'Stop' git config --global --add safe.directory '$containerRepo' - & '$containerRepo/ci/windows/build_nvbench.ps1' -std '$env:NVBENCH_WINDOWS_STD' -arch '$env:NVBENCH_WINDOWS_ARCH' + & '$containerRepo/ci/windows/install_cuda_profiler_api.ps1' -cudaVersion "`$env:NVBENCH_WINDOWS_CUDA" + & '$containerRepo/ci/windows/build_nvbench.ps1' -std "`$env:NVBENCH_WINDOWS_STD" -arch "`$env:NVBENCH_WINDOWS_ARCH" -device-testing `$true exit `$LASTEXITCODE "@ | Set-Content -Path $script -Encoding UTF8 @@ -152,13 +189,19 @@ jobs: "--env", "GITHUB_REPOSITORY=$env:GITHUB_REPOSITORY", "--env", "GITHUB_RUN_ID=$env:GITHUB_RUN_ID", "--env", "GITHUB_SHA=$env:GITHUB_SHA", + "--env", "NVBENCH_WINDOWS_ARCH=$env:NVBENCH_WINDOWS_ARCH", + "--env", "NVBENCH_WINDOWS_CUDA=$env:NVBENCH_WINDOWS_CUDA", + "--env", "NVBENCH_WINDOWS_STD=$env:NVBENCH_WINDOWS_STD", "--env", "SCCACHE_BUCKET=$env:SCCACHE_BUCKET", "--env", "SCCACHE_IDLE_TIMEOUT=$env:SCCACHE_IDLE_TIMEOUT", "--env", "SCCACHE_REGION=$env:SCCACHE_REGION", "--env", "SCCACHE_S3_NO_CREDENTIALS=$env:SCCACHE_S3_NO_CREDENTIALS", "--env", "SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX=$env:SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX", "--env", "SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE=$env:SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE", - "--env", "SCCACHE_S3_USE_SSL=$env:SCCACHE_S3_USE_SSL", + "--env", "SCCACHE_S3_USE_SSL=$env:SCCACHE_S3_USE_SSL" + ) + + $dockerArgs += @( "$env:WINDOWS_CI_IMAGE", "powershell", "-NoLogo", 
"-NoProfile", "-ExecutionPolicy", "Bypass", "-File", $containerScript diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 7ea85397..40427264 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -79,8 +79,6 @@ jobs: nvbench-windows: name: NVBench Windows CUDA${{ matrix.config.cuda }} ${{ matrix.config.host }} C++${{ matrix.config.std }} - # TODO: Re-enable after https://github.com/NVIDIA/nvbench/pull/354 fixes the Windows build. - if: false permissions: id-token: write contents: read diff --git a/CMakeLists.txt b/CMakeLists.txt index 01b39bbe..fcd44bb4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,6 +39,9 @@ if (${CUDAToolkit_VERSION} VERSION_LESS 11.3) endif() option(BUILD_SHARED_LIBS "Build NVBench as a shared library" ON) +if (WIN32 AND BUILD_SHARED_LIBS) + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) +endif() option(NVBench_ENABLE_NVML "Build with NVML support from the Cuda Toolkit." ON) option(NVBench_ENABLE_CUPTI "Build NVBench with CUPTI." 
${cupti_default}) diff --git a/ci/windows/build_nvbench.ps1 b/ci/windows/build_nvbench.ps1 index a39b0b34..c1805267 100644 --- a/ci/windows/build_nvbench.ps1 +++ b/ci/windows/build_nvbench.ps1 @@ -11,7 +11,11 @@ Param( [Parameter(Mandatory = $false)] [Alias("cmake-options")] - [string]$CMAKE_OPTIONS = "" + [string]$CMAKE_OPTIONS = "", + + [Parameter(Mandatory = $false)] + [Alias("device-testing")] + [bool]$DEVICE_TESTING = $false ) $ErrorActionPreference = "Stop" @@ -30,9 +34,11 @@ try { Print-EnvironmentDetails $preset = "nvbench-ci" + $deviceTestingOption = if ($DEVICE_TESTING) { "ON" } else { "OFF" } $localOptions = @( "-DCMAKE_CXX_STANDARD=$CXX_STANDARD", - "-DCMAKE_CUDA_STANDARD=$CXX_STANDARD" + "-DCMAKE_CUDA_STANDARD=$CXX_STANDARD", + "-DNVBench_ENABLE_DEVICE_TESTING=$deviceTestingOption" ) Configure-And-Build-Preset "NVBench" $preset $localOptions diff --git a/ci/windows/install_cuda_profiler_api.ps1 b/ci/windows/install_cuda_profiler_api.ps1 new file mode 100644 index 00000000..556a32ee --- /dev/null +++ b/ci/windows/install_cuda_profiler_api.ps1 @@ -0,0 +1,519 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +Param( + [Parameter(Mandatory = $false)] + [Alias("cudaVersion")] + [string]$CUDA_VERSION = "" +) + +$ErrorActionPreference = "Stop" + +$RedistRootUri = "https://developer.download.nvidia.com/compute/cuda/redist" + +function Get-CudaVersionFromPath { + Param( + [Parameter(Mandatory = $false)] + [string]$Path = "" + ) + + if ($Path -and $Path -match "v(?<version>\d+\.\d+)[\\/]?$") { + return $Matches.version + } + + return "" +} + +function Get-CudaRootFromNvcc { + $nvccCommand = Get-Command "nvcc.exe" -ErrorAction SilentlyContinue + if (-not $nvccCommand) { + return "" + } + + $nvccPath = $nvccCommand.Source + $binDir = Split-Path -Parent $nvccPath + if ((Split-Path -Leaf $binDir) -ne "bin") { + throw "Could not derive CUDA root from nvcc.exe path: $nvccPath" + } + + return Split-Path -Parent $binDir +} + +function Assert-SamePath { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$Left, + + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$Right, + + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$Message + ) + + $leftFullPath = [System.IO.Path]::GetFullPath($Left).TrimEnd('\', '/') + $rightFullPath = [System.IO.Path]::GetFullPath($Right).TrimEnd('\', '/') + if ($leftFullPath -ne $rightFullPath) { + throw "$Message Left='$leftFullPath' Right='$rightFullPath'" + } +} + +function Get-HttpStatusCodeFromError { + Param( + [Parameter(Mandatory = $true)] + $ErrorRecord + ) + + $responseProperty = $ErrorRecord.Exception.PSObject.Properties["Response"] + if (-not $responseProperty) { + return $null + } + + $response = $responseProperty.Value + if ($null -eq $response) { + return $null + } + + $statusCodeProperty = $response.PSObject.Properties["StatusCode"] + if (-not $statusCodeProperty) { + return $null + } + + return [int]$statusCodeProperty.Value +} + +function Invoke-WebRequestWithRetry { + Param( + [Parameter(Mandatory = $true)] + 
[ValidateNotNullOrEmpty()] + [string]$Uri, + + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$OutFile, + + [Parameter(Mandatory = $false)] + [ValidateRange(1, 10)] + [int]$MaxAttempts = 3 + ) + + for ($attempt = 1; $attempt -le $MaxAttempts; $attempt++) { + try { + Remove-Item $OutFile -ErrorAction SilentlyContinue + Invoke-WebRequest -Uri $Uri -OutFile $OutFile -UseBasicParsing -TimeoutSec 300 + return + } catch { + $statusCode = Get-HttpStatusCodeFromError -ErrorRecord $_ + # Fail fast for deterministic client errors that indicate a bad URL, + # missing package, or unsupported method. Keep 408/429 and 5xx on + # the retry path because they are commonly transient in CI. + if (@(400, 401, 403, 404, 405, 410, 414) -contains $statusCode) { + throw "Download failed with non-retryable HTTP status $statusCode from '$Uri'. $_" + } + + if ($attempt -eq $MaxAttempts) { + throw + } + + $delaySeconds = 5 * $attempt + Write-Warning "Download failed on attempt $attempt of $MaxAttempts. Retrying in $delaySeconds seconds. $_" + Start-Sleep -Seconds $delaySeconds + } + } +} + +function Read-JsonFile { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$Path + ) + + try { + $content = Get-Content -LiteralPath $Path -Raw + $json = $content | ConvertFrom-Json + return $json + } catch { + throw "Failed to parse JSON file '$Path'. 
$_" + } +} + +function Get-JsonPropertyValue { + Param( + [Parameter(Mandatory = $true)] + $Object, + + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$Name + ) + + if ($null -eq $Object) { + return $null + } + + $property = $Object.PSObject.Properties[$Name] + if (-not $property) { + return $null + } + + return $property.Value +} + +function Get-ComponentVersion { + Param( + [Parameter(Mandatory = $true)] + $JsonObject, + + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$ComponentName + ) + + $component = Get-JsonPropertyValue -Object $JsonObject -Name $ComponentName + if ($null -eq $component) { + return "" + } + + $version = Get-JsonPropertyValue -Object $component -Name "version" + if ($null -eq $version) { + return "" + } + + return [string]$version +} + +function Get-CudaVersionFromRoot { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$CudaRoot + ) + + $pathVersion = Get-CudaVersionFromPath -Path $CudaRoot + if ($pathVersion) { + return $pathVersion + } + + $versionJson = Join-Path $CudaRoot "version.json" + if (Test-Path $versionJson) { + $versionData = Read-JsonFile -Path $versionJson + $cudaVersion = Get-ComponentVersion -JsonObject $versionData -ComponentName "cuda" + if ($cudaVersion -match '^(?<version>\d+\.\d+)(\.|$)') { + return $Matches.version + } + } + + return "" +} + +function Assert-Sha256 { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$Path, + + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$ExpectedSha256 + ) + + $actualSha256 = (Get-FileHash -LiteralPath $Path -Algorithm SHA256).Hash.ToLowerInvariant() + $expectedSha256 = $ExpectedSha256.ToLowerInvariant() + if ($actualSha256 -ne $expectedSha256) { + throw "SHA256 mismatch for '$Path'. Expected '$expectedSha256', got '$actualSha256'." 
+ } + + Write-Host "Validated SHA256 for '$Path': $actualSha256" +} + +function Get-RedistribManifestNames { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$CudaVersionTag + ) + + $indexFile = Join-Path $env:TEMP "cuda_redist_index_$PID.html" + try { + Invoke-WebRequestWithRetry -Uri "$RedistRootUri/" -OutFile $indexFile + $indexContent = Get-Content -LiteralPath $indexFile -Raw + } finally { + Remove-Item $indexFile -ErrorAction SilentlyContinue + } + + $pattern = "redistrib_$([regex]::Escape($CudaVersionTag))\.\d+\.json" + $manifestNames = @( + [regex]::Matches($indexContent, $pattern) | + ForEach-Object { $_.Value } | + Sort-Object -Unique + ) + + if ($manifestNames.Count -eq 0) { + throw "No CUDA $CudaVersionTag redistrib manifests were found at $RedistRootUri." + } + + return @( + $manifestNames | + ForEach-Object { + [PSCustomObject]@{ + Name = $_ + Version = [Version](($_ -replace '^redistrib_', '') -replace '\.json$', '') + } + } | + Sort-Object -Property Version -Descending | + ForEach-Object { $_.Name } + ) +} + +function Read-RedistManifest { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$ManifestName + ) + + $manifestFile = Join-Path $env:TEMP $ManifestName + try { + Invoke-WebRequestWithRetry -Uri "$RedistRootUri/$ManifestName" -OutFile $manifestFile + return Read-JsonFile -Path $manifestFile + } finally { + Remove-Item $manifestFile -ErrorAction SilentlyContinue + } +} + +function Select-ProfilerApiManifest { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$CudaVersionTag, + + [Parameter(Mandatory = $true)] + $VersionData + ) + + $localProfilerApiVersion = Get-ComponentVersion ` + -JsonObject $VersionData ` + -ComponentName "cuda_profiler_api" + $manifestNames = Get-RedistribManifestNames -CudaVersionTag $CudaVersionTag + + if ($localProfilerApiVersion) { + Write-Host "CUDA version metadata reports cuda_profiler_api $localProfilerApiVersion." 
+ } else { + Write-Host "CUDA version metadata does not report cuda_profiler_api; matching by installed core components." + } + + $matchComponents = @("cuda_cupti", "cuda_cudart", "cuda_nvcc", "cuda_cccl") + $bestCandidate = $null + + foreach ($manifestName in $manifestNames) { + $manifest = Read-RedistManifest -ManifestName $manifestName + $manifestProfilerApiVersion = Get-ComponentVersion ` + -JsonObject $manifest ` + -ComponentName "cuda_profiler_api" + + if (-not $manifestProfilerApiVersion) { + continue + } + + if ($localProfilerApiVersion) { + if ($manifestProfilerApiVersion -eq $localProfilerApiVersion) { + Write-Host "Selected CUDA redist manifest $manifestName." + return [PSCustomObject]@{ + Name = $manifestName + Manifest = $manifest + } + } + continue + } + + $componentMatches = 0 + $mismatches = @() + foreach ($componentName in $matchComponents) { + $localVersion = Get-ComponentVersion ` + -JsonObject $VersionData ` + -ComponentName $componentName + $manifestVersion = Get-ComponentVersion ` + -JsonObject $manifest ` + -ComponentName $componentName + + if (-not $localVersion -or -not $manifestVersion) { + continue + } + + if ($localVersion -eq $manifestVersion) { + $componentMatches++ + } else { + $mismatches += "$componentName local=$localVersion manifest=$manifestVersion" + } + } + + if ($componentMatches -gt 0 -and $mismatches.Count -eq 0) { + if ($null -eq $bestCandidate -or $componentMatches -gt $bestCandidate.MatchCount) { + $bestCandidate = [PSCustomObject]@{ + Name = $manifestName + Manifest = $manifest + MatchCount = $componentMatches + } + } + } + } + + if ($localProfilerApiVersion) { + throw "Could not find a CUDA $CudaVersionTag redistrib manifest with cuda_profiler_api $localProfilerApiVersion." + } + + if ($null -eq $bestCandidate) { + throw "Could not match installed CUDA Toolkit component versions to a CUDA $CudaVersionTag redistrib manifest." 
+ } + + Write-Host "Selected CUDA redist manifest $($bestCandidate.Name) using $($bestCandidate.MatchCount) component version match(es)." + return [PSCustomObject]@{ + Name = $bestCandidate.Name + Manifest = $bestCandidate.Manifest + } +} + +function Get-PayloadRoot { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$ExtractDir + ) + + $directories = @(Get-ChildItem -LiteralPath $ExtractDir -Directory) + $files = @(Get-ChildItem -LiteralPath $ExtractDir -File) + if ($directories.Count -eq 1 -and $files.Count -eq 0) { + return $directories[0].FullName + } + + return $ExtractDir +} + +function Install-ProfilerApiPackage { + Param( + [Parameter(Mandatory = $true)] + $ManifestSelection, + + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$CudaRoot + ) + + $component = Get-JsonPropertyValue ` + -Object $ManifestSelection.Manifest ` + -Name "cuda_profiler_api" + if ($null -eq $component) { + throw "Manifest $($ManifestSelection.Name) does not contain cuda_profiler_api." + } + + $package = Get-JsonPropertyValue -Object $component -Name "windows-x86_64" + if ($null -eq $package) { + throw "Manifest $($ManifestSelection.Name) does not contain cuda_profiler_api for windows-x86_64." + } + + $relativePath = Get-JsonPropertyValue -Object $package -Name "relative_path" + $expectedSha256 = Get-JsonPropertyValue -Object $package -Name "sha256" + if (-not $relativePath -or -not $expectedSha256) { + throw "Manifest $($ManifestSelection.Name) is missing cuda_profiler_api relative_path or sha256." 
+ } + if ($relativePath -notmatch '^cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-[^/]+-archive\.zip$') { + throw "Unexpected cuda_profiler_api package path in $($ManifestSelection.Name): $relativePath" + } + + $pathParts = $relativePath -split '/' + $archiveName = $pathParts[$pathParts.Length - 1] + $archive = Join-Path $env:TEMP $archiveName + $extractDir = Join-Path $env:TEMP "cuda_profiler_api_$([Guid]::NewGuid().ToString('N'))" + $archiveUri = "$RedistRootUri/$relativePath" + + try { + Write-Host "Downloading CUDA Profiler API redist package: $archiveUri" + Invoke-WebRequestWithRetry -Uri $archiveUri -OutFile $archive + Assert-Sha256 -Path $archive -ExpectedSha256 $expectedSha256 + + Expand-Archive -LiteralPath $archive -DestinationPath $extractDir -Force + $payloadRoot = Get-PayloadRoot -ExtractDir $extractDir + $payloadHeader = Join-Path $payloadRoot "include\cuda_profiler_api.h" + if (-not (Test-Path $payloadHeader)) { + throw "CUDA Profiler API archive did not contain expected header: $payloadHeader" + } + + Write-Host "Installing CUDA Profiler API package into: $CudaRoot" + Copy-Item -Path (Join-Path $payloadRoot "*") -Destination $CudaRoot -Recurse -Force + } finally { + Remove-Item $archive -ErrorAction SilentlyContinue + Remove-Item $extractDir -Recurse -Force -ErrorAction SilentlyContinue + } +} + +if (-not $CUDA_VERSION) { + throw "CUDA Toolkit version is required. Provide -cudaVersion <major>.<minor>, for example '13.0'." +} + +if ($CUDA_VERSION -notmatch '^\d+\.\d+$') { + throw "Invalid CUDA Toolkit version '$CUDA_VERSION'. Expected '<major>.<minor>', for example '13.0'." 
+} + +$version = [Version]$CUDA_VERSION +$mmVersionTag = "$($version.Major).$($version.Minor)" + +$nvccCudaRoot = Get-CudaRootFromNvcc +if ($nvccCudaRoot) { + $nvccCudaVersion = Get-CudaVersionFromRoot -CudaRoot $nvccCudaRoot + if (-not $nvccCudaVersion) { + throw "Could not determine CUDA version from active nvcc.exe root: $nvccCudaRoot" + } + if ($nvccCudaVersion -ne $mmVersionTag) { + throw "Active nvcc.exe is from CUDA $nvccCudaVersion, but CUDA $mmVersionTag was requested." + } +} + +if ($env:CUDA_PATH) { + $cudaPathVersion = Get-CudaVersionFromRoot -CudaRoot $env:CUDA_PATH + if (-not $cudaPathVersion) { + throw "Could not determine CUDA version from CUDA_PATH: $env:CUDA_PATH" + } + if ($cudaPathVersion -ne $mmVersionTag) { + throw "CUDA_PATH points to CUDA $cudaPathVersion, but CUDA $mmVersionTag was requested." + } + if ($nvccCudaRoot) { + Assert-SamePath ` + -Left $env:CUDA_PATH ` + -Right $nvccCudaRoot ` + -Message "CUDA_PATH and active nvcc.exe point to different CUDA Toolkit roots." + } + $cudaRoot = $env:CUDA_PATH +} elseif ($nvccCudaRoot) { + $cudaRoot = $nvccCudaRoot +} else { + $cudaRoot = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$mmVersionTag" +} + +$profilerHeader = Join-Path $cudaRoot "include\cuda_profiler_api.h" +if (Test-Path $profilerHeader) { + Write-Host "CUDA Profiler API is already installed: $profilerHeader" + return +} + +$versionJson = Join-Path $cudaRoot "version.json" +if (-not (Test-Path $versionJson)) { + throw "CUDA Toolkit version metadata was not found: $versionJson. Cannot determine the matching cuda_profiler_api redist package." 
+} + +$versionData = Read-JsonFile -Path $versionJson +$manifestSelection = Select-ProfilerApiManifest ` + -CudaVersionTag $mmVersionTag ` + -VersionData $versionData +Install-ProfilerApiPackage ` + -ManifestSelection $manifestSelection ` + -CudaRoot $cudaRoot + +if (-not (Test-Path $profilerHeader)) { + throw "CUDA Profiler API installation completed, but header was not found: $profilerHeader" +} + +Write-Host "CUDA Profiler API installed: $profilerHeader" diff --git a/cmake/NVBenchCUPTI.cmake b/cmake/NVBenchCUPTI.cmake index 10a70893..18a25bd1 100644 --- a/cmake/NVBenchCUPTI.cmake +++ b/cmake/NVBenchCUPTI.cmake @@ -12,10 +12,73 @@ else() set(nvbench_cupti_root "${CUDAToolkit_LIBRARY_ROOT}") endif() +set(nvbench_cupti_library_hints "${nvbench_cupti_root}/lib64") +if (WIN32) + list(APPEND nvbench_cupti_library_hints + "${nvbench_cupti_root}/lib/x64" + "${nvbench_cupti_root}/lib" + ) +endif() + # The CUPTI targets in FindCUDAToolkit are broken: # - The dll locations are not specified # - Dependent libraries nvperf_* are not linked. 
# So we create our own targets: +function(nvbench_find_windows_cupti_runtime_library out_var dep_name library_path) + cmake_path(GET library_path PARENT_PATH library_dir) + set(runtime_search_dirs "${library_dir}") + + if ("${library_dir}" MATCHES "/Library/lib/x64$") + cmake_path(GET library_dir PARENT_PATH conda_lib_dir) + cmake_path(GET conda_lib_dir PARENT_PATH conda_library_dir) + list(APPEND runtime_search_dirs "${conda_library_dir}/bin") + elseif ("${library_dir}" MATCHES "/Library/lib$") + cmake_path(GET library_dir PARENT_PATH conda_library_dir) + list(APPEND runtime_search_dirs "${conda_library_dir}/bin") + endif() + + list(REMOVE_DUPLICATES runtime_search_dirs) + + foreach(runtime_search_dir IN LISTS runtime_search_dirs) + if ("${dep_name}" STREQUAL "cupti") + file(GLOB runtime_libraries LIST_DIRECTORIES false + "${runtime_search_dir}/cupti64_*.dll" + ) + if (NOT runtime_libraries) + file(GLOB runtime_libraries LIST_DIRECTORIES false + "${runtime_search_dir}/cupti.dll" + ) + endif() + else() + file(GLOB runtime_libraries LIST_DIRECTORIES false + "${runtime_search_dir}/${dep_name}.dll" + ) + endif() + + if (runtime_libraries) + list(SORT runtime_libraries COMPARE NATURAL ORDER DESCENDING) + list(LENGTH runtime_libraries num_runtime_libraries) + if (num_runtime_libraries GREATER 1) + list(GET runtime_libraries 0 runtime_library) + message(WARNING + "Found multiple runtime DLLs for ${dep_name}; selecting " + "${runtime_library}. Candidates: ${runtime_libraries}" + ) + else() + list(GET runtime_libraries 0 runtime_library) + endif() + + set(${out_var} "${runtime_library}" PARENT_SCOPE) + return() + endif() + endforeach() + + message(FATAL_ERROR + "Could not find the runtime DLL for ${dep_name}. 
" + "Searched these directories: ${runtime_search_dirs}" + ) +endfunction() + function(nvbench_add_cupti_dep dep_name) string(TOLOWER ${dep_name} dep_name_lower) string(TOUPPER ${dep_name} dep_name_upper) @@ -23,14 +86,26 @@ function(nvbench_add_cupti_dep dep_name) add_library(nvbench::${dep_name_lower} SHARED IMPORTED) find_library(NVBench_${dep_name_upper}_LIBRARY ${dep_name_lower} REQUIRED - DOC "The full path to lib${dep_name_lower}.so from the CUDA Toolkit." - HINTS "${nvbench_cupti_root}/lib64" + DOC "The library for ${dep_name_lower} from the CUDA Toolkit." + HINTS ${nvbench_cupti_library_hints} ) mark_as_advanced(NVBench_${dep_name_upper}_LIBRARY) - set_target_properties(nvbench::${dep_name_lower} PROPERTIES - IMPORTED_LOCATION "${NVBench_${dep_name_upper}_LIBRARY}" - ) + if (WIN32) + nvbench_find_windows_cupti_runtime_library( + NVBench_${dep_name_upper}_DLL + ${dep_name_lower} + "${NVBench_${dep_name_upper}_LIBRARY}" + ) + set_target_properties(nvbench::${dep_name_lower} PROPERTIES + IMPORTED_IMPLIB "${NVBench_${dep_name_upper}_LIBRARY}" + IMPORTED_LOCATION "${NVBench_${dep_name_upper}_DLL}" + ) + else() + set_target_properties(nvbench::${dep_name_lower} PROPERTIES + IMPORTED_LOCATION "${NVBench_${dep_name_upper}_LIBRARY}" + ) + endif() endfunction() nvbench_add_cupti_dep(cupti) diff --git a/cmake/NVBenchConfigTarget.cmake b/cmake/NVBenchConfigTarget.cmake index 7c8a4b93..74652df1 100644 --- a/cmake/NVBenchConfigTarget.cmake +++ b/cmake/NVBenchConfigTarget.cmake @@ -91,11 +91,25 @@ endif() if (CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") # fmtlib uses llvm's _BitInt internally, which is not available when compiling through nvcc: target_compile_definitions(nvbench.build_interface INTERFACE "FMT_USE_BITINT=0") + if (MSVC) + # cudafe cannot evaluate fmtlib's UTF-8 literal check even when /utf-8 is passed to the host compiler: + target_compile_definitions(nvbench.build_interface INTERFACE + $<$:FMT_UNICODE=0> + ) + endif() endif() 
target_compile_options(nvbench.build_interface INTERFACE $<$:-Xcudafe=--display_error_number> $<$:-Wno-deprecated-gpu-targets> + $<$,$>:-Xcompiler=/utf-8> + # Suppress cudafe diagnostics triggered by fmtlib headers when compiled through MSVC+nvcc: + # 27: character value is out of range (char32_t sentinel values in lookup tables) + # 128: loop is not reachable (dead code in constexpr string comparison) + # 2417: constexpr constructor calls non-constexpr function (bigint default ctor) + $<$,$>:-Xcudafe=--diag_suppress=27> + $<$,$>:-Xcudafe=--diag_suppress=128> + $<$,$>:-Xcudafe=--diag_suppress=2417> ) if (NVBench_ENABLE_WERROR) target_compile_options(nvbench.build_interface INTERFACE @@ -115,10 +129,49 @@ function(nvbench_config_target target_name) # the library path, other times they're in a subdirectory that isn't added to # the library path... # To simplify installed nvbench usage, add the CUPTI libraries path to the - # installed nvbench rpath: - if (NVBench_ENABLE_CUPTI AND nvbench_cupti_root) + # installed nvbench rpath (Unix only; Windows uses PATH for DLL lookup): + if (NVBench_ENABLE_CUPTI AND nvbench_cupti_root AND NOT WIN32) set_target_properties(${target_name} PROPERTIES INSTALL_RPATH "${nvbench_cupti_root}/lib64" ) endif() endfunction() + +function(nvbench_append_test_runtime_path path_modifications_var target_name) + if (NOT TARGET ${target_name}) + return() + endif() + + list(APPEND ${path_modifications_var} + "PATH=path_list_prepend:$" + ) + + set(${path_modifications_var} + "${${path_modifications_var}}" + PARENT_SCOPE + ) +endfunction() + +function(nvbench_config_test_runtime_environment test_name) + if (NOT WIN32) + return() + endif() + + set(path_modifications "") + if (TARGET nvbench) + nvbench_append_test_runtime_path(path_modifications nvbench) + else() + nvbench_append_test_runtime_path(path_modifications nvbench::nvbench) + endif() + + nvbench_append_test_runtime_path(path_modifications nvbench::cupti) + 
nvbench_append_test_runtime_path(path_modifications nvbench::nvperf_target) + nvbench_append_test_runtime_path(path_modifications nvbench::nvperf_host) + + if (path_modifications) + list(REMOVE_DUPLICATES path_modifications) + set_property(TEST ${test_name} + APPEND PROPERTY ENVIRONMENT_MODIFICATION ${path_modifications} + ) + endif() +endfunction() diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 061f8eb5..2abe3c7d 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -37,6 +37,7 @@ function (nvbench_add_examples_target target_prefix cuda_std) add_test(NAME ${example_name} COMMAND "$" ${example_args}) + nvbench_config_test_runtime_environment(${example_name}) # These should not deadlock. If they do, it may be that the CUDA context was created before # setting CUDA_MODULE_LOAD=EAGER in main, see NVIDIA/nvbench#136. diff --git a/exec/CMakeLists.txt b/exec/CMakeLists.txt index 775dccc9..7a9e88ec 100644 --- a/exec/CMakeLists.txt +++ b/exec/CMakeLists.txt @@ -9,35 +9,57 @@ add_dependencies(nvbench.all nvbench.ctl) nvbench_install_executables(nvbench.ctl) if (NVBench_ENABLE_TESTING) + set(ctl_test_names) + # Test: nvbench - add_test(NAME nvbench.ctl.no_args COMMAND "$") + set(test_name nvbench.ctl.no_args) + add_test(NAME ${test_name} COMMAND "$") + list(APPEND ctl_test_names ${test_name}) # Should print the version without any args: - set_property(TEST nvbench.ctl.no_args + set_property(TEST ${test_name} PROPERTY PASS_REGULAR_EXPRESSION "NVBench v" ) # Test: nvbench --version - add_test(NAME nvbench.ctl.version COMMAND "$" --version) + set(test_name nvbench.ctl.version) + add_test(NAME ${test_name} COMMAND "$" --version) + list(APPEND ctl_test_names ${test_name}) # Should print the version without any args: - set_property(TEST nvbench.ctl.version + set_property(TEST ${test_name} PROPERTY PASS_REGULAR_EXPRESSION "NVBench v" ) # Test: nvbench --list - add_test(NAME nvbench.ctl.list COMMAND "$" --list) + set(test_name nvbench.ctl.list) 
+ add_test(NAME ${test_name} COMMAND "$" --list) + list(APPEND ctl_test_names ${test_name}) # Test: nvbench -l - add_test(NAME nvbench.ctl.l COMMAND "$" -l) + set(test_name nvbench.ctl.l) + add_test(NAME ${test_name} COMMAND "$" -l) + list(APPEND ctl_test_names ${test_name}) # Test: nvbench --help - add_test(NAME nvbench.ctl.help COMMAND "$" --help) + set(test_name nvbench.ctl.help) + add_test(NAME ${test_name} COMMAND "$" --help) + list(APPEND ctl_test_names ${test_name}) # Test: nvbench -h - add_test(NAME nvbench.ctl.h COMMAND "$" -h) + set(test_name nvbench.ctl.h) + add_test(NAME ${test_name} COMMAND "$" -h) + list(APPEND ctl_test_names ${test_name}) # Test: nvbench --help-axes - add_test(NAME nvbench.ctl.help_axes COMMAND "$" --help-axes) + set(test_name nvbench.ctl.help_axes) + add_test(NAME ${test_name} COMMAND "$" --help-axes) + list(APPEND ctl_test_names ${test_name}) # Test: nvbench --help-axis - add_test(NAME nvbench.ctl.help_axis COMMAND "$" --help-axis) + set(test_name nvbench.ctl.help_axis) + add_test(NAME ${test_name} COMMAND "$" --help-axis) + list(APPEND ctl_test_names ${test_name}) + + foreach(test_name IN LISTS ctl_test_names) + nvbench_config_test_runtime_environment(${test_name}) + endforeach() endif() diff --git a/nvbench/CMakeLists.txt b/nvbench/CMakeLists.txt index 7466dcd7..ba505bf2 100644 --- a/nvbench/CMakeLists.txt +++ b/nvbench/CMakeLists.txt @@ -101,6 +101,12 @@ target_link_libraries(nvbench fmt::fmt nvbench_json ) +target_compile_options(nvbench PUBLIC + # CCCL requires MSVC's conforming preprocessor when compiling CUDA sources + # with cl.exe as the host compiler. 
+ $<$:/Zc:preprocessor> + $<$,$>:-Xcompiler=/Zc:preprocessor> +) # ################################################################################################## @@ -134,6 +140,10 @@ nvbench_config_target(nvbench.main) target_compile_definitions(nvbench.main PRIVATE NVBENCH_NO_IMPLICIT_SYSTEM_HEADER) # Propagate `nvbench` to consumers but keep NVBench's own build warning-visible. target_link_libraries(nvbench.main PUBLIC nvbench) +if (MSVC) + # inform MSVC that library provides main + target_link_options(nvbench.main INTERFACE "LINKER:/INCLUDE:main") +endif() # Ensure CUDA/CUPTI/NVML include dirs are visible for nvbench.main's build. target_link_libraries(nvbench.main PRIVATE ${ctk_libraries}) # Add NVBench's headers privately so the main library itself sees warnings. diff --git a/nvbench/config.cuh.in b/nvbench/config.cuh.in index d151c130..2f89f4cc 100644 --- a/nvbench/config.cuh.in +++ b/nvbench/config.cuh.in @@ -24,7 +24,11 @@ // Defined if NVBench has been built with CUPTI support. 
#cmakedefine NVBENCH_HAS_CUPTI +#if defined(_MSVC_LANG) +#define NVBENCH_CPLUSPLUS _MSVC_LANG +#else #define NVBENCH_CPLUSPLUS __cplusplus +#endif // Detect current dialect: #if NVBENCH_CPLUSPLUS < 201703L diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index bbf3e190..4f160923 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -47,6 +47,7 @@ foreach(test_src IN LISTS test_srcs) set_target_properties(${test_name} PROPERTIES COMPILE_FEATURES cuda_std_17) nvbench_config_target(${test_name}) add_test(NAME ${test_name} COMMAND "$" ${NVBench_TEST_ARGS_${test_name}}) + nvbench_config_test_runtime_environment(${test_name}) add_dependencies(nvbench.test.all ${test_name}) endforeach() diff --git a/testing/axes_metadata.cu b/testing/axes_metadata.cu index 9e546602..d6cc441d 100644 --- a/testing/axes_metadata.cu +++ b/testing/axes_metadata.cu @@ -24,6 +24,7 @@ #include #include +#include #include #include "test_asserts.cuh" diff --git a/testing/cmake/CMakeLists.txt b/testing/cmake/CMakeLists.txt index c4e4eb77..d2082575 100644 --- a/testing/cmake/CMakeLists.txt +++ b/testing/cmake/CMakeLists.txt @@ -9,6 +9,24 @@ set(cmake_opts -D "CMAKE_CUDA_FLAGS=${CMAKE_CUDA_FLAGS}" -D "CMAKE_CUDA_ARCHITECTURES=${arches}" ) +if (WIN32) + set(cuda_host_compiler "${CMAKE_CUDA_HOST_COMPILER}") + if (NOT cuda_host_compiler) + set(cuda_host_compiler "${CMAKE_CXX_COMPILER}") + endif() + list(APPEND cmake_opts + -D "CMAKE_CUDA_HOST_COMPILER=${cuda_host_compiler}" + ) + if (CMAKE_LINKER) + list(APPEND cmake_opts -D "CMAKE_LINKER=${CMAKE_LINKER}") + endif() + if (CMAKE_RC_COMPILER) + list(APPEND cmake_opts -D "CMAKE_RC_COMPILER=${CMAKE_RC_COMPILER}") + endif() + if (CMAKE_MT) + list(APPEND cmake_opts -D "CMAKE_MT=${CMAKE_MT}") + endif() +endif() # Temporary installation prefix for tests against installed nvbench: set(tmp_install_prefix "${CMAKE_CURRENT_BINARY_DIR}/test_nvbench_install") @@ -32,6 +50,15 @@ function(nvbench_add_compile_test full_test_name_var subdir 
test_id) ${ARGN} --test-command "${CMAKE_CTEST_COMMAND}" --output-on-failure ) + if (WIN32) + set(path_mods "PATH=path_list_prepend:$") + if (TARGET nvbench::cupti) + list(PREPEND path_mods "PATH=path_list_prepend:$") + endif() + set_property(TEST ${test_name} PROPERTY + ENVIRONMENT_MODIFICATION ${path_mods} + ) + endif() set(${full_test_name_var} ${test_name} PARENT_SCOPE) endfunction() diff --git a/testing/cmake/test_export/CMakeLists.txt b/testing/cmake/test_export/CMakeLists.txt index e3d7d33c..f0aae8b2 100644 --- a/testing/cmake/test_export/CMakeLists.txt +++ b/testing/cmake/test_export/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.20.1) +cmake_minimum_required(VERSION 3.22.0) project(NVBenchTestExport CUDA CXX) message(STATUS "NVBench_DIR=${NVBench_DIR}") @@ -10,45 +10,107 @@ enable_testing() add_test(NAME test_bench COMMAND "$" --timeout 1) add_test(NAME nvbench_ctl COMMAND "$") -# Setup LD_LIBRARY_PATH for testing -if (UNIX) - set(ctl_lib_path "") - set(cupti_lib_path "") - - # Need to find installed libnvbench.so for installed nvbench-ctl. - # Not needed for build_tree test because of RUNPATH. - if (TEST_TYPE STREQUAL "INSTALL_TREE") - get_property(nvbench_config TARGET nvbench::nvbench - PROPERTY IMPORTED_CONFIGURATIONS - ) - - list(LENGTH nvbench_config num_configs) - if (num_configs GREATER 1) - message(WARNING - "Multiple IMPORTED_CONFIGURATIONS for nvbench::nvbench. " - "Picking the first one. This may cause issues." - ) - list(GET nvbench_config 0 nvbench_config) +# Setup runtime library paths for testing. +# Unix uses LD_LIBRARY_PATH; Windows uses PATH for DLL lookup. +function(get_imported_location out_var target_name) + get_property(imported_configs TARGET ${target_name} + PROPERTY IMPORTED_CONFIGURATIONS + ) + list(LENGTH imported_configs num_configs) + if (num_configs GREATER 1) + message(WARNING + "Multiple IMPORTED_CONFIGURATIONS for ${target_name}. " + "Picking CMAKE_BUILD_TYPE if present, otherwise the first one." 
+ ) + endif() + + if (num_configs GREATER 0) + if (CMAKE_BUILD_TYPE) + string(TOUPPER "${CMAKE_BUILD_TYPE}" build_type) + list(FIND imported_configs "${build_type}" imported_config_index) + else() + set(imported_config_index -1) endif() + if (imported_config_index GREATER_EQUAL 0) + list(GET imported_configs ${imported_config_index} imported_config) + else() + list(GET imported_configs 0 imported_config) + endif() + get_property(imported_location TARGET ${target_name} + PROPERTY IMPORTED_LOCATION_${imported_config} + ) + endif() - get_property(ctl_lib_path TARGET nvbench::nvbench - PROPERTY IMPORTED_LOCATION_${nvbench_config} + if (NOT imported_location) + get_property(imported_location TARGET ${target_name} + PROPERTY IMPORTED_LOCATION ) - cmake_path(GET ctl_lib_path PARENT_PATH ctl_lib_path) endif() - # Need to add the CUPTI path to LD_LIBRARY_PATH to make sure CUPTI libraries - # are found at runtime: - if (TARGET nvbench::cupti) - get_property(cupti_lib_path TARGET nvbench::cupti PROPERTY IMPORTED_LOCATION) - cmake_path(GET cupti_lib_path PARENT_PATH cupti_lib_path) + set(${out_var} "${imported_location}" PARENT_SCOPE) +endfunction() + +set(nvbench_lib_dir "") +# On Unix the build tree uses RUNPATH so only the install tree needs the path. +# On Windows there is no RUNPATH so we always need the DLL directory. 
+if (WIN32 OR TEST_TYPE STREQUAL "INSTALL_TREE") + get_imported_location(nvbench_lib nvbench::nvbench) + if (nvbench_lib) + cmake_path(GET nvbench_lib PARENT_PATH nvbench_lib_dir) endif() +endif() - set_property(TEST test_bench PROPERTY - ENVIRONMENT "LD_LIBRARY_PATH=${cupti_lib_path}" - ) - set_property(TEST nvbench_ctl PROPERTY - ENVIRONMENT "LD_LIBRARY_PATH=${ctl_lib_path}:${cupti_lib_path}" - ) +set(cupti_lib_dir "") +if (TARGET nvbench::cupti) + get_imported_location(cupti_lib nvbench::cupti) + if (cupti_lib) + cmake_path(GET cupti_lib PARENT_PATH cupti_lib_dir) + endif() +endif() +if (WIN32) + set(path_modifications "") + if (cupti_lib_dir) + list(APPEND path_modifications "PATH=path_list_prepend:$") + endif() + if (nvbench_lib_dir) + list(APPEND path_modifications "PATH=path_list_prepend:$") + endif() + if (path_modifications) + set_property(TEST test_bench PROPERTY + ENVIRONMENT_MODIFICATION ${path_modifications} + ) + set_property(TEST nvbench_ctl PROPERTY + ENVIRONMENT_MODIFICATION ${path_modifications} + ) + endif() +else() + set(test_bench_ld_modifications "") + if (cupti_lib_dir) + list(APPEND test_bench_ld_modifications + "LD_LIBRARY_PATH=path_list_prepend:$" + ) + endif() + if (test_bench_ld_modifications) + set_property(TEST test_bench PROPERTY + ENVIRONMENT_MODIFICATION ${test_bench_ld_modifications} + ) + endif() + + set(nvbench_ctl_ld_modifications "") + if (cupti_lib_dir) + list(APPEND nvbench_ctl_ld_modifications + "LD_LIBRARY_PATH=path_list_prepend:$" + ) + endif() + if (nvbench_lib_dir) + list(APPEND nvbench_ctl_ld_modifications + "LD_LIBRARY_PATH=path_list_prepend:$" + ) + endif() + if (nvbench_ctl_ld_modifications) + set_property(TEST nvbench_ctl PROPERTY + ENVIRONMENT_MODIFICATION ${nvbench_ctl_ld_modifications} + ) + endif() endif() diff --git a/testing/device/CMakeLists.txt b/testing/device/CMakeLists.txt index b7272ee1..918b5f1a 100644 --- a/testing/device/CMakeLists.txt +++ b/testing/device/CMakeLists.txt @@ -7,6 +7,7 @@ 
add_dependencies(nvbench.test.all ${test_name}) if (NVBench_ENABLE_DEVICE_TESTING) add_test(NAME ${test_name} COMMAND "$") + nvbench_config_test_runtime_environment(${test_name}) set_tests_properties(${test_name} PROPERTIES # Any timeouts/warnings are hard failures for this test. FAIL_REGULAR_EXPRESSION "Warn;timed out"