6 changes: 6 additions & 0 deletions cuda_bindings/cuda/bindings/_test_helpers/__init__.py
@@ -0,0 +1,6 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE


# This package contains test helper utilities that may also be useful for other libraries outside of `cuda.bindings`,
# such as `cuda.core`. These utilities are not part of the public API of `cuda.bindings` and may change without notice.
44 changes: 44 additions & 0 deletions cuda_bindings/cuda/bindings/_test_helpers/arch_check.py
@@ -0,0 +1,44 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE


from contextlib import contextmanager

import pytest
from cuda.bindings import _nvml as nvml


@contextmanager
def unsupported_before(device: int, expected_device_arch: nvml.DeviceArch | str | None):
    device_arch = nvml.device_get_architecture(device)

    if isinstance(expected_device_arch, nvml.DeviceArch):
        expected_device_arch_int = int(expected_device_arch)
    elif expected_device_arch == "FERMI":
        expected_device_arch_int = 1
    else:
        expected_device_arch_int = 0

    if expected_device_arch is None or expected_device_arch == "HAS_INFOROM" or device_arch == nvml.DeviceArch.UNKNOWN:
        # In this case, we don't /know/ if it will fail, but we are ok if it
        # does or does not.

        # TODO: There are APIs that are documented as supported only if the
        # device has an InfoROM, but I couldn't find a way to detect that. For
        # now, they are just handled as "possibly failing".

        try:
            yield
        except nvml.NotSupportedError:
            pytest.skip(
                f"Unsupported call for device architecture {nvml.DeviceArch(device_arch).name} "
                f"on device '{nvml.device_get_name(device)}'"
            )
    elif int(device_arch) < expected_device_arch_int:
        # In this case, we /know/ it will fail, and we want to assert that it does.
        with pytest.raises(nvml.NotSupportedError):
            yield
        pytest.skip(f"Unsupported before {expected_device_arch.name}, got {nvml.device_get_name(device)}")
    else:
        # In this case, we /know/ it should work, and if it fails, the test should fail.
        yield
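
A minimal usage sketch of the new helper (illustrative only: the `all_devices` fixture, the energy-consumption call, and the VOLTA threshold mirror the tests further down in this change):

    from cuda.bindings import _nvml as nvml
    from cuda.bindings._test_helpers.arch_check import unsupported_before

    def test_total_energy_sketch(all_devices):
        for device in all_devices:
            # Pre-Volta device: the context manager asserts NotSupportedError is raised, then skips.
            # Unknown architecture: a NotSupportedError is converted into a skip.
            # Volta or newer: any NotSupportedError propagates and fails the test.
            with unsupported_before(device, nvml.DeviceArch.VOLTA):
                energy_mj = nvml.device_get_total_energy_consumption(device)
            assert energy_mj >= 0
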
1 change: 1 addition & 0 deletions cuda_bindings/tests/nvml/conftest.py
@@ -5,6 +5,7 @@

import pytest
from cuda.bindings import _nvml as nvml
from cuda.bindings._test_helpers.arch_check import unsupported_before # noqa: F401


class NVMLInitializer:
11 changes: 3 additions & 8 deletions cuda_bindings/tests/nvml/test_compute_mode.py
@@ -7,6 +7,8 @@
import pytest
from cuda.bindings import _nvml as nvml

from .conftest import unsupported_before

COMPUTE_MODES = [
nvml.ComputeMode.COMPUTEMODE_DEFAULT,
nvml.ComputeMode.COMPUTEMODE_PROHIBITED,
@@ -16,18 +18,11 @@

@pytest.mark.skipif(sys.platform == "win32", reason="Test not supported on Windows")
def test_compute_mode_supported_nonroot(all_devices):
skip_reasons = set()
for device in all_devices:
try:
with unsupported_before(device, None):
original_compute_mode = nvml.device_get_compute_mode(device)
except nvml.NotSupportedError:
skip_reasons.add(f"nvmlDeviceGetComputeMode not supported for device {device}")
continue

for cm in COMPUTE_MODES:
with pytest.raises(nvml.NoPermissionError):
nvml.device_set_compute_mode(device, cm)
assert original_compute_mode == nvml.device_get_compute_mode(device), "Compute mode shouldn't have changed"

if skip_reasons:
pytest.skip(" ; ".join(skip_reasons))
18 changes: 5 additions & 13 deletions cuda_bindings/tests/nvml/test_gpu.py
@@ -5,6 +5,7 @@
from cuda.bindings import _nvml as nvml

from . import util
from .conftest import unsupported_before


def test_gpu_get_module_id(nvml_init):
@@ -23,23 +24,14 @@ def test_gpu_get_module_id(nvml_init):


def test_gpu_get_platform_info(all_devices):
skip_reasons = set()
for device in all_devices:
if util.is_vgpu(device):
skip_reasons.add(f"Not supported on vGPU device {device}")
continue
pytest.skip(f"Not supported on vGPU device {device}")

# TODO
# if device.feature_dict.board.chip < board_class.Architecture.Blackwell:
# test_utils.skip_test("Not supported on chip before Blackwell")
# Documentation says Blackwell or newer only, but this does seem to pass
# on some pre-Blackwell GPUs as well.

try:
with unsupported_before(device, None):
platform_info = nvml.device_get_platform_info(device)
except nvml.NotSupportedError:
skip_reasons.add(f"Not supported returned, linkely NVLink is disable for {device}")
continue

assert isinstance(platform_info, nvml.PlatformInfo_v2)

if skip_reasons:
pytest.skip(" ; ".join(skip_reasons))
33 changes: 19 additions & 14 deletions cuda_bindings/tests/nvml/test_pynvml.py
@@ -10,6 +10,7 @@
from cuda.bindings import _nvml as nvml

from . import util
from .conftest import unsupported_before

XFAIL_LEGACY_NVLINK_MSG = "Legacy NVLink test expected to fail."

@@ -66,7 +67,8 @@ def test_device_get_handle_by_pci_bus_id(ngpus, pci_info):
def test_device_get_memory_affinity(handles, scope):
size = 1024
for handle in handles:
node_set = nvml.device_get_memory_affinity(handle, size, scope)
with unsupported_before(handle, nvml.DeviceArch.KEPLER):
node_set = nvml.device_get_memory_affinity(handle, size, scope)
assert node_set is not None
assert len(node_set) == size

@@ -76,7 +78,8 @@ def test_device_get_memory_affinity(handles, scope):
def test_device_get_cpu_affinity_within_scope(handles, scope):
size = 1024
for handle in handles:
cpu_set = nvml.device_get_cpu_affinity_within_scope(handle, size, scope)
with unsupported_before(handle, nvml.DeviceArch.KEPLER):
cpu_set = nvml.device_get_cpu_affinity_within_scope(handle, size, scope)
assert cpu_set is not None
assert len(cpu_set) == size

@@ -136,22 +139,22 @@ def test_device_get_p2p_status(handles, index):

def test_device_get_power_usage(ngpus, handles):
for i in range(ngpus):
try:
# Note: documentation says this is supported on Fermi or newer,
# but in practice it fails on some later architectures.
with unsupported_before(handles[i], None):
power_mwatts = nvml.device_get_power_usage(handles[i])
except nvml.NotSupportedError:
pytest.skip("device_get_power_usage not supported")
assert power_mwatts >= 0.0


def test_device_get_total_energy_consumption(ngpus, handles):
for i in range(ngpus):
try:
with unsupported_before(handles[i], nvml.DeviceArch.VOLTA):
energy_mjoules1 = nvml.device_get_total_energy_consumption(handles[i])
except nvml.NotSupportedError:
pytest.skip("device_get_total_energy_consumption not supported")

for j in range(10): # idle for 150 ms
time.sleep(0.015) # and check for increase every 15 ms
energy_mjoules2 = nvml.device_get_total_energy_consumption(handles[i])
with unsupported_before(handles[i], nvml.DeviceArch.VOLTA):
energy_mjoules2 = nvml.device_get_total_energy_consumption(handles[i])
assert energy_mjoules2 >= energy_mjoules1
if energy_mjoules2 > energy_mjoules1:
break
@@ -182,7 +185,8 @@ def test_device_get_memory_info(ngpus, handles):

def test_device_get_utilization_rates(ngpus, handles):
for i in range(ngpus):
urate = nvml.device_get_utilization_rates(handles[i])
with unsupported_before(handles[i], "FERMI"):
urate = nvml.device_get_utilization_rates(handles[i])
assert urate.gpu >= 0
assert urate.memory >= 0

@@ -239,7 +243,8 @@ def test_device_get_utilization_rates(ngpus, handles):

def test_device_get_pcie_throughput(ngpus, handles):
for i in range(ngpus):
tx_bytes_tp = nvml.device_get_pcie_throughput(handles[i], nvml.PcieUtilCounter.PCIE_UTIL_TX_BYTES)
with unsupported_before(handles[i], nvml.DeviceArch.MAXWELL):
tx_bytes_tp = nvml.device_get_pcie_throughput(handles[i], nvml.PcieUtilCounter.PCIE_UTIL_TX_BYTES)
assert tx_bytes_tp >= 0
rx_bytes_tp = nvml.device_get_pcie_throughput(handles[i], nvml.PcieUtilCounter.PCIE_UTIL_RX_BYTES)
assert rx_bytes_tp >= 0
@@ -271,10 +276,10 @@ def test_device_get_pcie_throughput(ngpus, handles):
def test_device_get_nvlink_capability(ngpus, handles, cap_type):
for i in range(ngpus):
for j in range(nvml.NVLINK_MAX_LINKS):
try:
# According to the documentation, this should be supported on PASCAL or newer,
# but it also seems to fail on some newer architectures.
with unsupported_before(handles[i], None):
cap = nvml.device_get_nvlink_capability(handles[i], j, cap_type)
except nvml.NotSupportedError:
pytest.skip("NVLink capability not supported")
assert cap >= 0


60 changes: 15 additions & 45 deletions cuda_core/cuda/core/system/_device.pyx
@@ -21,6 +21,7 @@ ClocksEventReasons = nvml.ClocksEventReasons
ClockType = nvml.ClockType
CoolerControl = nvml.CoolerControl
CoolerTarget = nvml.CoolerTarget
DeviceArch = nvml.DeviceArch
EventType = nvml.EventType
FanControlPolicy = nvml.FanControlPolicy
FieldId = nvml.FieldId
@@ -45,41 +46,6 @@ include "_performance.pxi"
include "_temperature.pxi"


class DeviceArchitecture:
"""
Device architecture enumeration.
"""

def __init__(self, architecture: int):
try:
self._architecture = nvml.DeviceArch(architecture)
except ValueError:
self._architecture = None

@property
def id(self) -> int:
"""
The numeric id of the device architecture.

Returns -1 if the device is unknown.
"""
if self._architecture is None:
return -1
return int(self._architecture)

@property
def name(self) -> str:
"""
The name of the device architecture.

Returns "Unlisted" if the device is unknown.
"""
if self._architecture is None:
return "Unlisted"
name = self._architecture.name
return name[name.rfind("_") + 1 :].title()


cdef class MemoryInfo:
"""
Memory allocation information for a device.
@@ -692,7 +658,8 @@ cdef class Device:
If anything other than a single `index`, `uuid` or `pci_bus_id` are specified.
"""

cdef intptr_t _handle
# This is made public for testing purposes only
cdef public intptr_t _handle

def __init__(
self,
@@ -952,16 +919,15 @@ cdef class Device:
return [Pstates(x) for x in nvml.device_get_supported_performance_states(self._handle)]

@property
def architecture(self) -> DeviceArchitecture:
def arch(self) -> DeviceArch:
"""
Device architecture. For example, a Tesla V100 will report
``DeviceArchitecture.name == "Volta"``, and RTX A6000 will report
``DeviceArchitecture.name == "Ampere"``. If the device returns an
architecture that is unknown to NVML then ``DeviceArchitecture.name ==
"Unknown"`` is reported, whereas an architecture that is unknown to
cuda.core.system is reported as ``DeviceArchitecture.name == "Unlisted"``.
Device architecture.

For example, a Tesla V100 will report ``DeviceArch.VOLTA``, and an
RTX A6000 will report ``DeviceArch.AMPERE``.
"""
return DeviceArchitecture(nvml.device_get_architecture(self._handle))
return DeviceArch(nvml.device_get_architecture(self._handle))

@property
def bar1_memory_info(self) -> BAR1MemoryInfo:
@@ -1027,6 +993,8 @@ cdef class Device:
"""
Retrieves the globally unique board serial number associated with this
device's board.

For all products with an InfoROM.
"""
return nvml.device_get_serial(self._handle)

@@ -1268,6 +1236,8 @@ cdef class Device:
"""
Get the addressing mode of the device.

For Turing™ or newer fully supported devices.

Addressing modes can be one of:

- :attr:`AddressingMode.DEVICE_ADDRESSING_MODE_HMM`: System allocated
@@ -1486,7 +1456,7 @@ __all__ = [
"CoolerInfo",
"CoolerTarget",
"Device",
"DeviceArchitecture",
"DeviceArch",
"DeviceAttributes",
"DeviceEvents",
"EventData",
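
Since this renames a public attribute, a rough migration sketch for callers (hedged: construction by index is an assumption, and the "before" behavior is taken from the removed DeviceArchitecture class above):

    from cuda.core import system

    dev = system.Device(index=0)   # assumed construction by index
    arch = dev.arch                # now a system.DeviceArch enum member
    print(arch.name)               # e.g. "VOLTA"; previously dev.architecture.name gave a
                                   # title-cased string such as "Volta", or "Unlisted" for
                                   # values unknown to cuda.core.system
    print(int(arch))               # the raw NVML architecture value; previously
                                   # dev.architecture.id, which returned -1 for unknown values
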
2 changes: 1 addition & 1 deletion cuda_core/docs/source/api.rst
@@ -103,7 +103,7 @@ CUDA system information and NVIDIA Management Library (NVML)
system.CoolerControl
system.CoolerInfo
system.CoolerTarget
system.DeviceArchitecture
system.DeviceArch
system.DeviceAttributes
system.DeviceEvents
system.EventData
6 changes: 6 additions & 0 deletions cuda_core/tests/system/conftest.py
@@ -9,3 +9,9 @@
skip_if_nvml_unsupported = pytest.mark.skipif(
not system.CUDA_BINDINGS_NVML_IS_COMPATIBLE, reason="NVML support requires cuda.bindings version 12.9.6+ or 13.1.2+"
)


def unsupported_before(device, expected_device_arch):
    from cuda.bindings._test_helpers.arch_check import unsupported_before as nvml_unsupported_before

    return nvml_unsupported_before(device._handle, expected_device_arch)
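
A hedged sketch of how a cuda.core system test might use this wrapper (the `single_device` fixture, the relative conftest import, and the `addressing_mode` property name are hypothetical, not part of this change):

    from cuda.core import system
    from .conftest import unsupported_before  # the wrapper defined above

    def test_addressing_mode_sketch(single_device):
        # Documented for Turing or newer; pass None instead of a DeviceArch member
        # when a call is known to behave inconsistently across architectures.
        with unsupported_before(single_device, system.DeviceArch.TURING):
            _ = single_device.addressing_mode  # hypothetical property name
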