Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/blossom-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ jobs:
repository: ${{ fromJson(needs.Authorization.outputs.args).repo }}
ref: ${{ fromJson(needs.Authorization.outputs.args).ref }}
lfs: 'true'
fetch-depth: 0
- name: Run blossom action
uses: NVIDIA/blossom-action@main
env:
Expand Down
4 changes: 4 additions & 0 deletions build_config/accvlab_build_config/helpers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@
"""

from .build_utils import (
CudaArchitectureSelection,
load_config,
detect_cuda_info,
get_compile_flags,
select_cuda_architectures_for_nvcc,
run_external_build,
get_abs_setup_dir,
)
Expand All @@ -29,9 +31,11 @@
)

__all__ = [
'CudaArchitectureSelection',
'load_config',
'detect_cuda_info',
'get_compile_flags',
'select_cuda_architectures_for_nvcc',
'run_external_build',
'get_abs_setup_dir',
'build_cmake_args',
Expand Down
172 changes: 162 additions & 10 deletions build_config/accvlab_build_config/helpers/build_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,150 @@
"""

import os
import re
from pathlib import Path
import shutil
import subprocess
import sys
from typing import Optional
from typing import List, NamedTuple, Optional


class CudaArchitectureSelection(NamedTuple):
    """Result of matching requested CUDA architectures against ``nvcc``.

    Attributes:
        architectures: CUDA architectures to compile as cubin (``sm_XX``)
            targets.
        ptx_architectures: At most one architecture to emit as a PTX
            (``compute_XX``) target, populated only when a detected GPU
            architecture had to be capped to what ``nvcc`` supports.
    """

    architectures: List[str]
    ptx_architectures: List[str]


def _find_nvcc() -> Optional[str]:
    """Locate the ``nvcc`` binary used to query supported target architectures.

    Search order: the ``CUDACXX`` environment variable, then
    ``$CUDA_HOME/bin/nvcc`` and ``$CUDA_PATH/bin/nvcc`` (kept only if the
    path exists), and finally a ``PATH`` lookup. Returns ``None`` when no
    compiler can be found.
    """
    explicit = os.environ.get("CUDACXX")
    if explicit:
        return explicit

    for root_var in ("CUDA_HOME", "CUDA_PATH"):
        cuda_root = os.environ.get(root_var)
        if not cuda_root:
            continue
        nvcc_path = os.path.join(cuda_root, "bin", "nvcc")
        if os.path.exists(nvcc_path):
            return nvcc_path

    return shutil.which("nvcc")


def _detect_nvcc_supported_architectures() -> List[str]:
    """Query ``nvcc`` for the virtual GPU architectures it can target.

    Returns compute-capability strings such as ``['70', '75', '80', '90']``,
    deduplicated and sorted numerically. Returns an empty list when ``nvcc``
    cannot be located or the query fails for any reason.
    """
    nvcc = _find_nvcc()
    if nvcc is None:
        return []

    try:
        output = subprocess.run(
            [nvcc, "--list-gpu-arch"],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            timeout=10,
        ).stdout
    except Exception:
        # Best effort: a missing binary, non-zero exit, or timeout simply
        # means we cannot constrain the architecture list.
        return []

    seen = set()
    archs: List[str] = []
    for arch in re.findall(r"compute_([0-9]+)", output):
        if arch not in seen:
            seen.add(arch)
            archs.append(arch)

    return sorted(archs, key=int)


def _split_cuda_architectures(value: str) -> List[str]:
    """Split a comma- or semicolon-separated architecture list, dropping blanks."""
    pieces = (piece.strip() for piece in re.split(r"[,;]", value))
    return [piece for piece in pieces if piece]


def _forward_compatible_ptx_architecture(
    supported_architectures: List[str], max_architecture: int
) -> Optional[str]:
    """Choose the PTX target to emit after an architecture was capped.

    Prefers the newest "base" architecture (a multiple of 10, e.g. ``90``)
    supported at or below ``max_architecture``; falls back to the newest
    in-range architecture of any kind. Non-numeric entries are ignored.
    Returns ``None`` when no candidate qualifies.
    """
    in_range: List[str] = []
    for arch in supported_architectures:
        try:
            value = int(arch)
        except ValueError:
            continue
        if value <= max_architecture:
            in_range.append(arch)

    base_archs = [arch for arch in in_range if int(arch) % 10 == 0]
    if base_archs:
        return max(base_archs, key=int)
    return max(in_range, key=int) if in_range else None


def select_cuda_architectures_for_nvcc(
    cuda_architectures: List[str],
) -> CudaArchitectureSelection:
    """Select CUDA cubin and PTX targets supported by the installed ``nvcc``.

    Numeric architectures above the maximum that ``nvcc`` supports are capped
    to that maximum (duplicates produced by capping are dropped). When any
    capping occurs, a single PTX target is added: the newest
    forward-compatible base architecture supported by ``nvcc`` at or below
    the cap — e.g. with a maximum of ``96`` the PTX target is ``90``.

    Args:
        cuda_architectures: CUDA architecture numbers to select from, for
            example ``["80", "90", "103"]``.

    Returns:
        CudaArchitectureSelection: The capped cubin architectures plus, when
        capping occurred, the single PTX target. If ``nvcc`` cannot be found
        or queried, the input is returned unchanged with no PTX targets.
    """
    supported = _detect_nvcc_supported_architectures()
    if not cuda_architectures or not supported:
        # Nothing to cap against: pass the request through untouched.
        return CudaArchitectureSelection(cuda_architectures, [])

    ceiling = max(int(arch) for arch in supported)

    capped: List[str] = []
    capping_occurred = False
    for arch in cuda_architectures:
        try:
            numeric = int(arch)
        except ValueError:
            # Non-numeric entries (e.g. suffixed archs) pass through as-is.
            candidate = arch
        else:
            capping_occurred = capping_occurred or numeric > ceiling
            candidate = str(min(numeric, ceiling))
        if candidate not in capped:
            capped.append(candidate)

    ptx_targets: List[str] = []
    if capping_occurred:
        ptx = _forward_compatible_ptx_architecture(supported, ceiling)
        if ptx is not None:
            ptx_targets.append(ptx)

    return CudaArchitectureSelection(capped, ptx_targets)


def missing_torch_error() -> RuntimeError:
Expand Down Expand Up @@ -106,8 +246,8 @@ def load_config(default_config: Optional[dict] = None) -> dict:
config[key] = env_val.lower() in ('1', 'true', 'yes', 'on')
elif isinstance(config[key], int):
config[key] = int(env_val)
elif key == 'CUSTOM_CUDA_ARCHS' and env_val:
config[key] = env_val.split(',')
elif key == 'CUSTOM_CUDA_ARCHS':
config[key] = _split_cuda_architectures(env_val) if env_val else None
else:
config[key] = env_val

Expand Down Expand Up @@ -152,7 +292,12 @@ def detect_cuda_info():


def get_compile_flags(config, cuda_info, include_dirs=None):
"""Construct compilation flags
"""Construct compilation flags.

If ``CUSTOM_CUDA_ARCHS`` is unset, detected CUDA architectures are capped to
the maximum supported by ``nvcc``. If any architecture is capped, the newest
forward-compatible base architecture supported by ``nvcc`` is also emitted
as a PTX target.

Args:
config (dict): Build configuration
Expand Down Expand Up @@ -202,17 +347,24 @@ def get_compile_flags(config, cuda_info, include_dirs=None):

# CUDA flags (only if CUDA is available)
if cuda_info['cuda_available']:
cuda_archs = (
config['CUSTOM_CUDA_ARCHS']
if config['CUSTOM_CUDA_ARCHS'] is not None
else cuda_info['gpu_architectures']
)
ptx_archs: List[str] = []
if config['CUSTOM_CUDA_ARCHS'] is not None:
cuda_archs = config['CUSTOM_CUDA_ARCHS']
else:
arch_selection = select_cuda_architectures_for_nvcc(cuda_info['gpu_architectures'])
cuda_archs = arch_selection.architectures
ptx_archs = arch_selection.ptx_architectures

if not cuda_archs:
cuda_archs = ['70', '75', '80', '86'] # Default modern architectures
arch_selection = select_cuda_architectures_for_nvcc(['70', '75', '80', '86'])
cuda_archs = arch_selection.architectures
ptx_archs = arch_selection.ptx_architectures

# Generate architecture flags
for arch in cuda_archs:
flags['nvcc'].extend([f'-gencode=arch=compute_{arch},code=sm_{arch}'])
for arch in ptx_archs:
flags['nvcc'].extend([f'-gencode=arch=compute_{arch},code=compute_{arch}'])

# CUDA compilation flags
flags['nvcc'].extend(
Expand Down
60 changes: 28 additions & 32 deletions build_config/accvlab_build_config/helpers/cmake_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pathlib import Path
from typing import List, Optional

from .build_utils import missing_torch_error, require_torch_cuda_support
from .build_utils import detect_cuda_info, select_cuda_architectures_for_nvcc

# Marker file at the ACCV-Lab monorepo root (see `.nav` in the repository).
_NAV_MARKER = ".nav"
Expand Down Expand Up @@ -57,35 +57,16 @@ def _normalize_cpp_standard(value: str) -> str:
return v


def _detect_cuda_architectures() -> List[str]:
"""
Try to detect CUDA architectures from PyTorch if available.

Returns a list like ['70', '75', '80']. Returns an empty list if PyTorch is
CUDA-enabled but no CUDA devices are available.

Raises:
RuntimeError: If PyTorch is not installed or is installed without CUDA
support. ACCV-Lab CUDA extension builds require a CUDA-enabled
PyTorch wheel, so this is treated as a build configuration error
rather than as "CUDA not detected".
"""
try:
import torch # type: ignore
except ImportError as exc:
raise missing_torch_error() from exc

require_torch_cuda_support(torch)
def _format_cmake_cuda_architectures(archs: List[str], ptx_archs: List[str]) -> List[str]:
    """Render architecture lists in ``CMAKE_CUDA_ARCHITECTURES`` syntax.

    Args:
        archs: Architectures to build as real (cubin) targets.
        ptx_archs: Architectures to build as virtual (PTX) targets.

    Returns:
        The plain ``archs`` list when no PTX targets are requested, letting
        CMake apply its default code generation; otherwise each cubin
        architecture suffixed with ``-real`` followed by each PTX
        architecture suffixed with ``-virtual``.
    """
    if not ptx_archs:
        return archs

    cmake_archs: List[str] = [f"{arch}-real" for arch in archs]
    cmake_archs.extend(f"{arch}-virtual" for arch in ptx_archs)
    return cmake_archs


def get_project_root() -> Path:
Expand All @@ -112,6 +93,11 @@ def _build_cmake_args_from_env() -> List[str]:
"""
Build a list of -D CMake arguments from environment variables to harmonize
build configuration across setuptools, external CMake, and scikit-build flows.

If ``CUSTOM_CUDA_ARCHS`` is unset, detected CUDA architectures are capped to
the maximum supported by ``nvcc``. If capping occurs, CMake builds cubins for
the capped architectures and adds one PTX target for the newest supported
forward-compatible base architecture.
"""
args: List[str] = []
# Always export compile_commands.json for tooling/validation
Expand Down Expand Up @@ -139,9 +125,15 @@ def _build_cmake_args_from_env() -> List[str]:
args.append(f'-DCMAKE_CUDA_ARCHITECTURES={norm_archs}')
else:
# Attempt auto-detection via torch; if empty, let CMake defaults apply
detected = _detect_cuda_architectures()
cuda_info = detect_cuda_info()
detected = cuda_info['gpu_architectures'] if cuda_info['cuda_available'] else []
if detected:
args.append(f'-DCMAKE_CUDA_ARCHITECTURES={";".join(detected)}')
selection = select_cuda_architectures_for_nvcc(detected)
cmake_archs = _format_cmake_cuda_architectures(
selection.architectures,
selection.ptx_architectures,
)
args.append(f'-DCMAKE_CUDA_ARCHITECTURES={";".join(cmake_archs)}')

# VERBOSE_BUILD -> CMAKE_VERBOSE_MAKEFILE
if _parse_bool_env(os.environ.get("VERBOSE_BUILD", "")):
Expand Down Expand Up @@ -186,7 +178,7 @@ def _build_cmake_args_package_scm_version(repo_root: Path) -> List[str]:
Pass numeric version from setuptools-scm to CMake as a repo-aligned package
version define (and harmless for CMake projects that ignore the variable).
"""
from setuptools_scm import get_version
from setuptools_scm import get_version # type: ignore

v = get_version(
root=str(repo_root),
Expand All @@ -203,6 +195,10 @@ def _build_cmake_args_package_scm_version(repo_root: Path) -> List[str]:
def build_cmake_args() -> List[str]:
    """Assemble the full list of CMake ``-D`` arguments.

    Combines the environment-derived flags with the repo-aligned
    setuptools-scm version define.

    Auto-detected CUDA architectures are capped to ``nvcc`` support when
    ``CUSTOM_CUDA_ARCHS`` is unset. If capping occurs, one PTX target is
    emitted for the newest supported forward-compatible base architecture.
    """
    repo_root = get_project_root()
    env_args = _build_cmake_args_from_env()
    version_args = _build_cmake_args_package_scm_version(repo_root)
    return env_args + version_args
Expand Down
10 changes: 5 additions & 5 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

FROM nvidia/cuda:12.4.1-devel-ubuntu22.04
FROM nvidia/cuda:12.8.2-devel-ubuntu22.04

ENV DEBIAN_FRONTEND=noninteractive

Expand Down Expand Up @@ -86,7 +86,7 @@ RUN pip install numpy==1.23.5 \
scipy==1.15.3 \
opencv-python-headless==4.5.5.64

RUN pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu124
RUN pip install torch==2.11.0 torchvision==0.26.0 torchaudio==2.11.0 --index-url https://download.pytorch.org/whl/cu128

RUN pip install black

Expand All @@ -101,13 +101,13 @@ RUN pip install sphinx \
RUN pip install ninja \
scikit-build

RUN pip install pycuda==2025.1.1 \
RUN pip install pycuda==2025.1.2 \
pybind11==3.0.0 \
cvcuda-cu12==0.15.0

RUN pip install pandas==1.5.3 \
IPython \
nvidia-dali-cuda120==1.51.2 \
nvidia-dali-cuda120==1.53.0 \
nvtx \
psutil \
numba==0.59 \
Expand All @@ -123,7 +123,7 @@ RUN apt-get install -y libjpeg-dev zlib1g-dev

RUN pip install --upgrade pip setuptools==80.9.0 wheel setuptools-scm>=8

RUN pip install cupy==13.6.0
RUN pip install cupy-cuda12x==13.6.0

WORKDIR /workspace

Expand Down
Loading