Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
417a5d7
Squash-merge of PR #1519 (rparolin/env_var_improvements) rebased onto…
rwgk Mar 20, 2026
2164c33
replace _get_cuda_paths() with _get_cuda_path() using pathfinder
rwgk Mar 20, 2026
c004342
treat empty env vars as undefined in get_cuda_home_or_path()
rwgk Mar 20, 2026
3ba64ae
fix(pathfinder): clear get_cuda_home_or_path cache in test fixtures
rwgk Mar 20, 2026
0c6c655
fix(core): update test_build_hooks for _get_cuda_path rename, drop mu…
rwgk Mar 20, 2026
a4e38d0
refactor(core): use get_cuda_home_or_path() in test conftest skipif
rwgk Mar 20, 2026
3cb531e
refactor(core): use get_cuda_home_or_path() in examples
rwgk Mar 20, 2026
c174cd8
rename get_cuda_home_or_path -> get_cuda_path_or_home
rwgk Mar 20, 2026
24b89b5
make get_cuda_path_or_home a public API, privatize CUDA_ENV_VARS_ORDERED
rwgk Mar 20, 2026
feecb82
docs(pathfinder): manually edit 1.5.0 release notes, fix RST formatti…
rwgk Mar 20, 2026
74e2d5b
Add 1.5.0, 1.4.3, 1.4.2 in cuda_pathfinder/docs/nv-versions.json
rwgk Mar 20, 2026
5ae2976
docs: clarify that CUDA_PATH/CUDA_HOME priority comes from pathfinder
rwgk Mar 20, 2026
6d065e9
fix oversights that slipped in when manually editing cuda_pathfinder/…
rwgk Mar 21, 2026
8d3ed03
fix(pathfinder): change found_via from "CUDA_HOME" to "CUDA_PATH"
rwgk Mar 21, 2026
5e445bf
fix(build): don't import cuda.pathfinder in build_hooks.py
rwgk Mar 21, 2026
930e25d
Update pathfinder descriptor catalogs for cusparseLt release 0.9.0
rwgk Mar 23, 2026
51e6c6a
Merge branch 'main' into CUDA_PATH_CUDA_HOME_cleanup
rwgk Mar 23, 2026
23ad88c
Slightly enhance comment in _get_cuda_path()
rwgk Mar 23, 2026
7140117
Add PR #1806 to 1.5.0-notes.rst
rwgk Mar 23, 2026
1eff0f5
Merge branch 'main' into CUDA_PATH_CUDA_HOME_cleanup
rwgk Mar 23, 2026
00f202d
Merge branch 'main' into CUDA_PATH_CUDA_HOME_cleanup
rwgk Mar 23, 2026
0255083
Systematically rename find_in_cuda_home → find_in_cuda_path
rwgk Mar 23, 2026
9c13237
add _cuda_headers_available() guard to conftest files
rwgk Mar 23, 2026
a79ae81
Merge branch 'main' into CUDA_PATH_CUDA_HOME_cleanup
rwgk Mar 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 27 additions & 5 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,31 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0


import os

import pytest

from cuda.pathfinder import get_cuda_path_or_home
Copy link
Contributor

@cpcloud cpcloud Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To keep the repo-level core Cython gate accurate after the env-var change, this file needs os again if you preserve the include/ existence check below.

Suggested change
from cuda.pathfinder import get_cuda_path_or_home
import os
import pytest
from cuda.pathfinder import get_cuda_path_or_home



# Please keep in sync with the copy in cuda_core/tests/conftest.py.
def _cuda_headers_available() -> bool:
    """Report whether CUDA headers can be located for the test session.

    Returns False when get_cuda_path_or_home() yields None and True otherwise.
    A CUDA path that is set but lacks an include/ subdirectory is treated as a
    broken setup and triggers an AssertionError instead of a silent skip.
    """
    cuda_path = get_cuda_path_or_home()
    if cuda_path is not None:
        include_dir = os.path.join(cuda_path, "include")
        headers_present = os.path.isdir(include_dir)
        assert headers_present, f"CUDA path {cuda_path} does not contain an 'include' subdirectory"
        return True
    return False


def pytest_collection_modifyitems(config, items): # noqa: ARG001
cuda_home = os.environ.get("CUDA_HOME")
have_headers = _cuda_headers_available()
for item in items:
nodeid = item.nodeid.replace("\\", "/")

Expand All @@ -31,6 +49,10 @@ def pytest_collection_modifyitems(config, items): # noqa: ARG001
):
item.add_marker(pytest.mark.cython)

# Gate core cython tests on CUDA_HOME
if "core" in item.keywords and not cuda_home:
item.add_marker(pytest.mark.skip(reason="CUDA_HOME not set; skipping core cython tests"))
# Gate core cython tests on CUDA headers being available (CUDA_PATH or CUDA_HOME)
if "core" in item.keywords and not have_headers:
item.add_marker(
pytest.mark.skip(
reason="Environment variable CUDA_PATH or CUDA_HOME is not set: skipping core cython tests"
)
)
2 changes: 1 addition & 1 deletion cuda_bindings/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ To run these tests:

Cython tests are located in `tests/cython` and need to be built. These builds have the same CUDA Toolkit header requirements as [Installing from Source](https://nvidia.github.io/cuda-python/cuda-bindings/latest/install.html#requirements) where the major.minor version must match `cuda.bindings`. To build them:

1. Setup environment variable `CUDA_HOME` with the path to the CUDA Toolkit installation.
1. Set up the environment variable `CUDA_PATH` (or `CUDA_HOME`) with the path to the CUDA Toolkit installation. Note: If both are set, `CUDA_PATH` takes precedence.
2. Run the `build_tests` script located in `tests/cython` appropriate to your platform. This will both cythonize the tests and build them.

To run these tests:
Expand Down
27 changes: 15 additions & 12 deletions cuda_bindings/build_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,16 @@


@functools.cache
def _get_cuda_paths() -> list[str]:
CUDA_HOME = os.environ.get("CUDA_HOME", os.environ.get("CUDA_PATH", None))
if not CUDA_HOME:
raise RuntimeError("Environment variable CUDA_HOME or CUDA_PATH is not set")
CUDA_HOME = CUDA_HOME.split(os.pathsep)
print("CUDA paths:", CUDA_HOME)
return CUDA_HOME
def _get_cuda_path() -> str:
    """Return the CUDA Toolkit root taken from the build environment.

    CUDA_PATH is consulted first and CUDA_HOME second. A variable that is set
    to an empty string is treated as undefined, so an empty CUDA_PATH does not
    hide a usable CUDA_HOME.

    Raises RuntimeError if neither variable holds a non-empty value.
    """
    # Not using cuda.pathfinder.get_cuda_path_or_home() here because this
    # build backend runs in an isolated venv where the cuda namespace package
    # from backend-path shadows the installed cuda-pathfinder. See #1803 for
    # a workaround to apply after cuda-pathfinder >= 1.5 is released.
    #
    # `or` (instead of a nested .get() default) lets an empty CUDA_PATH fall
    # through to CUDA_HOME rather than short-circuiting the lookup with "".
    cuda_path = os.environ.get("CUDA_PATH") or os.environ.get("CUDA_HOME")
    if not cuda_path:
        raise RuntimeError("Environment variable CUDA_PATH or CUDA_HOME is not set")
    print("CUDA path:", cuda_path)
    return cuda_path


# -----------------------------------------------------------------------
Expand Down Expand Up @@ -133,8 +136,8 @@ def _fetch_header_paths(required_headers, include_path_list):
if missing_headers:
error_message = "Couldn't find required headers: "
error_message += ", ".join(missing_headers)
cuda_paths = _get_cuda_paths()
raise RuntimeError(f'{error_message}\nIs CUDA_HOME setup correctly? (CUDA_HOME="{cuda_paths}")')
cuda_path = _get_cuda_path()
raise RuntimeError(f'{error_message}\nIs CUDA_PATH setup correctly? (CUDA_PATH="{cuda_path}")')

return header_dict

Expand Down Expand Up @@ -291,7 +294,7 @@ def _build_cuda_bindings(strip=False):

global _extensions

cuda_paths = _get_cuda_paths()
cuda_path = _get_cuda_path()

if os.environ.get("PARALLEL_LEVEL") is not None:
warn(
Expand All @@ -307,7 +310,7 @@ def _build_cuda_bindings(strip=False):
compile_for_coverage = bool(int(os.environ.get("CUDA_PYTHON_COVERAGE", "0")))

# Parse CUDA headers
include_path_list = [os.path.join(path, "include") for path in cuda_paths]
include_path_list = [os.path.join(cuda_path, "include")]
header_dict = _fetch_header_paths(_REQUIRED_HEADERS, include_path_list)
found_types, found_functions, found_values, found_struct, struct_list = _parse_headers(
header_dict, include_path_list, parser_caching
Expand Down Expand Up @@ -347,7 +350,7 @@ def _build_cuda_bindings(strip=False):
] + include_path_list
library_dirs = [sysconfig.get_path("platlib"), os.path.join(os.sys.prefix, "lib")]
cudalib_subdirs = [r"lib\x64"] if sys.platform == "win32" else ["lib64", "lib"]
library_dirs.extend(os.path.join(prefix, subdir) for prefix in cuda_paths for subdir in cudalib_subdirs)
library_dirs.extend(os.path.join(cuda_path, subdir) for subdir in cudalib_subdirs)

extra_compile_args = []
extra_link_args = []
Expand Down
9 changes: 8 additions & 1 deletion cuda_bindings/docs/source/environment_variables.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,14 @@ Runtime Environment Variables
Build-Time Environment Variables
--------------------------------

- ``CUDA_HOME`` or ``CUDA_PATH``: Specifies the location of the CUDA Toolkit.
- ``CUDA_PATH`` or ``CUDA_HOME``: Specifies the location of the CUDA Toolkit. If both are set, ``CUDA_PATH`` takes precedence.

.. note::
The ``CUDA_PATH`` > ``CUDA_HOME`` priority is determined by ``cuda-pathfinder``.
Earlier versions of ``cuda-pathfinder`` (before 1.5.0) used the opposite order
(``CUDA_HOME`` > ``CUDA_PATH``). See the
`cuda-pathfinder 1.5.0 release notes <https://nvidia.github.io/cuda-python/cuda-pathfinder/latest/release/1.5.0-notes.html>`_
for details and migration guidance.

- ``CUDA_PYTHON_PARSER_CACHING`` : bool, toggles the caching of parsed header files during the cuda-bindings build process. If caching is enabled (``CUDA_PYTHON_PARSER_CACHING`` is True), the cache path is set to ./cache_<library_name>, where <library_name> is derived from the cuda toolkit libraries used to build cuda-bindings.

Expand Down
4 changes: 2 additions & 2 deletions cuda_bindings/docs/source/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,11 @@ Requirements

[^2]: The CUDA Runtime static library (``libcudart_static.a`` on Linux, ``cudart_static.lib`` on Windows) is part of the CUDA Toolkit. If using conda packages, it is contained in the ``cuda-cudart-static`` package.

Source builds require that the provided CUDA headers are of the same major.minor version as the ``cuda.bindings`` you're trying to build. Despite this requirement, note that the minor version compatibility is still maintained. Use the ``CUDA_HOME`` (or ``CUDA_PATH``) environment variable to specify the location of your headers. For example, if your headers are located in ``/usr/local/cuda/include``, then you should set ``CUDA_HOME`` with:
Source builds require that the provided CUDA headers are of the same major.minor version as the ``cuda.bindings`` you're trying to build. Despite this requirement, note that the minor version compatibility is still maintained. Use the ``CUDA_PATH`` (or ``CUDA_HOME``) environment variable to specify the location of your headers. If both are set, ``CUDA_PATH`` takes precedence. For example, if your headers are located in ``/usr/local/cuda/include``, then you should set ``CUDA_PATH`` with:

.. code-block:: console

$ export CUDA_HOME=/usr/local/cuda
$ export CUDA_PATH=/usr/local/cuda

See `Environment Variables <environment_variables.rst>`_ for a description of other build-time environment variables.

Expand Down
2 changes: 1 addition & 1 deletion cuda_core/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Alternatively, from the repository root you can use a simple script:

Cython tests are located in `tests/cython` and need to be built. These builds have the same CUDA Toolkit header requirements as [those of cuda.bindings](https://nvidia.github.io/cuda-python/cuda-bindings/latest/install.html#requirements) where the major.minor version must match `cuda.bindings`. To build them:

1. Set up environment variable `CUDA_HOME` with the path to the CUDA Toolkit installation.
1. Set up environment variable `CUDA_PATH` (or `CUDA_HOME`) with the path to the CUDA Toolkit installation. Note: If both are set, `CUDA_PATH` takes precedence.
2. Run `build_tests` script located in `tests/cython` appropriate to your platform. This will both cythonize the tests and build them.

To run these tests:
Expand Down
42 changes: 22 additions & 20 deletions cuda_core/build_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,15 @@


@functools.cache
def _get_cuda_paths() -> list[str]:
cuda_path = os.environ.get("CUDA_PATH", os.environ.get("CUDA_HOME", None))
def _get_cuda_path() -> str:
# Not using cuda.pathfinder.get_cuda_path_or_home() here because this
# build backend runs in an isolated venv where the cuda namespace package
# from backend-path shadows the installed cuda-pathfinder. See #1803 for
# a workaround to apply after cuda-pathfinder >= 1.5 is released.
cuda_path = os.environ.get("CUDA_PATH", os.environ.get("CUDA_HOME"))
if not cuda_path:
raise RuntimeError("Environment variable CUDA_PATH or CUDA_HOME is not set")
cuda_path = cuda_path.split(os.pathsep)
print("CUDA paths:", cuda_path)
print("CUDA path:", cuda_path)
return cuda_path


Expand All @@ -60,21 +63,20 @@ def _determine_cuda_major_version() -> str:
return cuda_major

# Derive from the CUDA headers (the authoritative source for what we compile against).
cuda_path = _get_cuda_paths()
for root in cuda_path:
cuda_h = os.path.join(root, "include", "cuda.h")
try:
with open(cuda_h, encoding="utf-8") as f:
for line in f:
m = re.match(r"^#\s*define\s+CUDA_VERSION\s+(\d+)\s*$", line)
if m:
v = int(m.group(1))
# CUDA_VERSION is e.g. 12020 for 12.2.
cuda_major = str(v // 1000)
print("CUDA MAJOR VERSION:", cuda_major)
return cuda_major
except OSError:
continue
cuda_path = _get_cuda_path()
cuda_h = os.path.join(cuda_path, "include", "cuda.h")
try:
with open(cuda_h, encoding="utf-8") as f:
for line in f:
m = re.match(r"^#\s*define\s+CUDA_VERSION\s+(\d+)\s*$", line)
if m:
v = int(m.group(1))
# CUDA_VERSION is e.g. 12020 for 12.2.
cuda_major = str(v // 1000)
print("CUDA MAJOR VERSION:", cuda_major)
return cuda_major
except OSError:
pass

# CUDA_PATH or CUDA_HOME is required for the build, so we should not reach here
# in normal circumstances. Raise an error to make the issue clear.
Expand Down Expand Up @@ -132,7 +134,7 @@ def get_sources(mod_name):

return sources

all_include_dirs = [os.path.join(root, "include") for root in _get_cuda_paths()]
all_include_dirs = [os.path.join(_get_cuda_path(), "include")]
extra_compile_args = []
if COMPILE_FOR_COVERAGE:
# CYTHON_TRACE_NOGIL indicates to trace nogil functions. It is not
Expand Down
5 changes: 3 additions & 2 deletions cuda_core/examples/thread_block_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
ProgramOptions,
launch,
)
from cuda.pathfinder import get_cuda_path_or_home

# print cluster info using a kernel and store results in pinned memory
code = r"""
Expand Down Expand Up @@ -65,9 +66,9 @@ def main():
print("This example requires NumPy 2.2.5 or later", file=sys.stderr)
sys.exit(1)

cuda_path = os.environ.get("CUDA_PATH", os.environ.get("CUDA_HOME"))
cuda_path = get_cuda_path_or_home()
if cuda_path is None:
print("this example requires a valid CUDA_PATH environment variable set", file=sys.stderr)
print("This example requires CUDA_PATH or CUDA_HOME to point to a CUDA toolkit.", file=sys.stderr)
sys.exit(1)
cuda_include = os.path.join(cuda_path, "include")
if not os.path.isdir(cuda_include):
Expand Down
3 changes: 2 additions & 1 deletion cuda_core/examples/tma_tensor_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
StridedMemoryView,
launch,
)
from cuda.pathfinder import get_cuda_path_or_home

# ---------------------------------------------------------------------------
# CUDA kernel that uses TMA to load a 1-D tile into shared memory, then
Expand Down Expand Up @@ -103,7 +104,7 @@


def _get_cccl_include_paths():
cuda_path = os.environ.get("CUDA_PATH", os.environ.get("CUDA_HOME"))
cuda_path = get_cuda_path_or_home()
if cuda_path is None:
print("This example requires CUDA_PATH or CUDA_HOME to point to a CUDA toolkit.", file=sys.stderr)
sys.exit(1)
Expand Down
2 changes: 1 addition & 1 deletion cuda_core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
requires = [
"setuptools>=80",
"setuptools-scm[simple]>=8",
"Cython>=3.2,<3.3"
"Cython>=3.2,<3.3",
]
build-backend = "build_hooks"
backend-path = ["."]
Expand Down
19 changes: 18 additions & 1 deletion cuda_core/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

import pytest

from cuda.pathfinder import get_cuda_path_or_home

try:
from cuda.bindings import driver
except ImportError:
Expand Down Expand Up @@ -252,7 +254,22 @@ def test_something(memory_resource_factory):
return request.param


# Please keep in sync with the copy in the top-level conftest.py.
def _cuda_headers_available() -> bool:
    """Report whether CUDA headers can be located for the test session.

    Returns False when get_cuda_path_or_home() yields None and True otherwise.
    A CUDA path that is set but lacks an include/ subdirectory is treated as a
    broken setup and triggers an AssertionError instead of a silent skip.
    """
    cuda_path = get_cuda_path_or_home()
    if cuda_path is not None:
        include_dir = os.path.join(cuda_path, "include")
        headers_present = os.path.isdir(include_dir)
        assert headers_present, f"CUDA path {cuda_path} does not contain an 'include' subdirectory"
        return True
    return False


skipif_need_cuda_headers = pytest.mark.skipif(
not os.path.isdir(os.path.join(os.environ.get("CUDA_PATH", ""), "include")),
not _cuda_headers_available(),
reason="need CUDA header",
)
5 changes: 3 additions & 2 deletions cuda_core/tests/helpers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import functools
import os
from typing import Union

from cuda.core._utils.cuda_utils import handle_return
from cuda.pathfinder import get_cuda_path_or_home
from cuda_python_test_helpers import *

CUDA_PATH = os.environ.get("CUDA_PATH")
CUDA_PATH = get_cuda_path_or_home()
CUDA_INCLUDE_PATH = None
CCCL_INCLUDE_PATHS = None
if CUDA_PATH is not None:
Expand Down
6 changes: 3 additions & 3 deletions cuda_core/tests/test_build_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def _check_version_detection(
cuda_h = include_dir / "cuda.h"
cuda_h.write_text(f"#define CUDA_VERSION {cuda_version}\n")

build_hooks._get_cuda_paths.cache_clear()
build_hooks._get_cuda_path.cache_clear()
build_hooks._determine_cuda_major_version.cache_clear()

mock_env = {
Expand All @@ -90,7 +90,7 @@ class TestGetCudaMajorVersion:
@pytest.mark.parametrize("version", ["11", "12", "13", "14"])
def test_env_var_override(self, version):
"""CUDA_CORE_BUILD_MAJOR env var override works with various versions."""
build_hooks._get_cuda_paths.cache_clear()
build_hooks._get_cuda_path.cache_clear()
build_hooks._determine_cuda_major_version.cache_clear()
with mock.patch.dict(os.environ, {"CUDA_CORE_BUILD_MAJOR": version}, clear=False):
result = build_hooks._determine_cuda_major_version()
Expand Down Expand Up @@ -123,7 +123,7 @@ def test_env_var_takes_priority_over_headers(self):

def test_missing_cuda_path_raises_error(self):
"""RuntimeError is raised when CUDA_PATH/CUDA_HOME not set and no env var override."""
build_hooks._get_cuda_paths.cache_clear()
build_hooks._get_cuda_path.cache_clear()
build_hooks._determine_cuda_major_version.cache_clear()
with (
mock.patch.dict(os.environ, {}, clear=True),
Expand Down
1 change: 1 addition & 0 deletions cuda_pathfinder/cuda/pathfinder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
from cuda.pathfinder._static_libs.find_static_lib import (
locate_static_lib as locate_static_lib,
)
from cuda.pathfinder._utils.env_vars import get_cuda_path_or_home as get_cuda_path_or_home

from cuda.pathfinder._version import __version__ # isort: skip

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import shutil

from cuda.pathfinder._binaries import supported_nvidia_binaries
from cuda.pathfinder._utils.env_vars import get_cuda_home_or_path
from cuda.pathfinder._utils.env_vars import get_cuda_path_or_home
from cuda.pathfinder._utils.find_sub_dirs import find_sub_dirs_all_sitepackages
from cuda.pathfinder._utils.platform_aware import IS_WINDOWS

Expand Down Expand Up @@ -97,7 +97,7 @@ def find_nvidia_binary_utility(utility_name: str) -> str | None:
dirs.append(os.path.join(conda_prefix, "bin"))

# 3. Search in CUDA Toolkit (CUDA_PATH/CUDA_HOME)
if (cuda_home := get_cuda_home_or_path()) is not None:
if (cuda_home := get_cuda_path_or_home()) is not None:
if IS_WINDOWS:
dirs.append(os.path.join(cuda_home, "bin", "x64"))
dirs.append(os.path.join(cuda_home, "bin", "x86_64"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class LoadedDL:
abs_path: str | None
was_already_loaded_from_elsewhere: bool
_handle_uint: int # Platform-agnostic unsigned pointer value
found_via: str
found_via: str # "CUDA_PATH" covers both CUDA_PATH and CUDA_HOME env vars


def load_dependencies(desc: LibDescriptor, load_func: Callable[[str], LoadedDL]) -> None:
Expand Down
Loading
Loading