From abdec47f2c3de514a02d14f08fffe3fc097ed729 Mon Sep 17 00:00:00 2001
From: Rob Parolin <rparolin@nvidia.com>
Date: Mon, 16 Mar 2026 17:37:49 -0700
Subject: [PATCH 01/16] Add managed-memory advise/prefetch/discard_prefetch to Buffer

---
 cuda_core/cuda/core/_memory/_buffer.pxd       |   1 +
 cuda_core/cuda/core/_memory/_buffer.pyx       | 284 ++++++++++++++++++
 cuda_core/docs/source/release/0.7.x-notes.rst |   5 +
 cuda_core/tests/test_memory.py                | 127 ++++++++
 4 files changed, 417 insertions(+)

diff --git a/cuda_core/cuda/core/_memory/_buffer.pxd b/cuda_core/cuda/core/_memory/_buffer.pxd
index 91c0cfe24a..04b5707e18 100644
--- a/cuda_core/cuda/core/_memory/_buffer.pxd
+++ b/cuda_core/cuda/core/_memory/_buffer.pxd
@@ -12,6 +12,7 @@ cdef struct _MemAttrs:
     int device_id
     bint is_device_accessible
     bint is_host_accessible
+    bint is_managed
 
 
 cdef class Buffer:
diff --git a/cuda_core/cuda/core/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx
index 83009f74ae..686585b527 100644
--- a/cuda_core/cuda/core/_memory/_buffer.pyx
+++ b/cuda_core/cuda/core/_memory/_buffer.pyx
@@ -72,6 +72,194 @@ A type union of :obj:`~driver.CUdeviceptr`, `int` and `None` for hinting
 :attr:`Buffer.handle`.
 """
 
+
+cdef tuple _VALID_MANAGED_LOCATION_TYPES = (
+    "device",
+    "host",
+    "host_numa",
+    "host_numa_current",
+)
+
+cdef dict _MANAGED_LOCATION_TYPE_ATTRS = {
+    "device": "CU_MEM_LOCATION_TYPE_DEVICE",
+    "host": "CU_MEM_LOCATION_TYPE_HOST",
+    "host_numa": "CU_MEM_LOCATION_TYPE_HOST_NUMA",
+    "host_numa_current": "CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT",
+}
+
+cdef dict _MANAGED_ADVICE_ALIASES = {
+    "set_read_mostly": "CU_MEM_ADVISE_SET_READ_MOSTLY",
+    "cu_mem_advise_set_read_mostly": "CU_MEM_ADVISE_SET_READ_MOSTLY",
+    "unset_read_mostly": "CU_MEM_ADVISE_UNSET_READ_MOSTLY",
+    "cu_mem_advise_unset_read_mostly": "CU_MEM_ADVISE_UNSET_READ_MOSTLY",
+    "set_preferred_location": "CU_MEM_ADVISE_SET_PREFERRED_LOCATION",
+    "cu_mem_advise_set_preferred_location": "CU_MEM_ADVISE_SET_PREFERRED_LOCATION",
+    "unset_preferred_location": "CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION",
+    "cu_mem_advise_unset_preferred_location": "CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION",
+    "set_accessed_by": "CU_MEM_ADVISE_SET_ACCESSED_BY",
+    "cu_mem_advise_set_accessed_by": "CU_MEM_ADVISE_SET_ACCESSED_BY",
+    "unset_accessed_by": "CU_MEM_ADVISE_UNSET_ACCESSED_BY",
+    "cu_mem_advise_unset_accessed_by": "CU_MEM_ADVISE_UNSET_ACCESSED_BY",
+}
+
+cdef frozenset _MANAGED_ADVICE_IGNORE_LOCATION = frozenset((
+    "set_read_mostly",
+    "unset_read_mostly",
+    "unset_preferred_location",
+))
+
+cdef frozenset _MANAGED_ADVICE_HOST_OR_DEVICE_ONLY = frozenset((
+    "set_accessed_by",
+    "unset_accessed_by",
+))
+
+
+cdef inline object _managed_location_enum(str location_type):
+    cdef str attr_name = _MANAGED_LOCATION_TYPE_ATTRS[location_type]
+    if not hasattr(driver.CUmemLocationType, attr_name):
+        raise RuntimeError(
+            f"Managed-memory location type {location_type!r} is not supported by the "
+            f"installed cuda.bindings package."
+        )
+    return getattr(driver.CUmemLocationType, attr_name)
+
+
+cdef inline object _make_managed_location(str location_type, int location_id):
+    cdef object location = driver.CUmemLocation()
+    location.type = _managed_location_enum(location_type)
+    if location_type == "host":
+        location.id = int(getattr(driver, "CU_DEVICE_CPU", -1))
+    elif location_type == "host_numa_current":
+        location.id = 0
+    else:
+        location.id = location_id
+    return location
+
+
+cdef inline tuple _normalize_managed_advice(object advice):
+    cdef str alias
+    cdef str attr_name
+    if isinstance(advice, str):
+        alias = advice.lower()
+        attr_name = _MANAGED_ADVICE_ALIASES.get(alias)
+        if attr_name is None:
+            raise ValueError(
+                "advice must be one of "
+                f"{tuple(sorted(_MANAGED_ADVICE_ALIASES))!r}, got {advice!r}"
+            )
+        return alias, getattr(driver.CUmem_advise, attr_name)
+
+    if isinstance(advice, driver.CUmem_advise):
+        for alias, attr_name in _MANAGED_ADVICE_ALIASES.items():
+            if alias.startswith("cu_mem_advise_"):
+                continue
+            if advice == getattr(driver.CUmem_advise, attr_name):
+                return alias, advice
+        raise ValueError(f"Unsupported advice value: {advice!r}")
+
+    raise TypeError(
+        "advice must be a cuda.bindings.driver.CUmem_advise value or a supported string alias"
+    )
+
+
+cdef inline object _normalize_managed_location(
+    object location,
+    object location_type,
+    str what,
+    bint allow_none=False,
+    bint allow_host=True,
+    bint allow_host_numa=True,
+    bint allow_host_numa_current=True,
+):
+    cdef object loc_type
+    cdef int loc_id
+
+    if isinstance(location, Device):
+        location = (<Device>location).device_id
+
+    if location_type is not None and not isinstance(location_type, str):
+        raise TypeError(f"{what} location_type must be a string or None, got {type(location_type).__name__}")
+
+    loc_type = None if location_type is None else (<str>location_type).lower()
+    if loc_type is not None and loc_type not in _VALID_MANAGED_LOCATION_TYPES:
+        raise ValueError(
+            f"{what} location_type must be one of {_VALID_MANAGED_LOCATION_TYPES!r} "
+            f"or None, got {location_type!r}"
+        )
+
+    if loc_type is None:
+        if location is None:
+            if allow_none:
+                return _make_managed_location("host", -1)
+            raise ValueError(f"{what} requires a location")
+        if not isinstance(location, int):
+            raise TypeError(
+                f"{what} location must be a Device, int, or None, got {type(location).__name__}"
+            )
+        loc_id = <int>location
+        if loc_id == -1:
+            loc_type = "host"
+        elif loc_id >= 0:
+            loc_type = "device"
+        else:
+            raise ValueError(
+                f"{what} location must be a device ordinal (>= 0), -1 for host, or None; got {location!r}"
+            )
+    elif loc_type == "device":
+        if isinstance(location, int) and <int>location >= 0:
+            loc_id = <int>location
+        else:
+            raise ValueError(
+                f"{what} location must be a device ordinal (>= 0) when location_type is 'device', got {location!r}"
+            )
+        return _make_managed_location(loc_type, loc_id)
+    elif loc_type == "host":
+        if location not in (None, -1):
+            raise ValueError(
+                f"{what} location must be None or -1 when location_type is 'host', got {location!r}"
+            )
+        if not allow_host:
+            raise ValueError(f"{what} does not support location_type='host'")
+        return _make_managed_location(loc_type, -1)
+    elif loc_type == "host_numa":
+        if not allow_host_numa:
+            raise ValueError(f"{what} does not support location_type='host_numa'")
+        if not isinstance(location, int) or <int>location < 0:
+            raise ValueError(
+                f"{what} location must be a NUMA node ID (>= 0) when location_type is 'host_numa', got {location!r}"
+            )
+        return _make_managed_location(loc_type, <int>location)
+    else:
+        if not allow_host_numa_current:
+            raise ValueError(f"{what} does not support location_type='host_numa_current'")
+        if location is not None:
+            raise ValueError(
+                f"{what} location must be None when location_type is 'host_numa_current', got {location!r}"
+            )
+        return _make_managed_location(loc_type, 0)
+
+    if loc_type == "host" and not allow_host:
+        raise ValueError(f"{what} does not support host locations")
+    if loc_type == "host_numa" and not allow_host_numa:
+        raise ValueError(f"{what} does not support location_type='host_numa'")
+    if loc_type == "host_numa_current" and not allow_host_numa_current:
+        raise ValueError(f"{what} does not support location_type='host_numa_current'")
+    return _make_managed_location(<str>loc_type, loc_id)
+
+
+cdef inline void _require_managed_buffer(Buffer self, str what):
+    _init_mem_attrs(self)
+    if not self._mem_attrs.is_managed:
+        raise ValueError(f"{what} requires a managed-memory buffer")
+
+
+cdef inline void _require_managed_discard_prefetch_support():
+    if not hasattr(driver, "cuMemDiscardAndPrefetchBatchAsync"):
+        raise RuntimeError(
+            "Buffer.discard_prefetch requires cuda.bindings support for "
+            "cuMemDiscardAndPrefetchBatchAsync"
+        )
+
 cdef class Buffer:
     """Represent a handle to allocated memory.
 
@@ -293,6 +481,99 @@ cdef class Buffer:
         finally:
             PyBuffer_Release(&buf)
 
+    def advise(
+        self,
+        advice: driver.CUmem_advise | str,
+        location: Device | int | None = None,
+        *,
+        location_type: str | None = None,
+    ):
+        """Apply a managed-memory advice to this buffer.
+
+        This method is only valid for buffers backed by managed memory.
+
+        Parameters
+        ----------
+        advice : :obj:`~driver.CUmem_advise` | str
+            Managed-memory advice to apply. String aliases such as
+            ``"set_read_mostly"``, ``"set_preferred_location"``, and
+            ``"set_accessed_by"`` are accepted.
+        location : :obj:`~_device.Device` | int | None, optional
+            Target location. When ``location_type`` is ``None``, values are
+            interpreted as a device ordinal, ``-1`` for host, or ``None`` for
+            advice values that ignore location.
+        location_type : str | None, optional
+            Explicit location kind. Supported values are ``"device"``,
+            ``"host"``, ``"host_numa"``, and ``"host_numa_current"``.
+        """
+        cdef str advice_name
+        _require_managed_buffer(self, "Buffer.advise")
+        advice_name, advice = _normalize_managed_advice(advice)
+        location = _normalize_managed_location(
+            location,
+            location_type,
+            "Buffer.advise",
+            allow_none=advice_name in _MANAGED_ADVICE_IGNORE_LOCATION,
+            allow_host=True,
+            allow_host_numa=advice_name not in _MANAGED_ADVICE_HOST_OR_DEVICE_ONLY,
+            allow_host_numa_current=advice_name == "set_preferred_location",
+        )
+        handle_return(driver.cuMemAdvise(self.handle, self._size, advice, location))
+
+    def prefetch(
+        self,
+        location: Device | int | None = None,
+        *,
+        stream: Stream | GraphBuilder,
+        location_type: str | None = None,
+    ):
+        """Prefetch this managed-memory buffer to a target location."""
+        cdef Stream s = Stream_accept(stream)
+        _require_managed_buffer(self, "Buffer.prefetch")
+        location = _normalize_managed_location(
+            location,
+            location_type,
+            "Buffer.prefetch",
+            allow_none=False,
+            allow_host=True,
+            allow_host_numa=True,
+            allow_host_numa_current=True,
+        )
+        handle_return(driver.cuMemPrefetchAsync(self.handle, self._size, location, 0, s.handle))
+
+    def discard_prefetch(
+        self,
+        location: Device | int | None = None,
+        *,
+        stream: Stream | GraphBuilder,
+        location_type: str | None = None,
+    ):
+        """Discard this managed-memory buffer and prefetch it to a target location."""
+        cdef Stream s = Stream_accept(stream)
+        _require_managed_buffer(self, "Buffer.discard_prefetch")
+        _require_managed_discard_prefetch_support()
+        location = _normalize_managed_location(
+            location,
+            location_type,
+            "Buffer.discard_prefetch",
+            allow_none=False,
+            allow_host=True,
+            allow_host_numa=True,
+            allow_host_numa_current=True,
+        )
+        handle_return(
+            driver.cuMemDiscardAndPrefetchBatchAsync(
+                [self.handle],
+                [self._size],
+                1,
+                [location],
+                [0],
+                1,
+                0,
+                s.handle,
+            )
+        )
+
     def __dlpack__(
         self,
         *,
@@ -453,6 +734,7 @@ cdef inline int _query_memory_attrs(
         out.is_host_accessible = True
         out.is_device_accessible = False
         out.device_id = -1
+        out.is_managed = False
     elif (
         is_managed
         or memory_type == cydriver.CUmemorytype.CU_MEMORYTYPE_HOST
@@ -461,10 +743,12 @@ cdef inline int _query_memory_attrs(
         out.is_host_accessible = True
         out.is_device_accessible = True
         out.device_id = device_id
+        out.is_managed = is_managed != 0
     elif memory_type == cydriver.CUmemorytype.CU_MEMORYTYPE_DEVICE:
         out.is_host_accessible = False
         out.is_device_accessible = True
         out.device_id = device_id
+        out.is_managed = False
     else:
         with cython.gil:
             raise ValueError(f"Unsupported memory type: {memory_type}")
diff --git a/cuda_core/docs/source/release/0.7.x-notes.rst b/cuda_core/docs/source/release/0.7.x-notes.rst
index 98551603b6..18b3bede36 100644
--- a/cuda_core/docs/source/release/0.7.x-notes.rst
+++ b/cuda_core/docs/source/release/0.7.x-notes.rst
@@ -35,6 +35,11 @@ New features
   preference, or a tuple such as ``("device", 0)``, ``("host", None)``, or
   ``("host_numa", 3)``.
 
+- Added managed-memory controls on :class:`Buffer`: ``advise()``,
+  ``prefetch()``, and ``discard_prefetch()``. These methods validate that the
+  underlying allocation is managed memory and then forward to the corresponding
+  CUDA driver operations for range advice and migration.
+
 - Added ``numa_id`` option to :class:`PinnedMemoryResourceOptions` for explicit
   control over host NUMA node placement. When ``ipc_enabled=True`` and
   ``numa_id`` is not set, the NUMA node is automatically derived from the
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 0473d2d183..dd146785ec 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -1134,6 +1134,133 @@ def test_managed_memory_resource_preferred_location_validation(init_cuda):
         )
 
 
+def _get_mem_range_attr(buffer, attribute, data_size):
+    return handle_return(driver.cuMemRangeGetAttribute(data_size, attribute, buffer.handle, buffer.size))
+
+
+def test_managed_buffer_advise_prefetch_and_discard_prefetch(init_cuda):
+    device = Device()
+    skip_if_managed_memory_unsupported(device)
+    device.set_current()
+
+    if not hasattr(driver, "cuMemDiscardAndPrefetchBatchAsync"):
+        pytest.skip("discard-prefetch requires cuda.bindings support")
+
+    mr = create_managed_memory_resource_or_skip()
+    buffer = mr.allocate(4096)
+    stream = device.create_stream()
+
+    buffer.advise("set_read_mostly")
+    assert _get_mem_range_attr(
+        buffer,
+        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY,
+        4,
+    ) == 1
+
+    buffer.advise("set_preferred_location", device, location_type="device")
+    preferred_type = _get_mem_range_attr(
+        buffer,
+        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE,
+        4,
+    )
+    preferred_id = _get_mem_range_attr(
+        buffer,
+        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID,
+        4,
+    )
+    assert int(preferred_type) == int(driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE)
+    assert preferred_id == device.device_id
+
+    buffer.prefetch(-1, stream=stream)
+    stream.sync()
+    last_type = _get_mem_range_attr(
+        buffer,
+        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE,
+        4,
+    )
+    assert int(last_type) == int(driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST)
+
+    buffer.discard_prefetch(device, stream=stream)
+    stream.sync()
+    last_type = _get_mem_range_attr(
+        buffer,
+        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE,
+        4,
+    )
+    last_id = _get_mem_range_attr(
+        buffer,
+        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID,
+        4,
+    )
+    assert int(last_type) == int(driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE)
+    assert last_id == device.device_id
+
+    buffer.close()
+
+
+def test_managed_buffer_operations_support_external_managed_allocations(init_cuda):
+    device = Device()
+    skip_if_managed_memory_unsupported(device)
+    device.set_current()
+
+    buffer = DummyUnifiedMemoryResource(device).allocate(4096)
+    stream = device.create_stream()
+
+    buffer.prefetch(device, stream=stream)
+    stream.sync()
+
+    last_type = _get_mem_range_attr(
+        buffer,
+        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE,
+        4,
+    )
+    last_id = _get_mem_range_attr(
+        buffer,
+        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID,
+        4,
+    )
+    assert int(last_type) == int(driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE)
+    assert last_id == device.device_id
+
+    buffer.close()
+
+
+def test_managed_buffer_operations_reject_non_managed_buffers(init_cuda):
+    device = Device()
+    device.set_current()
+
+    buffer = DummyDeviceMemoryResource(device).allocate(4096)
+    stream = device.create_stream()
+
+    with pytest.raises(ValueError, match="managed-memory buffer"):
+        buffer.advise("set_read_mostly")
+    with pytest.raises(ValueError, match="managed-memory buffer"):
+        buffer.prefetch(device, stream=stream)
+    with pytest.raises(ValueError, match="managed-memory buffer"):
+        buffer.discard_prefetch(device, stream=stream)
+
+    buffer.close()
+
+
+def test_managed_buffer_operation_validation(init_cuda):
+    device = Device()
+    skip_if_managed_memory_unsupported(device)
+    device.set_current()
+
+    mr = create_managed_memory_resource_or_skip()
+    buffer = mr.allocate(4096)
+    stream = device.create_stream()
+
+    with pytest.raises(ValueError, match="requires a location"):
+        buffer.prefetch(stream=stream)
+    with pytest.raises(ValueError, match="does not support location_type='host_numa'"):
+        buffer.advise("set_accessed_by", 0, location_type="host_numa")
+    with pytest.raises(ValueError, match="location must be None or -1"):
+        buffer.prefetch(0, stream=stream, location_type="host")
+
+    buffer.close()
+
+
 def test_managed_memory_resource_host_numa_auto_resolve_failure(init_cuda):
     """host_numa with None raises RuntimeError when NUMA ID cannot be determined."""
     from unittest.mock import MagicMock, patch

From c418050043ef38cc15a74e733d9038d564068c0d Mon Sep 17 00:00:00 2001
From: Rob Parolin <rparolin@nvidia.com>
Date: Mon, 16 Mar 2026 17:44:49 -0700
Subject: [PATCH 02/16] Reformat read-mostly assertion in managed-buffer test

---
 cuda_core/tests/test_memory.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index dd146785ec..44d50e356c 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -1151,11 +1151,14 @@ def test_managed_buffer_advise_prefetch_and_discard_prefetch(init_cuda):
     stream = device.create_stream()
 
     buffer.advise("set_read_mostly")
-    assert _get_mem_range_attr(
-        buffer,
-        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY,
-        4,
-    ) == 1
+    assert (
+        _get_mem_range_attr(
+            buffer,
+            driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY,
+            4,
+        )
+        == 1
+    )
 
     buffer.advise("set_preferred_location", device, location_type="device")
     preferred_type = _get_mem_range_attr(

From b879fa5b13922b2a41122f31751cd11c0c1fbaee Mon Sep 17 00:00:00 2001
From: Rob Parolin <rparolin@nvidia.com>
Date: Mon, 16 Mar 2026 17:51:36 -0700
Subject: [PATCH 03/16] fixing ci compiler errors

---
 cuda_core/cuda/core/_memory/_buffer.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cuda_core/cuda/core/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx
index 686585b527..05a1667b3f 100644
--- a/cuda_core/cuda/core/_memory/_buffer.pyx
+++ b/cuda_core/cuda/core/_memory/_buffer.pyx
@@ -36,7 +36,7 @@ else:
     BufferProtocol = object
 
 from cuda.core._dlpack import DLDeviceType, make_py_capsule
-from cuda.core._utils.cuda_utils import driver
+from cuda.core._utils.cuda_utils import driver, handle_return
 from cuda.core._device import Device
 
 
@@ -175,7 +175,7 @@ cdef inline object _normalize_managed_location(
     cdef int loc_id
 
     if isinstance(location, Device):
-        location = (<Device>location).device_id
+        location = location.device_id
 
     if location_type is not None and not isinstance(location_type, str):
         raise TypeError(f"{what} location_type must be a string or None, got {type(location_type).__name__}")

From 04ee3de1859c91158f30a7bffd3246024d422f0e Mon Sep 17 00:00:00 2001
From: Rob Parolin <rparolin@nvidia.com>
Date: Tue, 17 Mar 2026 09:07:10 -0700
Subject: [PATCH 04/16] skipping tests that aren't supported

---
 cuda_core/tests/test_memory.py | 130 ++++++++++++++++++++++-----------
 1 file changed, 86 insertions(+), 44 deletions(-)

diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 44d50e356c..95c6e6e964 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -1138,18 +1138,70 @@ def _get_mem_range_attr(buffer, attribute, data_size):
     return handle_return(driver.cuMemRangeGetAttribute(data_size, attribute, buffer.handle, buffer.size))
 
 
-def test_managed_buffer_advise_prefetch_and_discard_prefetch(init_cuda):
-    device = Device()
-    skip_if_managed_memory_unsupported(device)
-    device.set_current()
+def _skip_if_managed_allocation_unsupported(device):
+    try:
+        if not device.properties.managed_memory:
+            pytest.skip("Device does not support managed memory operations")
+    except AttributeError:
+        pytest.skip("Managed-memory buffer operations require CUDA support")
+
 
+def _skip_if_managed_location_ops_unsupported(device):
+    _skip_if_managed_allocation_unsupported(device)
+    try:
+        if not device.properties.concurrent_managed_access:
+            pytest.skip("Device does not support concurrent managed memory access")
+    except AttributeError:
+        pytest.skip("Managed-memory location operations require CUDA support")
+
+
+def _skip_if_managed_discard_prefetch_unsupported(device):
+    _skip_if_managed_location_ops_unsupported(device)
     if not hasattr(driver, "cuMemDiscardAndPrefetchBatchAsync"):
         pytest.skip("discard-prefetch requires cuda.bindings support")
 
+    visible_devices = Device.get_all_devices()
+    if not all(dev.properties.concurrent_managed_access for dev in visible_devices):
+        pytest.skip("discard-prefetch requires concurrent managed access on all visible devices")
+
+
+def test_managed_buffer_prefetch_supports_managed_pool_allocations(init_cuda):
+    device = Device()
+    skip_if_managed_memory_unsupported(device)
+    device.set_current()
+
     mr = create_managed_memory_resource_or_skip()
     buffer = mr.allocate(4096)
     stream = device.create_stream()
 
+    buffer.prefetch(-1, stream=stream)
+    stream.sync()
+    last_location = _get_mem_range_attr(
+        buffer,
+        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION,
+        4,
+    )
+    assert last_location == -1
+
+    buffer.prefetch(device, stream=stream)
+    stream.sync()
+    last_location = _get_mem_range_attr(
+        buffer,
+        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION,
+        4,
+    )
+    assert last_location == device.device_id
+
+    buffer.close()
+
+
+def test_managed_buffer_advise_supports_external_managed_allocations(init_cuda):
+    device = Device()
+    _skip_if_managed_allocation_unsupported(device)
+    device.set_current()
+
+    buffer = DummyUnifiedMemoryResource(device).allocate(4096)
+
     buffer.advise("set_read_mostly")
     assert (
         _get_mem_range_attr(
@@ -1160,70 +1212,60 @@ def test_managed_buffer_advise_prefetch_and_discard_prefetch(init_cuda):
         == 1
     )
 
-    buffer.advise("set_preferred_location", device, location_type="device")
-    preferred_type = _get_mem_range_attr(
-        buffer,
-        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE,
-        4,
-    )
-    preferred_id = _get_mem_range_attr(
+    # cuda.bindings currently exposes the combined location attributes for
+    # cuMemRangeGetAttribute, so use the legacy location query here.
+    buffer.advise("set_preferred_location", location_type="host")
+    preferred_location = _get_mem_range_attr(
         buffer,
-        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID,
+        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION,
         4,
     )
-    assert int(preferred_type) == int(driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE)
-    assert preferred_id == device.device_id
+    assert preferred_location == -1
 
-    buffer.prefetch(-1, stream=stream)
-    stream.sync()
-    last_type = _get_mem_range_attr(
-        buffer,
-        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE,
-        4,
-    )
-    assert int(last_type) == int(driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST)
+    buffer.close()
 
-    buffer.discard_prefetch(device, stream=stream)
+
+def test_managed_buffer_prefetch_supports_external_managed_allocations(init_cuda):
+    device = Device()
+    _skip_if_managed_location_ops_unsupported(device)
+    device.set_current()
+
+    buffer = DummyUnifiedMemoryResource(device).allocate(4096)
+    stream = device.create_stream()
+
+    buffer.prefetch(device, stream=stream)
     stream.sync()
-    last_type = _get_mem_range_attr(
-        buffer,
-        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE,
-        4,
-    )
-    last_id = _get_mem_range_attr(
+
+    last_location = _get_mem_range_attr(
         buffer,
-        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID,
+        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION,
         4,
     )
-    assert int(last_type) == int(driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE)
-    assert last_id == device.device_id
+    assert last_location == device.device_id
 
     buffer.close()
 
 
-def test_managed_buffer_operations_support_external_managed_allocations(init_cuda):
+def test_managed_buffer_discard_prefetch_supports_external_managed_allocations(init_cuda):
     device = Device()
-    skip_if_managed_memory_unsupported(device)
+    _skip_if_managed_discard_prefetch_unsupported(device)
     device.set_current()
 
     buffer = DummyUnifiedMemoryResource(device).allocate(4096)
     stream = device.create_stream()
 
-    buffer.prefetch(device, stream=stream)
+    buffer.prefetch(-1, stream=stream)
     stream.sync()
 
-    last_type = _get_mem_range_attr(
-        buffer,
-        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE,
-        4,
-    )
-    last_id = _get_mem_range_attr(
+    buffer.discard_prefetch(device, stream=stream)
+    stream.sync()
+
+    last_location = _get_mem_range_attr(
         buffer,
-        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID,
+        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION,
         4,
     )
-    assert int(last_type) == int(driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE)
-    assert last_id == device.device_id
+    assert last_location == device.device_id
 
     buffer.close()
 

From 9ab3f465d1c7d072a6dd9c6b8b70a9b47a24f3d8 Mon Sep 17 00:00:00 2001
From: Rob Parolin <rparolin@nvidia.com>
Date: Tue, 17 Mar 2026 09:34:29 -0700
Subject: [PATCH 05/16] cu12 support

---
 cuda_core/cuda/core/_memory/_buffer.pyx | 40 ++++++++++++++++++--
 cuda_core/tests/test_memory.py          | 50 ++++++++++++++++++++++++-
 2 files changed, 86 insertions(+), 4 deletions(-)

diff --git a/cuda_core/cuda/core/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx
index 05a1667b3f..4460de900d 100644
--- a/cuda_core/cuda/core/_memory/_buffer.pyx
+++ b/cuda_core/cuda/core/_memory/_buffer.pyx
@@ -36,7 +36,7 @@ else:
     BufferProtocol = object
 
 from cuda.core._dlpack import DLDeviceType, make_py_capsule
-from cuda.core._utils.cuda_utils import driver, handle_return
+from cuda.core._utils.cuda_utils import driver, get_binding_version, handle_return
 from cuda.core._device import Device
 
 
@@ -247,6 +247,20 @@ cdef inline object _normalize_managed_location(
     return _make_managed_location(<str>loc_type, loc_id)
 
 
+cdef inline bint _managed_location_uses_v2_bindings():
+    # cuda.bindings 13.x switches these APIs to CUmemLocation-based wrappers.
+    return get_binding_version() >= (13, 0)
+
+
+cdef inline int _managed_location_to_legacy_device(object location, str what):
+    cdef object loc_type = location.type
+    if loc_type == _managed_location_enum("device") or loc_type == _managed_location_enum("host"):
+        return <int>location.id
+    raise RuntimeError(
+        f"{what} requires cuda.bindings 13.x for location_type={loc_type!r}"
+    )
+
+
 cdef inline void _require_managed_buffer(Buffer self, str what):
     _init_mem_attrs(self)
     if not self._mem_attrs.is_managed:
@@ -518,7 +532,17 @@ cdef class Buffer:
             allow_host_numa=advice_name not in _MANAGED_ADVICE_HOST_OR_DEVICE_ONLY,
             allow_host_numa_current=advice_name == "set_preferred_location",
         )
-        handle_return(driver.cuMemAdvise(self.handle, self._size, advice, location))
+        if _managed_location_uses_v2_bindings():
+            handle_return(driver.cuMemAdvise(self.handle, self._size, advice, location))
+        else:
+            handle_return(
+                driver.cuMemAdvise(
+                    self.handle,
+                    self._size,
+                    advice,
+                    _managed_location_to_legacy_device(location, "Buffer.advise"),
+                )
+            )
 
     def prefetch(
         self,
@@ -539,7 +563,17 @@ cdef class Buffer:
             allow_host_numa=True,
             allow_host_numa_current=True,
         )
-        handle_return(driver.cuMemPrefetchAsync(self.handle, self._size, location, 0, s.handle))
+        if _managed_location_uses_v2_bindings():
+            handle_return(driver.cuMemPrefetchAsync(self.handle, self._size, location, 0, s.handle))
+        else:
+            handle_return(
+                driver.cuMemPrefetchAsync(
+                    self.handle,
+                    self._size,
+                    _managed_location_to_legacy_device(location, "Buffer.prefetch"),
+                    s.handle,
+                )
+            )
 
     def discard_prefetch(
         self,
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 95c6e6e964..380b581e7b 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -43,7 +43,7 @@
     system as ccx_system,
 )
 from cuda.core._dlpack import DLDeviceType
-from cuda.core._memory import IPCBufferDescriptor
+from cuda.core._memory import IPCBufferDescriptor, _buffer
 from cuda.core._utils.cuda_utils import CUDAError, handle_return
 from cuda.core.utils import StridedMemoryView
 
@@ -1270,6 +1270,54 @@ def test_managed_buffer_discard_prefetch_supports_external_managed_allocations(i
     buffer.close()
 
 
+def test_managed_buffer_advise_uses_legacy_bindings_signature(monkeypatch, init_cuda):
+    device = Device()
+    _skip_if_managed_allocation_unsupported(device)
+    device.set_current()
+
+    buffer = DummyUnifiedMemoryResource(device).allocate(4096)
+    calls = []
+
+    def fake_cuMemAdvise(ptr, size, advice, location):
+        calls.append((ptr, size, advice, location))
+        return (driver.CUresult.CUDA_SUCCESS,)
+
+    monkeypatch.setattr(_buffer, "get_binding_version", lambda: (12, 9))
+    monkeypatch.setattr(_buffer.driver, "cuMemAdvise", fake_cuMemAdvise)
+
+    buffer.advise("set_read_mostly")
+
+    assert len(calls) == 1
+    assert calls[0][3] == int(getattr(driver, "CU_DEVICE_CPU", -1))
+
+    buffer.close()
+
+
+def test_managed_buffer_prefetch_uses_legacy_bindings_signature(monkeypatch, init_cuda):
+    device = Device()
+    _skip_if_managed_location_ops_unsupported(device)
+    device.set_current()
+
+    buffer = DummyUnifiedMemoryResource(device).allocate(4096)
+    stream = device.create_stream()
+    calls = []
+
+    def fake_cuMemPrefetchAsync(ptr, size, location, hstream):
+        calls.append((ptr, size, location, hstream))
+        return (driver.CUresult.CUDA_SUCCESS,)
+
+    monkeypatch.setattr(_buffer, "get_binding_version", lambda: (12, 9))
+    monkeypatch.setattr(_buffer.driver, "cuMemPrefetchAsync", fake_cuMemPrefetchAsync)
+
+    buffer.prefetch(device, stream=stream)
+
+    assert len(calls) == 1
+    assert calls[0][2] == device.device_id
+    assert int(calls[0][3]) == int(stream.handle)
+
+    buffer.close()
+
+
 def test_managed_buffer_operations_reject_non_managed_buffers(init_cuda):
     device = Device()
     device.set_current()

From a948066ab2fc6fda3dfb74516538091e96e68746 Mon Sep 17 00:00:00 2001
From: Rob Parolin <rparolin@nvidia.com>
Date: Tue, 17 Mar 2026 16:45:51 -0700
Subject: [PATCH 06/16] Move managed-memory operations from Buffer methods to
 free-standing functions in the cuda.core.managed_memory namespace

---
 cuda_core/cuda/core/__init__.py               |   2 +-
 cuda_core/cuda/core/_memory/_buffer.pyx       | 322 +++++++++++-------
 cuda_core/cuda/core/experimental/__init__.py  |   3 +-
 cuda_core/cuda/core/managed_memory.py         |   9 +
 cuda_core/docs/source/api.rst                 |  13 +
 cuda_core/docs/source/release/0.7.x-notes.rst |  10 +-
 cuda_core/pixi.lock                           |  18 +-
 .../test_experimental_backward_compat.py      |   7 +
 cuda_core/tests/test_memory.py                | 137 +++++---
 9 files changed, 335 insertions(+), 186 deletions(-)
 create mode 100644 cuda_core/cuda/core/managed_memory.py

diff --git a/cuda_core/cuda/core/__init__.py b/cuda_core/cuda/core/__init__.py
index 139078e86e..c55c0786ed 100644
--- a/cuda_core/cuda/core/__init__.py
+++ b/cuda_core/cuda/core/__init__.py
@@ -28,7 +28,7 @@
 finally:
     del bindings, importlib, subdir, cuda_major, cuda_minor
 
-from cuda.core import system, utils
+from cuda.core import managed_memory, system, utils
 from cuda.core._device import Device
 from cuda.core._event import Event, EventOptions
 from cuda.core._graph import (
diff --git a/cuda_core/cuda/core/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx
index 8ae6d22ee5..4663302b34 100644
--- a/cuda_core/cuda/core/_memory/_buffer.pyx
+++ b/cuda_core/cuda/core/_memory/_buffer.pyx
@@ -113,6 +113,13 @@ cdef frozenset _MANAGED_ADVICE_HOST_OR_DEVICE_ONLY = frozenset((
     "unset_accessed_by",
 ))
 
+cdef int _MANAGED_SIZE_NOT_PROVIDED = -1  # sentinel default for size=: the caller did not pass a size
+cdef int _HOST_NUMA_CURRENT_ID = 0  # location id filled in when location_type is "host_numa_current"
+cdef int _FIRST_PREFETCH_LOCATION_INDEX = 0  # sole entry of the location-index list given to the batch call
+cdef size_t _SINGLE_RANGE_COUNT = 1  # discard_prefetch submits exactly one range per batch call
+cdef size_t _SINGLE_PREFETCH_LOCATION_COUNT = 1  # discard_prefetch submits exactly one prefetch location
+cdef unsigned long long _MANAGED_OPERATION_FLAGS = 0  # flags argument passed to the prefetch / batch driver calls
+
 
 cdef inline object _managed_location_enum(str location_type):
     cdef str attr_name = _MANAGED_LOCATION_TYPE_ATTRS[location_type]
@@ -130,7 +137,7 @@ cdef inline object _make_managed_location(str location_type, int location_id):
     if location_type == "host":
         location.id = int(getattr(driver, "CU_DEVICE_CPU", -1))
     elif location_type == "host_numa_current":
-        location.id = 0
+        location.id = _HOST_NUMA_CURRENT_ID
     else:
         location.id = location_id
     return location
@@ -236,7 +243,7 @@ cdef inline object _normalize_managed_location(
             raise ValueError(
                 f"{what} location must be None when location_type is 'host_numa_current', got {location!r}"
             )
-        return _make_managed_location(loc_type, 0)
+        return _make_managed_location(loc_type, _HOST_NUMA_CURRENT_ID)
 
     if loc_type == "host" and not allow_host:
         raise ValueError(f"{what} does not support host locations")
@@ -264,16 +271,206 @@ cdef inline int _managed_location_to_legacy_device(object location, str what):
 cdef inline void _require_managed_buffer(Buffer self, str what):
     _init_mem_attrs(self)
     if not self._mem_attrs.is_managed:
-        raise ValueError(f"{what} requires a managed-memory buffer")
+        raise ValueError(f"{what} requires a managed-memory allocation")
 
 
-cdef inline void _require_managed_discard_prefetch_support():
+cdef inline void _require_managed_discard_prefetch_support(str what):
     if not hasattr(driver, "cuMemDiscardAndPrefetchBatchAsync"):
         raise RuntimeError(
-            "Buffer.discard_prefetch requires cuda.bindings support for "
-            "cuMemDiscardAndPrefetchBatchAsync"
+            f"{what} requires cuda.bindings support for cuMemDiscardAndPrefetchBatchAsync"
         )
 
+
+cdef inline tuple _managed_range_from_buffer(Buffer buffer, Py_ssize_t size, str what):
+    """Return ``(handle, size)`` for a managed ``buffer``.
+
+    Raises ``TypeError`` if ``size`` was given, ``ValueError`` if not managed.
+    """
+    if size != _MANAGED_SIZE_NOT_PROVIDED:
+        raise TypeError(f"{what} does not accept size= when target is a Buffer")
+    _require_managed_buffer(buffer, what)
+    return buffer.handle, buffer._size
+
+
+cdef inline uintptr_t _coerce_raw_pointer(object target, str what) except? 0:  # 0 is a legal result, hence "except?"
+    cdef object ptr_obj
+    try:
+        ptr_obj = int(target)
+    except Exception as exc:  # int() may run arbitrary __int__/__index__ code
+        raise TypeError(
+            f"{what} target must be a Buffer or a raw pointer, got {type(target).__name__}"
+        ) from exc
+    if ptr_obj < 0:
+        raise ValueError(f"{what} target pointer must be >= 0, got {target!r}")
+    return <uintptr_t>ptr_obj
+
+
+cdef inline int _require_managed_pointer(uintptr_t ptr, str what) except -1:
+    cdef _MemAttrs mem_attrs
+    with nogil:
+        _query_memory_attrs(mem_attrs, <cydriver.CUdeviceptr>ptr)
+    if not mem_attrs.is_managed:
+        raise ValueError(f"{what} requires a managed-memory allocation")
+    return 0  # any value other than the -1 error marker
+
+
+cdef inline tuple _normalize_managed_target_range(object target, Py_ssize_t size, str what):
+    """Validate ``target`` and return its ``(pointer, size)`` range."""
+    cdef uintptr_t ptr
+
+    if isinstance(target, Buffer):
+        return _managed_range_from_buffer(<Buffer>target, size, what)
+
+    if size == _MANAGED_SIZE_NOT_PROVIDED:
+        raise TypeError(f"{what} requires size= when target is a raw pointer")
+    if size < 0:
+        # Casting a negative value to size_t would wrap to a huge byte count.
+        raise ValueError(f"{what} size must be >= 0, got {size}")
+    ptr = _coerce_raw_pointer(target, what)
+    _require_managed_pointer(ptr, what)
+    return ptr, <size_t>size
+
+
+def advise(
+    target,
+    advice: driver.CUmem_advise | str,
+    location: Device | int | None = None,
+    *,
+    Py_ssize_t size=_MANAGED_SIZE_NOT_PROVIDED,
+    location_type: str | None = None,
+):
+    """Apply managed-memory advice to an allocation range.
+
+    Parameters
+    ----------
+    target : :class:`Buffer` | int | object
+        Managed allocation to operate on. This may be a :class:`Buffer` or a
+        raw pointer (requires ``size=``).
+    advice : :obj:`~driver.CUmem_advise` | str
+        Managed-memory advice to apply. String aliases such as
+        ``"set_read_mostly"``, ``"set_preferred_location"``, and
+        ``"set_accessed_by"`` are accepted.
+    location : :obj:`~_device.Device` | int | None, optional
+        Target location. When ``location_type`` is ``None``, values are
+        interpreted as a device ordinal, ``-1`` for host, or ``None`` for
+        advice values that ignore location.
+    size : int, optional
+        Size in bytes. Required for a raw pointer; rejected for a :class:`Buffer`.
+    location_type : str | None, optional
+        Explicit location kind. Supported values are ``"device"``, ``"host"``,
+        ``"host_numa"``, and ``"host_numa_current"``.
+    """
+    cdef str advice_name
+    cdef object ptr
+    cdef size_t nbytes
+
+    ptr, nbytes = _normalize_managed_target_range(target, size, "advise")
+    advice_name, advice = _normalize_managed_advice(advice)
+    location = _normalize_managed_location(
+        location,
+        location_type,
+        "advise",
+        allow_none=advice_name in _MANAGED_ADVICE_IGNORE_LOCATION,
+        allow_host=True,
+        allow_host_numa=advice_name not in _MANAGED_ADVICE_HOST_OR_DEVICE_ONLY,
+        allow_host_numa_current=advice_name == "set_preferred_location",
+    )
+    if _managed_location_uses_v2_bindings():
+        handle_return(driver.cuMemAdvise(ptr, nbytes, advice, location))
+    else:
+        handle_return(
+            driver.cuMemAdvise(
+                ptr,
+                nbytes,
+                advice,
+                _managed_location_to_legacy_device(location, "advise"),
+            )
+        )
+
+
+def prefetch(
+    target,
+    location: Device | int | None = None,
+    *,
+    stream: Stream | GraphBuilder,
+    Py_ssize_t size=_MANAGED_SIZE_NOT_PROVIDED,
+    location_type: str | None = None,
+):
+    """Prefetch a managed-memory allocation range to a target location.
+
+    ``target`` is a managed :class:`Buffer`, or a raw pointer together with
+    ``size=`` (in bytes). ``location`` and ``location_type`` are interpreted
+    as in :func:`advise`, except that a location is always required. The
+    operation is issued on ``stream``.
+    """
+    cdef Stream s = Stream_accept(stream)
+    cdef object ptr
+    cdef size_t nbytes
+
+    ptr, nbytes = _normalize_managed_target_range(target, size, "prefetch")
+    location = _normalize_managed_location(
+        location,
+        location_type,
+        "prefetch",
+        allow_none=False,
+        allow_host=True,
+        allow_host_numa=True,
+        allow_host_numa_current=True,
+    )
+    if _managed_location_uses_v2_bindings():
+        handle_return(
+            driver.cuMemPrefetchAsync(ptr, nbytes, location, _MANAGED_OPERATION_FLAGS, s.handle)
+        )
+    else:
+        handle_return(
+            driver.cuMemPrefetchAsync(
+                ptr,
+                nbytes,
+                _managed_location_to_legacy_device(location, "prefetch"),
+                s.handle,
+            )
+        )
+
+
+def discard_prefetch(
+    target,
+    location: Device | int | None = None,
+    *,
+    stream: Stream | GraphBuilder,
+    Py_ssize_t size=_MANAGED_SIZE_NOT_PROVIDED,
+    location_type: str | None = None,
+):
+    """Discard a managed-memory allocation range and prefetch it to a target location.
+
+    Accepts the same arguments as :func:`prefetch`. Raises ``RuntimeError`` when
+    ``cuda.bindings`` does not provide ``cuMemDiscardAndPrefetchBatchAsync``.
+    """
+    cdef Stream s = Stream_accept(stream)
+    cdef object ptr
+    cdef object batch_ptr
+    cdef size_t nbytes
+
+    ptr, nbytes = _normalize_managed_target_range(target, size, "discard_prefetch")
+    batch_ptr = driver.CUdeviceptr(int(ptr))
+    _require_managed_discard_prefetch_support("discard_prefetch")
+    location = _normalize_managed_location(
+        location,
+        location_type,
+        "discard_prefetch",
+        allow_none=False,
+        allow_host=True,
+        allow_host_numa=True,
+        allow_host_numa_current=True,
+    )
+    handle_return(
+        driver.cuMemDiscardAndPrefetchBatchAsync(
+            [batch_ptr], [nbytes], _SINGLE_RANGE_COUNT,
+            [location], [_FIRST_PREFETCH_LOCATION_INDEX], _SINGLE_PREFETCH_LOCATION_COUNT,
+            _MANAGED_OPERATION_FLAGS,
+            s.handle,
+        )
+    )
+
 cdef class Buffer:
     """Represent a handle to allocated memory.
 
@@ -502,119 +699,6 @@ cdef class Buffer:
         finally:
             PyBuffer_Release(&buf)
 
-    def advise(
-        self,
-        advice: driver.CUmem_advise | str,
-        location: Device | int | None = None,
-        *,
-        location_type: str | None = None,
-    ):
-        """Apply a managed-memory advice to this buffer.
-
-        This method is only valid for buffers backed by managed memory.
-
-        Parameters
-        ----------
-        advice : :obj:`~driver.CUmem_advise` | str
-            Managed-memory advice to apply. String aliases such as
-            ``"set_read_mostly"``, ``"set_preferred_location"``, and
-            ``"set_accessed_by"`` are accepted.
-        location : :obj:`~_device.Device` | int | None, optional
-            Target location. When ``location_type`` is ``None``, values are
-            interpreted as a device ordinal, ``-1`` for host, or ``None`` for
-            advice values that ignore location.
-        location_type : str | None, optional
-            Explicit location kind. Supported values are ``"device"``,
-            ``"host"``, ``"host_numa"``, and ``"host_numa_current"``.
-        """
-        cdef str advice_name
-        _require_managed_buffer(self, "Buffer.advise")
-        advice_name, advice = _normalize_managed_advice(advice)
-        location = _normalize_managed_location(
-            location,
-            location_type,
-            "Buffer.advise",
-            allow_none=advice_name in _MANAGED_ADVICE_IGNORE_LOCATION,
-            allow_host=True,
-            allow_host_numa=advice_name not in _MANAGED_ADVICE_HOST_OR_DEVICE_ONLY,
-            allow_host_numa_current=advice_name == "set_preferred_location",
-        )
-        if _managed_location_uses_v2_bindings():
-            handle_return(driver.cuMemAdvise(self.handle, self._size, advice, location))
-        else:
-            handle_return(
-                driver.cuMemAdvise(
-                    self.handle,
-                    self._size,
-                    advice,
-                    _managed_location_to_legacy_device(location, "Buffer.advise"),
-                )
-            )
-
-    def prefetch(
-        self,
-        location: Device | int | None = None,
-        *,
-        stream: Stream | GraphBuilder,
-        location_type: str | None = None,
-    ):
-        """Prefetch this managed-memory buffer to a target location."""
-        cdef Stream s = Stream_accept(stream)
-        _require_managed_buffer(self, "Buffer.prefetch")
-        location = _normalize_managed_location(
-            location,
-            location_type,
-            "Buffer.prefetch",
-            allow_none=False,
-            allow_host=True,
-            allow_host_numa=True,
-            allow_host_numa_current=True,
-        )
-        if _managed_location_uses_v2_bindings():
-            handle_return(driver.cuMemPrefetchAsync(self.handle, self._size, location, 0, s.handle))
-        else:
-            handle_return(
-                driver.cuMemPrefetchAsync(
-                    self.handle,
-                    self._size,
-                    _managed_location_to_legacy_device(location, "Buffer.prefetch"),
-                    s.handle,
-                )
-            )
-
-    def discard_prefetch(
-        self,
-        location: Device | int | None = None,
-        *,
-        stream: Stream | GraphBuilder,
-        location_type: str | None = None,
-    ):
-        """Discard this managed-memory buffer and prefetch it to a target location."""
-        cdef Stream s = Stream_accept(stream)
-        _require_managed_buffer(self, "Buffer.discard_prefetch")
-        _require_managed_discard_prefetch_support()
-        location = _normalize_managed_location(
-            location,
-            location_type,
-            "Buffer.discard_prefetch",
-            allow_none=False,
-            allow_host=True,
-            allow_host_numa=True,
-            allow_host_numa_current=True,
-        )
-        handle_return(
-            driver.cuMemDiscardAndPrefetchBatchAsync(
-                [self.handle],
-                [self._size],
-                1,
-                [location],
-                [0],
-                1,
-                0,
-                s.handle,
-            )
-        )
-
     def __dlpack__(
         self,
         *,
diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py
index e7989f0f26..83fb1c7581 100644
--- a/cuda_core/cuda/core/experimental/__init__.py
+++ b/cuda_core/cuda/core/experimental/__init__.py
@@ -38,9 +38,10 @@ def _warn_deprecated():
 _warn_deprecated()
 
 
-from cuda.core import system, utils
+from cuda.core import managed_memory, system, utils
 
 # Make utils accessible as a submodule for backward compatibility
+__import__("sys").modules[__spec__.name + ".managed_memory"] = managed_memory
 __import__("sys").modules[__spec__.name + ".utils"] = utils
 
 
diff --git a/cuda_core/cuda/core/managed_memory.py b/cuda_core/cuda/core/managed_memory.py
new file mode 100644
index 0000000000..f11aabcd19
--- /dev/null
+++ b/cuda_core/cuda/core/managed_memory.py
@@ -0,0 +1,9 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""Managed-memory range operations."""
+
+from cuda.core._memory._buffer import advise, discard_prefetch, prefetch
+
+__all__ = ["advise", "prefetch", "discard_prefetch"]
diff --git a/cuda_core/docs/source/api.rst b/cuda_core/docs/source/api.rst
index fa7ce48eb5..4d63bbcf88 100644
--- a/cuda_core/docs/source/api.rst
+++ b/cuda_core/docs/source/api.rst
@@ -62,6 +62,19 @@ CUDA runtime
    on other non-blocking streams.
 
 
+.. module:: cuda.core.managed_memory
+
+Managed memory
+--------------
+
+.. autosummary::
+   :toctree: generated/
+
+   advise
+   prefetch
+   discard_prefetch
+
+
 CUDA compilation toolchain
 --------------------------
 
diff --git a/cuda_core/docs/source/release/0.7.x-notes.rst b/cuda_core/docs/source/release/0.7.x-notes.rst
index 18b3bede36..186e3181f1 100644
--- a/cuda_core/docs/source/release/0.7.x-notes.rst
+++ b/cuda_core/docs/source/release/0.7.x-notes.rst
@@ -35,10 +35,12 @@ New features
   preference, or a tuple such as ``("device", 0)``, ``("host", None)``, or
   ``("host_numa", 3)``.
 
-- Added managed-memory controls on :class:`Buffer`: ``advise()``,
-  ``prefetch()``, and ``discard_prefetch()``. These methods validate that the
-  underlying allocation is managed memory and then forward to the corresponding
-  CUDA driver operations for range advice and migration.
+- Added managed-memory range operations under :mod:`cuda.core.managed_memory`:
+  ``advise()``, ``prefetch()``, and ``discard_prefetch()``. These free
+  functions accept either a managed :class:`Buffer` or a raw pointer plus
+  ``size=``, validate that the target allocation is managed memory, and then
+  forward to the corresponding CUDA driver operations for range advice and
+  migration.
 
 - Added ``numa_id`` option to :class:`PinnedMemoryResourceOptions` for explicit
   control over host NUMA node placement. When ``ipc_enabled=True`` and
diff --git a/cuda_core/pixi.lock b/cuda_core/pixi.lock
index 78da9addb5..e2f8b7b0c2 100644
--- a/cuda_core/pixi.lock
+++ b/cuda_core/pixi.lock
@@ -2598,7 +2598,7 @@ packages:
   subdir: win-64
   variants:
     c_compiler: vs2022
-    cuda-version: 13.2.*
+    cuda_version: 13.2.*
     cxx_compiler: vs2022
     python: 3.14.*
     target_platform: win-64
@@ -2625,7 +2625,7 @@ packages:
   build: py314h9a28ecd_0
   subdir: linux-aarch64
   variants:
-    cuda-version: 13.2.*
+    cuda_version: 13.2.*
     python: 3.14.*
     target_platform: linux-aarch64
   depends:
@@ -2653,7 +2653,7 @@ packages:
   build: py314hb727236_0
   subdir: linux-64
   variants:
-    cuda-version: 13.2.*
+    cuda_version: 13.2.*
     python: 3.14.*
     target_platform: linux-64
   depends:
@@ -2794,7 +2794,7 @@ packages:
   subdir: win-64
   variants:
     c_compiler: vs2022
-    cuda-version: 13.2.*
+    cuda_version: 13.2.*
     cxx_compiler: vs2022
     python: 3.14.*
     target_platform: win-64
@@ -2817,7 +2817,7 @@ packages:
   subdir: win-64
   variants:
     c_compiler: vs2022
-    cuda-version: 12.*
+    cuda_version: 12.*
     cxx_compiler: vs2022
     python: 3.14.*
     target_platform: win-64
@@ -2840,7 +2840,7 @@ packages:
   build: py314h9a28ecd_0
   subdir: linux-aarch64
   variants:
-    cuda-version: 13.2.*
+    cuda_version: 13.2.*
     python: 3.14.*
     target_platform: linux-aarch64
   depends:
@@ -2862,7 +2862,7 @@ packages:
   build: py314ha6d028f_0
   subdir: linux-64
   variants:
-    cuda-version: 12.*
+    cuda_version: 12.*
     python: 3.14.*
     target_platform: linux-64
   depends:
@@ -2884,7 +2884,7 @@ packages:
   build: py314hb727236_0
   subdir: linux-64
   variants:
-    cuda-version: 13.2.*
+    cuda_version: 13.2.*
     python: 3.14.*
     target_platform: linux-64
   depends:
@@ -2906,7 +2906,7 @@ packages:
   build: py314he8946ed_0
   subdir: linux-aarch64
   variants:
-    cuda-version: 12.*
+    cuda_version: 12.*
     python: 3.14.*
     target_platform: linux-aarch64
   depends:
diff --git a/cuda_core/tests/test_experimental_backward_compat.py b/cuda_core/tests/test_experimental_backward_compat.py
index c3215b056a..82e2cdd5be 100644
--- a/cuda_core/tests/test_experimental_backward_compat.py
+++ b/cuda_core/tests/test_experimental_backward_compat.py
@@ -38,6 +38,7 @@ def test_experimental_backward_compatibility():
     assert hasattr(cuda.core.experimental, "Device")
     assert hasattr(cuda.core.experimental, "Stream")
     assert hasattr(cuda.core.experimental, "Buffer")
+    assert hasattr(cuda.core.experimental, "managed_memory")
     assert hasattr(cuda.core.experimental, "system")
 
     # Test 2: Direct imports - should emit deprecation warning
@@ -73,6 +74,7 @@ def test_experimental_backward_compatibility():
     assert cuda.core.experimental.Linker is cuda.core.Linker
 
     # Compare singletons
+    assert cuda.core.experimental.managed_memory is cuda.core.managed_memory
     assert cuda.core.experimental.system is cuda.core.system
 
     # Test 4: Utils module works
@@ -88,6 +90,11 @@ def test_experimental_backward_compatibility():
 
     assert StridedMemoryView is not None
     assert args_viewable_as_strided_memory is not None
+    from cuda.core.experimental.managed_memory import advise, discard_prefetch, prefetch
+
+    assert advise is not None
+    assert prefetch is not None
+    assert discard_prefetch is not None
 
     # Test 5: Options classes are accessible
     assert hasattr(cuda.core.experimental, "EventOptions")
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 380b581e7b..927014826a 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -38,6 +38,7 @@
     PinnedMemoryResourceOptions,
     VirtualMemoryResource,
     VirtualMemoryResourceOptions,
+    managed_memory,
 )
 from cuda.core import (
     system as ccx_system,
@@ -48,6 +49,12 @@
 from cuda.core.utils import StridedMemoryView
 
 POOL_SIZE = 2097152  # 2MB size
+_MANAGED_TEST_ALLOCATION_SIZE = 4096  # bytes allocated for each managed-memory test buffer
+_MEM_RANGE_ATTRIBUTE_VALUE_SIZE = 4  # data_size passed to cuMemRangeGetAttribute for integer attributes
+_READ_MOSTLY_ENABLED = 1  # expected READ_MOSTLY attribute value after advise("set_read_mostly")
+_HOST_LOCATION_ID = -1  # location value the tests use (and read back) for the host
+_INVALID_HOST_DEVICE_ORDINAL = 0  # NOTE(review): usage is outside this hunk; presumably an ordinal rejected for host locations (confirm)
+_LEGACY_BINDINGS_VERSION = (12, 9)  # value returned by the monkeypatched get_binding_version to select the legacy path
 
 
 class DummyDeviceMemoryResource(MemoryResource):
@@ -1138,6 +1145,10 @@ def _get_mem_range_attr(buffer, attribute, data_size):
     return handle_return(driver.cuMemRangeGetAttribute(data_size, attribute, buffer.handle, buffer.size))
 
 
+def _get_int_mem_range_attr(buffer, attribute):  # reads a range attribute using the 4-byte value size
+    return _get_mem_range_attr(buffer, attribute, _MEM_RANGE_ATTRIBUTE_VALUE_SIZE)
+
+
 def _skip_if_managed_allocation_unsupported(device):
     try:
         if not device.properties.managed_memory:
@@ -1165,140 +1176,134 @@ def _skip_if_managed_discard_prefetch_unsupported(device):
         pytest.skip("discard-prefetch requires concurrent managed access on all visible devices")
 
 
-def test_managed_buffer_prefetch_supports_managed_pool_allocations(init_cuda):
+def test_managed_memory_prefetch_supports_managed_pool_allocations(init_cuda):
     device = Device()
     skip_if_managed_memory_unsupported(device)
     device.set_current()
 
     mr = create_managed_memory_resource_or_skip()
-    buffer = mr.allocate(4096)
+    buffer = mr.allocate(_MANAGED_TEST_ALLOCATION_SIZE)
     stream = device.create_stream()
 
-    buffer.prefetch(-1, stream=stream)
+    managed_memory.prefetch(buffer, _HOST_LOCATION_ID, stream=stream)
     stream.sync()
-    last_location = _get_mem_range_attr(
+    last_location = _get_int_mem_range_attr(
         buffer,
         driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION,
-        4,
     )
-    assert last_location == -1
+    assert last_location == _HOST_LOCATION_ID
 
-    buffer.prefetch(device, stream=stream)
+    managed_memory.prefetch(buffer, device, stream=stream)
     stream.sync()
-    last_location = _get_mem_range_attr(
+    last_location = _get_int_mem_range_attr(
         buffer,
         driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION,
-        4,
     )
     assert last_location == device.device_id
 
     buffer.close()
 
 
-def test_managed_buffer_advise_supports_external_managed_allocations(init_cuda):
+def test_managed_memory_advise_supports_external_managed_allocations(init_cuda):
     device = Device()
     _skip_if_managed_allocation_unsupported(device)
     device.set_current()
 
-    buffer = DummyUnifiedMemoryResource(device).allocate(4096)
+    buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE)
 
-    buffer.advise("set_read_mostly")
+    managed_memory.advise(buffer, "set_read_mostly")
     assert (
-        _get_mem_range_attr(
+        _get_int_mem_range_attr(
             buffer,
             driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY,
-            4,
         )
-        == 1
+        == _READ_MOSTLY_ENABLED
     )
 
     # cuda.bindings currently exposes the combined location attributes for
     # cuMemRangeGetAttribute, so use the legacy location query here.
-    buffer.advise("set_preferred_location", location_type="host")
-    preferred_location = _get_mem_range_attr(
+    managed_memory.advise(buffer, "set_preferred_location", location_type="host")
+    preferred_location = _get_int_mem_range_attr(
         buffer,
         driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION,
-        4,
     )
-    assert preferred_location == -1
+    assert preferred_location == _HOST_LOCATION_ID
 
     buffer.close()
 
 
-def test_managed_buffer_prefetch_supports_external_managed_allocations(init_cuda):
+def test_managed_memory_prefetch_supports_external_managed_allocations(init_cuda):
     device = Device()
     _skip_if_managed_location_ops_unsupported(device)
     device.set_current()
 
-    buffer = DummyUnifiedMemoryResource(device).allocate(4096)
+    buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE)
     stream = device.create_stream()
 
-    buffer.prefetch(device, stream=stream)
+    managed_memory.prefetch(buffer, device, stream=stream)
     stream.sync()
 
-    last_location = _get_mem_range_attr(
+    last_location = _get_int_mem_range_attr(
         buffer,
         driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION,
-        4,
     )
     assert last_location == device.device_id
 
     buffer.close()
 
 
-def test_managed_buffer_discard_prefetch_supports_external_managed_allocations(init_cuda):
+def test_managed_memory_discard_prefetch_supports_external_managed_allocations(init_cuda):
     device = Device()
     _skip_if_managed_discard_prefetch_unsupported(device)
     device.set_current()
 
-    buffer = DummyUnifiedMemoryResource(device).allocate(4096)
+    buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE)
     stream = device.create_stream()
 
-    buffer.prefetch(-1, stream=stream)
+    managed_memory.prefetch(buffer, _HOST_LOCATION_ID, stream=stream)
     stream.sync()
 
-    buffer.discard_prefetch(device, stream=stream)
+    managed_memory.discard_prefetch(buffer, device, stream=stream)
     stream.sync()
 
-    last_location = _get_mem_range_attr(
+    last_location = _get_int_mem_range_attr(
         buffer,
         driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION,
-        4,
     )
     assert last_location == device.device_id
 
     buffer.close()
 
 
-def test_managed_buffer_advise_uses_legacy_bindings_signature(monkeypatch, init_cuda):
+def test_managed_memory_advise_uses_legacy_bindings_signature(monkeypatch, init_cuda):
     device = Device()
     _skip_if_managed_allocation_unsupported(device)
     device.set_current()
 
-    buffer = DummyUnifiedMemoryResource(device).allocate(4096)
+    buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE)
     calls = []
 
     def fake_cuMemAdvise(ptr, size, advice, location):
         calls.append((ptr, size, advice, location))
         return (driver.CUresult.CUDA_SUCCESS,)
 
-    monkeypatch.setattr(_buffer, "get_binding_version", lambda: (12, 9))
+    monkeypatch.setattr(_buffer, "get_binding_version", lambda: _LEGACY_BINDINGS_VERSION)
     monkeypatch.setattr(_buffer.driver, "cuMemAdvise", fake_cuMemAdvise)
 
-    buffer.advise("set_read_mostly")
+    managed_memory.advise(buffer, "set_read_mostly")
 
     assert len(calls) == 1
-    assert calls[0][3] == int(getattr(driver, "CU_DEVICE_CPU", -1))
+    assert calls[0][3] == int(getattr(driver, "CU_DEVICE_CPU", _HOST_LOCATION_ID))
 
     buffer.close()
 
 
-def test_managed_buffer_prefetch_uses_legacy_bindings_signature(monkeypatch, init_cuda):
+def test_managed_memory_prefetch_uses_legacy_bindings_signature(monkeypatch, init_cuda):
     device = Device()
     _skip_if_managed_location_ops_unsupported(device)
     device.set_current()
 
-    buffer = DummyUnifiedMemoryResource(device).allocate(4096)
+    buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE)
     stream = device.create_stream()
     calls = []
 
@@ -1306,10 +1311,10 @@ def fake_cuMemPrefetchAsync(ptr, size, location, hstream):
         calls.append((ptr, size, location, hstream))
         return (driver.CUresult.CUDA_SUCCESS,)
 
-    monkeypatch.setattr(_buffer, "get_binding_version", lambda: (12, 9))
+    monkeypatch.setattr(_buffer, "get_binding_version", lambda: _LEGACY_BINDINGS_VERSION)
     monkeypatch.setattr(_buffer.driver, "cuMemPrefetchAsync", fake_cuMemPrefetchAsync)
 
-    buffer.prefetch(device, stream=stream)
+    managed_memory.prefetch(buffer, device, stream=stream)
 
     assert len(calls) == 1
     assert calls[0][2] == device.device_id
@@ -1318,38 +1323,66 @@ def fake_cuMemPrefetchAsync(ptr, size, location, hstream):
     buffer.close()
 
 
-def test_managed_buffer_operations_reject_non_managed_buffers(init_cuda):
+def test_managed_memory_operations_reject_non_managed_allocations(init_cuda):
     device = Device()
     device.set_current()
 
-    buffer = DummyDeviceMemoryResource(device).allocate(4096)
+    buffer = DummyDeviceMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE)
     stream = device.create_stream()
 
-    with pytest.raises(ValueError, match="managed-memory buffer"):
-        buffer.advise("set_read_mostly")
-    with pytest.raises(ValueError, match="managed-memory buffer"):
-        buffer.prefetch(device, stream=stream)
-    with pytest.raises(ValueError, match="managed-memory buffer"):
-        buffer.discard_prefetch(device, stream=stream)
+    with pytest.raises(ValueError, match="managed-memory allocation"):
+        managed_memory.advise(buffer, "set_read_mostly")
+    with pytest.raises(ValueError, match="managed-memory allocation"):
+        managed_memory.prefetch(buffer, device, stream=stream)
+    with pytest.raises(ValueError, match="managed-memory allocation"):
+        managed_memory.discard_prefetch(buffer, device, stream=stream)
 
     buffer.close()
 
 
-def test_managed_buffer_operation_validation(init_cuda):
+def test_managed_memory_operation_validation(init_cuda):
     device = Device()
     skip_if_managed_memory_unsupported(device)
     device.set_current()
 
     mr = create_managed_memory_resource_or_skip()
-    buffer = mr.allocate(4096)
+    buffer = mr.allocate(_MANAGED_TEST_ALLOCATION_SIZE)
     stream = device.create_stream()
 
     with pytest.raises(ValueError, match="requires a location"):
-        buffer.prefetch(stream=stream)
+        managed_memory.prefetch(buffer, stream=stream)
     with pytest.raises(ValueError, match="does not support location_type='host_numa'"):
-        buffer.advise("set_accessed_by", 0, location_type="host_numa")
+        managed_memory.advise(buffer, "set_accessed_by", _INVALID_HOST_DEVICE_ORDINAL, location_type="host_numa")
     with pytest.raises(ValueError, match="location must be None or -1"):
-        buffer.prefetch(0, stream=stream, location_type="host")
+        managed_memory.prefetch(buffer, _INVALID_HOST_DEVICE_ORDINAL, stream=stream, location_type="host")
+
+    buffer.close()
+
+
+def test_managed_memory_functions_accept_raw_pointer_ranges(init_cuda):
+    device = Device()
+    _skip_if_managed_location_ops_unsupported(device)
+    device.set_current()
+
+    buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE)
+    stream = device.create_stream()
+
+    managed_memory.advise(buffer.handle, "set_read_mostly", size=buffer.size)
+    assert (
+        _get_int_mem_range_attr(
+            buffer,
+            driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY,
+        )
+        == _READ_MOSTLY_ENABLED
+    )
+
+    managed_memory.prefetch(buffer.handle, device, size=buffer.size, stream=stream)
+    stream.sync()
+    last_location = _get_int_mem_range_attr(
+        buffer,
+        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION,
+    )
+    assert last_location == device.device_id
 
     buffer.close()
 

From 14575991d65ca85973a4f1dc61f068efc4fc3293 Mon Sep 17 00:00:00 2001
From: Rob Parolin <rparolin@nvidia.com>
Date: Tue, 17 Mar 2026 16:46:20 -0700
Subject: [PATCH 07/16] precommit format

---
 cuda_core/cuda/core/managed_memory.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cuda_core/cuda/core/managed_memory.py b/cuda_core/cuda/core/managed_memory.py
index f11aabcd19..f5bb09c13d 100644
--- a/cuda_core/cuda/core/managed_memory.py
+++ b/cuda_core/cuda/core/managed_memory.py
@@ -6,4 +6,4 @@
 
 from cuda.core._memory._buffer import advise, discard_prefetch, prefetch
 
-__all__ = ["advise", "prefetch", "discard_prefetch"]
+__all__ = ["advise", "discard_prefetch", "prefetch"]

From acb402478cac58689f069e0836819b2e91010c09 Mon Sep 17 00:00:00 2001
From: Rob Parolin <rparolin@nvidia.com>
Date: Tue, 17 Mar 2026 17:30:41 -0700
Subject: [PATCH 08/16] iterating on implementation

---
 cuda_bindings/pixi.lock                 | 86 ++++++++++++-------------
 cuda_core/cuda/core/_memory/_buffer.pyx | 63 ++++++++++++++----
 cuda_core/tests/test_memory.py          | 85 ++++++++++++++++++++++++
 3 files changed, 178 insertions(+), 56 deletions(-)

diff --git a/cuda_bindings/pixi.lock b/cuda_bindings/pixi.lock
index b01d6eec69..237a169580 100644
--- a/cuda_bindings/pixi.lock
+++ b/cuda_bindings/pixi.lock
@@ -1081,21 +1081,21 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.4-h3394656_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/conda-gcc-specs-15.2.0-h53410ce_16.conda
-      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cccl_linux-64-13.2.27-ha770c72_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-crt-dev_linux-64-13.2.51-ha770c72_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-cudart-13.2.51-hecca717_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-cudart-dev-13.2.51-hecca717_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-dev_linux-64-13.2.51-h376f20c_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-cudart-static-13.2.51-hecca717_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-static_linux-64-13.2.51-h376f20c_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart_linux-64-13.2.51-h376f20c_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvrtc-13.2.51-hecca717_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvvm-13.2.51-h69a702a_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-nvvm-dev_linux-64-13.2.51-ha770c72_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvvm-impl-13.2.51-h4bc722e_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvvm-tools-13.2.51-h4bc722e_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-profiler-api-13.2.20-h7938cbb_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.2-he2cc418_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cccl_linux-64-12.9.27-ha770c72_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-crt-dev_linux-64-12.9.86-ha770c72_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-cudart-12.9.79-h5888daf_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-cudart-dev-12.9.79-h5888daf_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-dev_linux-64-12.9.79-h3f2d84a_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-cudart-static-12.9.79-h5888daf_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-static_linux-64-12.9.79-h3f2d84a_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart_linux-64-12.9.79-h3f2d84a_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvrtc-12.9.86-hecca717_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvvm-12.9.86-h69a702a_6.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-nvvm-dev_linux-64-12.9.86-ha770c72_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvvm-impl-12.9.86-h4bc722e_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvvm-tools-12.9.86-h4bc722e_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-profiler-api-12.9.79-h7938cbb_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.9-h4f385c5_3.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/cython-3.2.3-py314h1807b08_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/dav1d-1.2.1-hd590300_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/dbus-1.16.2-h24cb091_1.conda
@@ -1134,7 +1134,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-5_h4a7cf45_openblas.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcap-2.77-h3ff7636_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-5_h0358290_openblas.conda
-      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufile-1.17.0.44-h85c024f_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufile-1.14.1.1-hbc026e6_1.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.25-h17f619e_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libdrm-2.4.125-hb03c661_1.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libegl-1.7.0-ha4b6fd6_2.conda
@@ -1160,8 +1160,8 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-hb9d3cd8_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libnl-3.11.0-hb9d3cd8_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/linux-64/libnvfatbin-13.2.51-hecca717_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/linux-64/libnvjitlink-13.2.51-hecca717_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libnvfatbin-12.9.82-hecca717_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libnvjitlink-12.9.86-hecca717_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.5-hd0c01bc_1.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.30-pthreads_h94d23a6_4.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libopenvino-2025.2.0-hb617929_1.conda
@@ -1264,7 +1264,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.0-pyhcf101f3_1.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb78ec9c_6.conda
       - conda: .
-        build: py314hb727236_0
+        build: py314ha6d028f_0
       - conda: ../cuda_pathfinder
       linux-aarch64:
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/_openmp_mutex-4.5-2_gnu.tar.bz2
@@ -1460,21 +1460,21 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/win-64/cairo-1.18.4-h5782bbf_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/conda-gcc-specs-15.2.0-hd546029_16.conda
-      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cccl_win-64-12.9.27-h57928b3_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-crt-dev_win-64-12.9.86-h57928b3_2.conda
-      - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-cudart-12.9.79-he0c23c2_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-cudart-dev-12.9.79-he0c23c2_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-dev_win-64-12.9.79-he0c23c2_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-cudart-static-12.9.79-he0c23c2_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-static_win-64-12.9.79-he0c23c2_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart_win-64-12.9.79-he0c23c2_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvrtc-12.9.86-hac47afa_1.conda
-      - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvvm-12.9.86-h719f0c7_6.conda
-      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-nvvm-dev_win-64-12.9.86-h57928b3_2.conda
-      - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvvm-impl-12.9.86-h2466b09_2.conda
-      - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvvm-tools-12.9.86-h2466b09_2.conda
-      - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-profiler-api-12.9.79-h57928b3_1.conda
-      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.9-h4f385c5_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cccl_win-64-13.2.27-h57928b3_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-crt-dev_win-64-13.2.51-h57928b3_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-cudart-13.2.51-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-cudart-dev-13.2.51-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-dev_win-64-13.2.51-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-cudart-static-13.2.51-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-static_win-64-13.2.51-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart_win-64-13.2.51-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvrtc-13.2.51-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvvm-13.2.51-h719f0c7_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-nvvm-dev_win-64-13.2.51-h57928b3_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvvm-impl-13.2.51-h2466b09_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvvm-tools-13.2.51-h2466b09_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-profiler-api-13.2.20-h57928b3_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.2-he2cc418_3.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/cython-3.2.3-py314h344ed54_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/dav1d-1.2.1-hcfcfb64_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
@@ -1520,8 +1520,8 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/win-64/liblapack-3.11.0-5_hf9ab0e9_mkl.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/liblzma-5.8.1-h2466b09_2.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libmpdec-4.0.0-h2466b09_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/win-64/libnvfatbin-12.9.82-hac47afa_1.conda
-      - conda: https://conda.anaconda.org/conda-forge/win-64/libnvjitlink-12.9.86-hac47afa_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libnvfatbin-13.2.51-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libnvjitlink-13.2.51-hac47afa_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libogg-1.3.5-h2466b09_1.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libopus-1.6-h6a83c73_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libpng-1.6.53-h7351971_0.conda
@@ -1583,7 +1583,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.0-pyhcf101f3_1.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda
       - conda: .
-        build: py314h5e6f764_0
+        build: py314h356c398_0
       - conda: ../cuda_pathfinder
 packages:
 - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2
@@ -2154,7 +2154,7 @@ packages:
   subdir: win-64
   variants:
     c_compiler: vs2022
-    cuda-version: 13.2.*
+    cuda_version: 13.2.*
     cxx_compiler: vs2022
     python: 3.14.*
     target_platform: win-64
@@ -2182,7 +2182,7 @@ packages:
   subdir: win-64
   variants:
     c_compiler: vs2022
-    cuda-version: 12.*
+    cuda_version: 12.*
     cxx_compiler: vs2022
     python: 3.14.*
     target_platform: win-64
@@ -2209,7 +2209,7 @@ packages:
   build: py314h9a28ecd_0
   subdir: linux-aarch64
   variants:
-    cuda-version: 13.2.*
+    cuda_version: 13.2.*
     python: 3.14.*
     target_platform: linux-aarch64
   depends:
@@ -2237,7 +2237,7 @@ packages:
   build: py314ha6d028f_0
   subdir: linux-64
   variants:
-    cuda-version: 12.*
+    cuda_version: 12.*
     python: 3.14.*
     target_platform: linux-64
   depends:
@@ -2265,7 +2265,7 @@ packages:
   build: py314hb727236_0
   subdir: linux-64
   variants:
-    cuda-version: 13.2.*
+    cuda_version: 13.2.*
     python: 3.14.*
     target_platform: linux-64
   depends:
@@ -2293,7 +2293,7 @@ packages:
   build: py314he8946ed_0
   subdir: linux-aarch64
   variants:
-    cuda-version: 12.*
+    cuda_version: 12.*
     python: 3.14.*
     target_platform: linux-aarch64
   depends:
diff --git a/cuda_core/cuda/core/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx
index 4663302b34..829e05b3ad 100644
--- a/cuda_core/cuda/core/_memory/_buffer.pyx
+++ b/cuda_core/cuda/core/_memory/_buffer.pyx
@@ -205,9 +205,11 @@ cdef inline object _normalize_managed_location(
             )
         loc_id = <int>location
         if loc_id == -1:
-            loc_type = "host"
+            if not allow_host:
+                raise ValueError(f"{what} does not support host locations")
+            return _make_managed_location("host", -1)
         elif loc_id >= 0:
-            loc_type = "device"
+            return _make_managed_location("device", loc_id)
         else:
             raise ValueError(
                 f"{what} location must be a device ordinal (>= 0), -1 for host, or None; got {location!r}"
@@ -245,23 +247,22 @@ cdef inline object _normalize_managed_location(
             )
         return _make_managed_location(loc_type, _HOST_NUMA_CURRENT_ID)
 
-    if loc_type == "host" and not allow_host:
-        raise ValueError(f"{what} does not support host locations")
-    if loc_type == "host_numa" and not allow_host_numa:
-        raise ValueError(f"{what} does not support location_type='host_numa'")
-    if loc_type == "host_numa_current" and not allow_host_numa_current:
-        raise ValueError(f"{what} does not support location_type='host_numa_current'")
-    return _make_managed_location(<str>loc_type, loc_id)
-
 
 cdef inline bint _managed_location_uses_v2_bindings():
     # cuda.bindings 13.x switches these APIs to CUmemLocation-based wrappers.
     return get_binding_version() >= (13, 0)
 
 
+cdef object _LEGACY_LOC_DEVICE = None
+cdef object _LEGACY_LOC_HOST = None
+
 cdef inline int _managed_location_to_legacy_device(object location, str what):
+    global _LEGACY_LOC_DEVICE, _LEGACY_LOC_HOST
+    if _LEGACY_LOC_DEVICE is None:
+        _LEGACY_LOC_DEVICE = _managed_location_enum("device")
+        _LEGACY_LOC_HOST = _managed_location_enum("host")
     cdef object loc_type = location.type
-    if loc_type == _managed_location_enum("device") or loc_type == _managed_location_enum("host"):
+    if loc_type == _LEGACY_LOC_DEVICE or loc_type == _LEGACY_LOC_HOST:
         return <int>location.id
     raise RuntimeError(
         f"{what} requires cuda.bindings 13.x for location_type={loc_type!r}"
@@ -396,7 +397,25 @@ def prefetch(
     int size=_MANAGED_SIZE_NOT_PROVIDED,
     location_type: str | None = None,
 ):
-    """Prefetch a managed-memory allocation range to a target location."""
+    """Prefetch a managed-memory allocation range to a target location.
+
+    Parameters
+    ----------
+    target : :class:`Buffer` | int | object
+        Managed allocation to operate on. This may be a :class:`Buffer` or a
+        raw pointer (requires ``size=``).
+    location : :obj:`~_device.Device` | int | None, optional
+        Target location. When ``location_type`` is ``None``, values are
+        interpreted as a device ordinal, ``-1`` for host, or ``None``.
+        A location is required for prefetch.
+    stream : :obj:`~_stream.Stream` | :obj:`~_graph.GraphBuilder`
+        Keyword argument specifying the stream for the asynchronous prefetch.
+    size : int, optional
+        Allocation size in bytes. Required when ``target`` is a raw pointer.
+    location_type : str | None, optional
+        Explicit location kind. Supported values are ``"device"``, ``"host"``,
+        ``"host_numa"``, and ``"host_numa_current"``.
+    """
     cdef Stream s = Stream_accept(stream)
     cdef object ptr
     cdef size_t nbytes
@@ -440,7 +459,25 @@ def discard_prefetch(
     int size=_MANAGED_SIZE_NOT_PROVIDED,
     location_type: str | None = None,
 ):
-    """Discard a managed-memory allocation range and prefetch it to a target location."""
+    """Discard a managed-memory allocation range and prefetch it to a target location.
+
+    Parameters
+    ----------
+    target : :class:`Buffer` | int | object
+        Managed allocation to operate on. This may be a :class:`Buffer` or a
+        raw pointer (requires ``size=``).
+    location : :obj:`~_device.Device` | int | None, optional
+        Target location. When ``location_type`` is ``None``, values are
+        interpreted as a device ordinal, ``-1`` for host, or ``None``.
+        A location is required for discard_prefetch.
+    stream : :obj:`~_stream.Stream` | :obj:`~_graph.GraphBuilder`
+        Keyword argument specifying the stream for the asynchronous operation.
+    size : int, optional
+        Allocation size in bytes. Required when ``target`` is a raw pointer.
+    location_type : str | None, optional
+        Explicit location kind. Supported values are ``"device"``, ``"host"``,
+        ``"host_numa"``, and ``"host_numa_current"``.
+    """
     cdef Stream s = Stream_accept(stream)
     cdef object ptr
     cdef object batch_ptr
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 927014826a..ea827818ac 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -1359,6 +1359,91 @@ def test_managed_memory_operation_validation(init_cuda):
     buffer.close()
 
 
+def test_managed_memory_advise_location_validation(init_cuda):
+    """Verify doc-specified location constraints for each advice kind."""
+    device = Device()
+    _skip_if_managed_allocation_unsupported(device)
+    device.set_current()
+
+    buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE)
+
+    # set_read_mostly works without a location (location is ignored)
+    managed_memory.advise(buffer, "set_read_mostly")
+
+    # set_preferred_location requires a location; device ordinal works
+    managed_memory.advise(buffer, "set_preferred_location", device.device_id)
+
+    # set_preferred_location with host location_type
+    managed_memory.advise(buffer, "set_preferred_location", location_type="host")
+
+    # set_accessed_by with host_numa raises ValueError (INVALID per CUDA docs)
+    with pytest.raises(ValueError, match="does not support location_type='host_numa'"):
+        managed_memory.advise(buffer, "set_accessed_by", 0, location_type="host_numa")
+
+    # set_accessed_by with host_numa_current also raises ValueError
+    with pytest.raises(ValueError, match="does not support location_type='host_numa_current'"):
+        managed_memory.advise(buffer, "set_accessed_by", location_type="host_numa_current")
+
+    # Inferred location from int: -1 maps to host, 0 maps to device
+    managed_memory.advise(buffer, "set_preferred_location", -1)
+    managed_memory.advise(buffer, "set_preferred_location", 0)
+
+    buffer.close()
+
+
+def test_managed_memory_advise_accepts_enum_value(init_cuda):
+    """advise() accepts CUmem_advise enum values directly, not just string aliases."""
+    device = Device()
+    _skip_if_managed_allocation_unsupported(device)
+    device.set_current()
+
+    buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE)
+
+    advice_enum = driver.CUmem_advise.CU_MEM_ADVISE_SET_READ_MOSTLY
+    managed_memory.advise(buffer, advice_enum)
+
+    assert (
+        _get_int_mem_range_attr(
+            buffer,
+            driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY,
+        )
+        == _READ_MOSTLY_ENABLED
+    )
+
+    buffer.close()
+
+
+def test_managed_memory_advise_size_rejected_for_buffer(init_cuda):
+    """advise() raises TypeError when size= is given with a Buffer target."""
+    device = Device()
+    _skip_if_managed_allocation_unsupported(device)
+    device.set_current()
+
+    buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE)
+
+    with pytest.raises(TypeError, match="does not accept size="):
+        managed_memory.advise(buffer, "set_read_mostly", size=1024)
+
+    buffer.close()
+
+
+def test_managed_memory_advise_invalid_advice_values(init_cuda):
+    """advise() rejects invalid advice strings and wrong types."""
+    device = Device()
+    _skip_if_managed_allocation_unsupported(device)
+    device.set_current()
+
+    buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE)
+
+    with pytest.raises(ValueError, match="advice must be one of"):
+        managed_memory.advise(buffer, "not_a_real_advice")
+
+    with pytest.raises(TypeError, match="advice must be"):
+        managed_memory.advise(buffer, 42)
+
+    buffer.close()
+
+
 def test_managed_memory_functions_accept_raw_pointer_ranges(init_cuda):
     device = Device()
     _skip_if_managed_location_ops_unsupported(device)

From d10ab07e2f402628b83b08e07d95da39c4f2b634 Mon Sep 17 00:00:00 2001
From: Rob Parolin <rparolin@nvidia.com>
Date: Tue, 17 Mar 2026 18:13:36 -0700
Subject: [PATCH 09/16] Simplify managed-memory helpers: remove long-form
 aliases, cache lookups, fix docs

- Remove duplicate long-form "cu_mem_advise_*" string aliases from
  _MANAGED_ADVICE_ALIASES; users pass short strings or the enum directly
- Replace the 3 boolean allow_host* params in _normalize_managed_location
  with a single allowed_loctypes frozenset driven by
  _MANAGED_ADVICE_ALLOWED_LOCTYPES (allow_none is kept)
- Cache immutable runtime checks: CU_DEVICE_CPU, v2 bindings flag,
  discard_prefetch support, and advice enum-to-alias reverse map
- Collapse hasattr+getattr to single getattr in _managed_location_enum
- Move _require_managed_discard_prefetch_support to top of discard_prefetch
  for fail-fast behavior
- Fix docs build: reset Sphinx module scope after managed_memory section in
  api.rst so subsequent sections resolve under cuda.core
- Add discard_prefetch pool-allocation test and comment on _get_mem_range_attr

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 cuda_core/cuda/core/_memory/_buffer.pyx | 94 ++++++++++++++-----------
 cuda_core/docs/source/api.rst           |  2 +
 cuda_core/tests/test_memory.py          | 26 +++++++
 3 files changed, 79 insertions(+), 43 deletions(-)

diff --git a/cuda_core/cuda/core/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx
index 829e05b3ad..d280b4ea2b 100644
--- a/cuda_core/cuda/core/_memory/_buffer.pyx
+++ b/cuda_core/cuda/core/_memory/_buffer.pyx
@@ -89,17 +89,11 @@ cdef dict _MANAGED_LOCATION_TYPE_ATTRS = {
 
 cdef dict _MANAGED_ADVICE_ALIASES = {
     "set_read_mostly": "CU_MEM_ADVISE_SET_READ_MOSTLY",
-    "cu_mem_advise_set_read_mostly": "CU_MEM_ADVISE_SET_READ_MOSTLY",
     "unset_read_mostly": "CU_MEM_ADVISE_UNSET_READ_MOSTLY",
-    "cu_mem_advise_unset_read_mostly": "CU_MEM_ADVISE_UNSET_READ_MOSTLY",
     "set_preferred_location": "CU_MEM_ADVISE_SET_PREFERRED_LOCATION",
-    "cu_mem_advise_set_preferred_location": "CU_MEM_ADVISE_SET_PREFERRED_LOCATION",
     "unset_preferred_location": "CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION",
-    "cu_mem_advise_unset_preferred_location": "CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION",
     "set_accessed_by": "CU_MEM_ADVISE_SET_ACCESSED_BY",
-    "cu_mem_advise_set_accessed_by": "CU_MEM_ADVISE_SET_ACCESSED_BY",
     "unset_accessed_by": "CU_MEM_ADVISE_UNSET_ACCESSED_BY",
-    "cu_mem_advise_unset_accessed_by": "CU_MEM_ADVISE_UNSET_ACCESSED_BY",
 }
 
 cdef frozenset _MANAGED_ADVICE_IGNORE_LOCATION = frozenset((
@@ -108,10 +102,18 @@ cdef frozenset _MANAGED_ADVICE_IGNORE_LOCATION = frozenset((
     "unset_preferred_location",
 ))
 
-cdef frozenset _MANAGED_ADVICE_HOST_OR_DEVICE_ONLY = frozenset((
-    "set_accessed_by",
-    "unset_accessed_by",
-))
+cdef frozenset _ALL_LOCATION_TYPES = frozenset(("device", "host", "host_numa", "host_numa_current"))
+cdef frozenset _DEVICE_HOST_NUMA = frozenset(("device", "host", "host_numa"))
+cdef frozenset _DEVICE_HOST_ONLY = frozenset(("device", "host"))
+
+cdef dict _MANAGED_ADVICE_ALLOWED_LOCTYPES = {
+    "set_read_mostly": _DEVICE_HOST_NUMA,
+    "unset_read_mostly": _DEVICE_HOST_NUMA,
+    "set_preferred_location": _ALL_LOCATION_TYPES,
+    "unset_preferred_location": _DEVICE_HOST_NUMA,
+    "set_accessed_by": _DEVICE_HOST_ONLY,
+    "unset_accessed_by": _DEVICE_HOST_ONLY,
+}
 
 cdef int _MANAGED_SIZE_NOT_PROVIDED = -1
 cdef int _HOST_NUMA_CURRENT_ID = 0
@@ -120,22 +122,32 @@ cdef size_t _SINGLE_RANGE_COUNT = 1
 cdef size_t _SINGLE_PREFETCH_LOCATION_COUNT = 1
 cdef unsigned long long _MANAGED_OPERATION_FLAGS = 0
 
+# Lazily cached values for immutable runtime properties.
+cdef object _CU_DEVICE_CPU = None
+cdef dict _ADVICE_ENUM_TO_ALIAS = None
+cdef int _V2_BINDINGS = -1
+cdef int _DISCARD_PREFETCH_SUPPORTED = -1
+
 
 cdef inline object _managed_location_enum(str location_type):
     cdef str attr_name = _MANAGED_LOCATION_TYPE_ATTRS[location_type]
-    if not hasattr(driver.CUmemLocationType, attr_name):
+    cdef object result = getattr(driver.CUmemLocationType, attr_name, None)
+    if result is None:
         raise RuntimeError(
             f"Managed-memory location type {location_type!r} is not supported by the "
             f"installed cuda.bindings package."
         )
-    return getattr(driver.CUmemLocationType, attr_name)
+    return result
 
 
 cdef inline object _make_managed_location(str location_type, int location_id):
+    global _CU_DEVICE_CPU
     cdef object location = driver.CUmemLocation()
     location.type = _managed_location_enum(location_type)
     if location_type == "host":
-        location.id = int(getattr(driver, "CU_DEVICE_CPU", -1))
+        if _CU_DEVICE_CPU is None:
+            _CU_DEVICE_CPU = int(getattr(driver, "CU_DEVICE_CPU", -1))
+        location.id = _CU_DEVICE_CPU
     elif location_type == "host_numa_current":
         location.id = _HOST_NUMA_CURRENT_ID
     else:
@@ -157,12 +169,17 @@ cdef inline tuple _normalize_managed_advice(object advice):
         return alias, getattr(driver.CUmem_advise, attr_name)
 
     if isinstance(advice, driver.CUmem_advise):
-        for alias, attr_name in _MANAGED_ADVICE_ALIASES.items():
-            if alias.startswith("cu_mem_advise_"):
-                continue
-            if advice == getattr(driver.CUmem_advise, attr_name):
-                return alias, advice
-        raise ValueError(f"Unsupported advice value: {advice!r}")
+        global _ADVICE_ENUM_TO_ALIAS
+        if _ADVICE_ENUM_TO_ALIAS is None:
+            _ADVICE_ENUM_TO_ALIAS = {}
+            for alias, attr_name in _MANAGED_ADVICE_ALIASES.items():
+                enum_val = getattr(driver.CUmem_advise, attr_name, None)
+                if enum_val is not None:
+                    _ADVICE_ENUM_TO_ALIAS[enum_val] = alias
+        alias = _ADVICE_ENUM_TO_ALIAS.get(advice)
+        if alias is None:
+            raise ValueError(f"Unsupported advice value: {advice!r}")
+        return alias, advice
 
     raise TypeError(
         "advice must be a cuda.bindings.driver.CUmem_advise value or a supported string alias"
@@ -174,9 +191,7 @@ cdef inline object _normalize_managed_location(
     object location_type,
     str what,
     bint allow_none=False,
-    bint allow_host=True,
-    bint allow_host_numa=True,
-    bint allow_host_numa_current=True,
+    frozenset allowed_loctypes=_ALL_LOCATION_TYPES,
 ):
     cdef object loc_type
     cdef int loc_id
@@ -194,6 +209,9 @@ cdef inline object _normalize_managed_location(
             f"or None, got {location_type!r}"
         )
 
+    if loc_type is not None and loc_type not in allowed_loctypes:
+        raise ValueError(f"{what} does not support location_type='{loc_type}'")
+
     if loc_type is None:
         if location is None:
             if allow_none:
@@ -205,7 +223,7 @@ cdef inline object _normalize_managed_location(
             )
         loc_id = <int>location
         if loc_id == -1:
-            if not allow_host:
+            if "host" not in allowed_loctypes:
                 raise ValueError(f"{what} does not support host locations")
             return _make_managed_location("host", -1)
         elif loc_id >= 0:
@@ -227,20 +245,14 @@ cdef inline object _normalize_managed_location(
             raise ValueError(
                 f"{what} location must be None or -1 when location_type is 'host', got {location!r}"
             )
-        if not allow_host:
-            raise ValueError(f"{what} does not support location_type='host'")
         return _make_managed_location(loc_type, -1)
     elif loc_type == "host_numa":
-        if not allow_host_numa:
-            raise ValueError(f"{what} does not support location_type='host_numa'")
         if not isinstance(location, int) or <int>location < 0:
             raise ValueError(
                 f"{what} location must be a NUMA node ID (>= 0) when location_type is 'host_numa', got {location!r}"
             )
         return _make_managed_location(loc_type, <int>location)
     else:
-        if not allow_host_numa_current:
-            raise ValueError(f"{what} does not support location_type='host_numa_current'")
         if location is not None:
             raise ValueError(
                 f"{what} location must be None when location_type is 'host_numa_current', got {location!r}"
@@ -250,7 +262,10 @@ cdef inline object _normalize_managed_location(
 
 cdef inline bint _managed_location_uses_v2_bindings():
     # cuda.bindings 13.x switches these APIs to CUmemLocation-based wrappers.
-    return get_binding_version() >= (13, 0)
+    global _V2_BINDINGS
+    if _V2_BINDINGS < 0:
+        _V2_BINDINGS = 1 if get_binding_version() >= (13, 0) else 0
+    return _V2_BINDINGS != 0
 
 
 cdef object _LEGACY_LOC_DEVICE = None
@@ -276,7 +291,10 @@ cdef inline void _require_managed_buffer(Buffer self, str what):
 
 
 cdef inline void _require_managed_discard_prefetch_support(str what):
-    if not hasattr(driver, "cuMemDiscardAndPrefetchBatchAsync"):
+    global _DISCARD_PREFETCH_SUPPORTED
+    if _DISCARD_PREFETCH_SUPPORTED < 0:
+        _DISCARD_PREFETCH_SUPPORTED = 1 if hasattr(driver, "cuMemDiscardAndPrefetchBatchAsync") else 0
+    if not _DISCARD_PREFETCH_SUPPORTED:
         raise RuntimeError(
             f"{what} requires cuda.bindings support for cuMemDiscardAndPrefetchBatchAsync"
         )
@@ -372,9 +390,7 @@ def advise(
         location_type,
         "advise",
         allow_none=advice_name in _MANAGED_ADVICE_IGNORE_LOCATION,
-        allow_host=True,
-        allow_host_numa=advice_name not in _MANAGED_ADVICE_HOST_OR_DEVICE_ONLY,
-        allow_host_numa_current=advice_name == "set_preferred_location",
+        allowed_loctypes=_MANAGED_ADVICE_ALLOWED_LOCTYPES[advice_name],
     )
     if _managed_location_uses_v2_bindings():
         handle_return(driver.cuMemAdvise(ptr, nbytes, advice, location))
@@ -425,10 +441,6 @@ def prefetch(
         location,
         location_type,
         "prefetch",
-        allow_none=False,
-        allow_host=True,
-        allow_host_numa=True,
-        allow_host_numa_current=True,
     )
     if _managed_location_uses_v2_bindings():
         handle_return(
@@ -478,6 +490,7 @@ def discard_prefetch(
         Explicit location kind. Supported values are ``"device"``, ``"host"``,
         ``"host_numa"``, and ``"host_numa_current"``.
     """
+    _require_managed_discard_prefetch_support("discard_prefetch")
     cdef Stream s = Stream_accept(stream)
     cdef object ptr
     cdef object batch_ptr
@@ -485,15 +498,10 @@ def discard_prefetch(
 
     ptr, nbytes = _normalize_managed_target_range(target, size, "discard_prefetch")
     batch_ptr = driver.CUdeviceptr(int(ptr))
-    _require_managed_discard_prefetch_support("discard_prefetch")
     location = _normalize_managed_location(
         location,
         location_type,
         "discard_prefetch",
-        allow_none=False,
-        allow_host=True,
-        allow_host_numa=True,
-        allow_host_numa_current=True,
     )
     handle_return(
         driver.cuMemDiscardAndPrefetchBatchAsync(
diff --git a/cuda_core/docs/source/api.rst b/cuda_core/docs/source/api.rst
index 4d63bbcf88..7bf59ae495 100644
--- a/cuda_core/docs/source/api.rst
+++ b/cuda_core/docs/source/api.rst
@@ -74,6 +74,8 @@ Managed memory
    prefetch
    discard_prefetch
 
+.. module:: cuda.core
+   :no-index:
 
 CUDA compilation toolchain
 --------------------------
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index ea827818ac..5296ea344a 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -1142,6 +1142,7 @@ def test_managed_memory_resource_preferred_location_validation(init_cuda):
 
 
 def _get_mem_range_attr(buffer, attribute, data_size):
+    # cuMemRangeGetAttribute returns a raw integer when data_size <= 4.
     return handle_return(driver.cuMemRangeGetAttribute(data_size, attribute, buffer.handle, buffer.size))
 
 
@@ -1252,6 +1253,31 @@ def test_managed_memory_prefetch_supports_external_managed_allocations(init_cuda
     buffer.close()
 
 
+def test_managed_memory_discard_prefetch_supports_managed_pool_allocations(init_cuda):
+    device = Device()
+    skip_if_managed_memory_unsupported(device)
+    _skip_if_managed_discard_prefetch_unsupported(device)
+    device.set_current()
+
+    mr = create_managed_memory_resource_or_skip()
+    buffer = mr.allocate(_MANAGED_TEST_ALLOCATION_SIZE)
+    stream = device.create_stream()
+
+    managed_memory.prefetch(buffer, _HOST_LOCATION_ID, stream=stream)
+    stream.sync()
+
+    managed_memory.discard_prefetch(buffer, device, stream=stream)
+    stream.sync()
+
+    last_location = _get_int_mem_range_attr(
+        buffer,
+        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION,
+    )
+    assert last_location == device.device_id
+
+    buffer.close()
+
+
 def test_managed_memory_discard_prefetch_supports_external_managed_allocations(init_cuda):
     device = Device()
     _skip_if_managed_discard_prefetch_unsupported(device)

From c250c92e47393fa6cb0e6611245c5a4dd0c3b6cf Mon Sep 17 00:00:00 2001
From: Rob Parolin <rparolin@nvidia.com>
Date: Wed, 18 Mar 2026 09:21:11 -0700
Subject: [PATCH 10/16] fix(test): reset _V2_BINDINGS cache so legacy-signature
 tests take the legacy path

The _V2_BINDINGS cache in _buffer.pyx persists across tests, so
monkeypatching get_binding_version alone is insufficient when earlier
tests have already populated the cache with the v2 value. Promote
_V2_BINDINGS from cdef int to a Python-level variable so tests can
monkeypatch it directly via monkeypatch.setattr, and reset it to -1
in both legacy-signature tests.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 cuda_core/cuda/core/_memory/_buffer.pyx | 2 +-
 cuda_core/tests/test_memory.py          | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/cuda_core/cuda/core/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx
index 6f5809e06c..d109de2ac4 100644
--- a/cuda_core/cuda/core/_memory/_buffer.pyx
+++ b/cuda_core/cuda/core/_memory/_buffer.pyx
@@ -124,7 +124,7 @@ cdef unsigned long long _MANAGED_OPERATION_FLAGS = 0
 # Lazily cached values for immutable runtime properties.
 cdef object _CU_DEVICE_CPU = None
 cdef dict _ADVICE_ENUM_TO_ALIAS = None
-cdef int _V2_BINDINGS = -1
+_V2_BINDINGS = -1
 cdef int _DISCARD_PREFETCH_SUPPORTED = -1
 
 
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 9cd3209d8d..411a3c6cb5 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -1314,6 +1314,7 @@ def fake_cuMemAdvise(ptr, size, advice, location):
         return (driver.CUresult.CUDA_SUCCESS,)
 
     monkeypatch.setattr(_buffer, "get_binding_version", lambda: _LEGACY_BINDINGS_VERSION)
+    monkeypatch.setattr(_buffer, "_V2_BINDINGS", -1)
     monkeypatch.setattr(_buffer.driver, "cuMemAdvise", fake_cuMemAdvise)
 
     managed_memory.advise(buffer, "set_read_mostly")
@@ -1338,6 +1339,7 @@ def fake_cuMemPrefetchAsync(ptr, size, location, hstream):
         return (driver.CUresult.CUDA_SUCCESS,)
 
     monkeypatch.setattr(_buffer, "get_binding_version", lambda: _LEGACY_BINDINGS_VERSION)
+    monkeypatch.setattr(_buffer, "_V2_BINDINGS", -1)
     monkeypatch.setattr(_buffer.driver, "cuMemPrefetchAsync", fake_cuMemPrefetchAsync)
 
     managed_memory.prefetch(buffer, device, stream=stream)

From 89329d9c6eff581445b4806fe0217e598a2313fa Mon Sep 17 00:00:00 2001
From: Rob Parolin <rparolin@nvidia.com>
Date: Wed, 18 Mar 2026 10:18:41 -0700
Subject: [PATCH 11/16] fix(test): require concurrent_managed_access for advise
 tests that hit real hardware

These three tests call cuMemAdvise on real CUDA devices and verify
memory range attributes. On devices without concurrent_managed_access
(e.g. Windows/WDDM), set_read_mostly silently no-ops and
set_preferred_location fails with CUDA_ERROR_INVALID_DEVICE. Use the
stricter _skip_if_managed_location_ops_unsupported guard, matching the
pattern already used by test_managed_memory_functions_accept_raw_pointer_ranges.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 cuda_core/tests/test_memory.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 411a3c6cb5..56c505fbe6 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -1207,7 +1207,7 @@ def test_managed_memory_prefetch_supports_managed_pool_allocations(init_cuda):
 
 def test_managed_memory_advise_supports_external_managed_allocations(init_cuda):
     device = Device()
-    _skip_if_managed_allocation_unsupported(device)
+    _skip_if_managed_location_ops_unsupported(device)
     device.set_current()
 
     buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE)
@@ -1390,7 +1390,7 @@ def test_managed_memory_operation_validation(init_cuda):
 def test_managed_memory_advise_location_validation(init_cuda):
     """Verify doc-specified location constraints for each advice kind."""
     device = Device()
-    _skip_if_managed_allocation_unsupported(device)
+    _skip_if_managed_location_ops_unsupported(device)
     device.set_current()
 
     buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE)
@@ -1422,7 +1422,7 @@ def test_managed_memory_advise_location_validation(init_cuda):
 def test_managed_memory_advise_accepts_enum_value(init_cuda):
     """advise() accepts CUmem_advise enum values directly, not just string aliases."""
     device = Device()
-    _skip_if_managed_allocation_unsupported(device)
+    _skip_if_managed_location_ops_unsupported(device)
     device.set_current()
 
     buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE)

From 8a75d1bf1f1172e4681bb232a22f00ff9567d5d8 Mon Sep 17 00:00:00 2001
From: Rob Parolin <rparolin@nvidia.com>
Date: Wed, 18 Mar 2026 11:23:53 -0700
Subject: [PATCH 12/16] fix: validate managed buffer before checking
 discard_prefetch bindings support

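Reorder checks in discard_prefetch so _normalize_managed_target_range
runs before _require_managed_discard_prefetch_support. This ensures
non-managed buffers raise ValueError before the RuntimeError for missing
cuMemDiscardAndPrefetchBatchAsync support. Stream_accept(stream) moves
below both checks as well, so stream coercion now happens only after the
target range has been validated and bindings support has been confirmed.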

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 cuda_core/cuda/core/_memory/_buffer.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cuda_core/cuda/core/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx
index d109de2ac4..ffd82facb5 100644
--- a/cuda_core/cuda/core/_memory/_buffer.pyx
+++ b/cuda_core/cuda/core/_memory/_buffer.pyx
@@ -489,13 +489,13 @@ def discard_prefetch(
         Explicit location kind. Supported values are ``"device"``, ``"host"``,
         ``"host_numa"``, and ``"host_numa_current"``.
     """
-    _require_managed_discard_prefetch_support("discard_prefetch")
-    cdef Stream s = Stream_accept(stream)
     cdef object ptr
     cdef object batch_ptr
     cdef size_t nbytes
 
     ptr, nbytes = _normalize_managed_target_range(target, size, "discard_prefetch")
+    _require_managed_discard_prefetch_support("discard_prefetch")
+    cdef Stream s = Stream_accept(stream)
     batch_ptr = driver.CUdeviceptr(int(ptr))
     location = _normalize_managed_location(
         location,

From 9e9b1e0914d30f855389a349cf8d41d134b1c4dc Mon Sep 17 00:00:00 2001
From: Rob Parolin <rparolin@nvidia.com>
Date: Wed, 18 Mar 2026 14:08:24 -0700
Subject: [PATCH 13/16] refactor: extract managed memory ops into dedicated
 _managed_memory_ops module

Move advise, prefetch, and discard_prefetch functions and their helpers
out of _buffer.pyx into a new _managed_memory_ops Cython module to
improve separation of concerns. Expose _init_mem_attrs and
_query_memory_attrs as non-inline cdef functions in _buffer.pxd so the
new module can reuse them.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 cuda_core/cuda/core/_memory/_buffer.pxd       |   8 +
 cuda_core/cuda/core/_memory/_buffer.pyx       | 449 +----------------
 .../cuda/core/_memory/_managed_memory_ops.pxd |   6 +
 .../cuda/core/_memory/_managed_memory_ops.pyx | 458 ++++++++++++++++++
 cuda_core/cuda/core/managed_memory.py         |   2 +-
 cuda_core/tests/test_memory.py                |  14 +-
 6 files changed, 483 insertions(+), 454 deletions(-)
 create mode 100644 cuda_core/cuda/core/_memory/_managed_memory_ops.pxd
 create mode 100644 cuda_core/cuda/core/_memory/_managed_memory_ops.pyx

diff --git a/cuda_core/cuda/core/_memory/_buffer.pxd b/cuda_core/cuda/core/_memory/_buffer.pxd
index 04b5707e18..9065da77eb 100644
--- a/cuda_core/cuda/core/_memory/_buffer.pxd
+++ b/cuda_core/cuda/core/_memory/_buffer.pxd
@@ -4,6 +4,7 @@
 
 from libc.stdint cimport uintptr_t
 
+from cuda.bindings cimport cydriver
 from cuda.core._resource_handles cimport DevicePtrHandle
 from cuda.core._stream cimport Stream
 
@@ -38,3 +39,10 @@ cdef Buffer Buffer_from_deviceptr_handle(
     MemoryResource mr,
     object ipc_descriptor = *
 )
+
+# Memory attribute query helpers (used by _managed_memory_ops)
+cdef void _init_mem_attrs(Buffer self)
+cdef int _query_memory_attrs(
+    _MemAttrs& out,
+    cydriver.CUdeviceptr ptr,
+) except -1 nogil
diff --git a/cuda_core/cuda/core/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx
index ffd82facb5..104252a62b 100644
--- a/cuda_core/cuda/core/_memory/_buffer.pyx
+++ b/cuda_core/cuda/core/_memory/_buffer.pyx
@@ -35,7 +35,7 @@ else:
     BufferProtocol = object
 
 from cuda.core._dlpack import DLDeviceType, make_py_capsule
-from cuda.core._utils.cuda_utils import driver, get_binding_version, handle_return
+from cuda.core._utils.cuda_utils import driver, handle_return
 from cuda.core._device import Device
 
 
@@ -72,449 +72,6 @@ A type union of :obj:`~driver.CUdeviceptr`, `int` and `None` for hinting
 """
 
 
-cdef tuple _VALID_MANAGED_LOCATION_TYPES = (
-    "device",
-    "host",
-    "host_numa",
-    "host_numa_current",
-)
-
-cdef dict _MANAGED_LOCATION_TYPE_ATTRS = {
-    "device": "CU_MEM_LOCATION_TYPE_DEVICE",
-    "host": "CU_MEM_LOCATION_TYPE_HOST",
-    "host_numa": "CU_MEM_LOCATION_TYPE_HOST_NUMA",
-    "host_numa_current": "CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT",
-}
-
-cdef dict _MANAGED_ADVICE_ALIASES = {
-    "set_read_mostly": "CU_MEM_ADVISE_SET_READ_MOSTLY",
-    "unset_read_mostly": "CU_MEM_ADVISE_UNSET_READ_MOSTLY",
-    "set_preferred_location": "CU_MEM_ADVISE_SET_PREFERRED_LOCATION",
-    "unset_preferred_location": "CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION",
-    "set_accessed_by": "CU_MEM_ADVISE_SET_ACCESSED_BY",
-    "unset_accessed_by": "CU_MEM_ADVISE_UNSET_ACCESSED_BY",
-}
-
-cdef frozenset _MANAGED_ADVICE_IGNORE_LOCATION = frozenset((
-    "set_read_mostly",
-    "unset_read_mostly",
-    "unset_preferred_location",
-))
-
-cdef frozenset _ALL_LOCATION_TYPES = frozenset(("device", "host", "host_numa", "host_numa_current"))
-cdef frozenset _DEVICE_HOST_NUMA = frozenset(("device", "host", "host_numa"))
-cdef frozenset _DEVICE_HOST_ONLY = frozenset(("device", "host"))
-
-cdef dict _MANAGED_ADVICE_ALLOWED_LOCTYPES = {
-    "set_read_mostly": _DEVICE_HOST_NUMA,
-    "unset_read_mostly": _DEVICE_HOST_NUMA,
-    "set_preferred_location": _ALL_LOCATION_TYPES,
-    "unset_preferred_location": _DEVICE_HOST_NUMA,
-    "set_accessed_by": _DEVICE_HOST_ONLY,
-    "unset_accessed_by": _DEVICE_HOST_ONLY,
-}
-
-cdef int _MANAGED_SIZE_NOT_PROVIDED = -1
-cdef int _HOST_NUMA_CURRENT_ID = 0
-cdef int _FIRST_PREFETCH_LOCATION_INDEX = 0
-cdef size_t _SINGLE_RANGE_COUNT = 1
-cdef size_t _SINGLE_PREFETCH_LOCATION_COUNT = 1
-cdef unsigned long long _MANAGED_OPERATION_FLAGS = 0
-
-# Lazily cached values for immutable runtime properties.
-cdef object _CU_DEVICE_CPU = None
-cdef dict _ADVICE_ENUM_TO_ALIAS = None
-_V2_BINDINGS = -1
-cdef int _DISCARD_PREFETCH_SUPPORTED = -1
-
-
-cdef inline object _managed_location_enum(str location_type):
-    cdef str attr_name = _MANAGED_LOCATION_TYPE_ATTRS[location_type]
-    cdef object result = getattr(driver.CUmemLocationType, attr_name, None)
-    if result is None:
-        raise RuntimeError(
-            f"Managed-memory location type {location_type!r} is not supported by the "
-            f"installed cuda.bindings package."
-        )
-    return result
-
-
-cdef inline object _make_managed_location(str location_type, int location_id):
-    global _CU_DEVICE_CPU
-    cdef object location = driver.CUmemLocation()
-    location.type = _managed_location_enum(location_type)
-    if location_type == "host":
-        if _CU_DEVICE_CPU is None:
-            _CU_DEVICE_CPU = int(getattr(driver, "CU_DEVICE_CPU", -1))
-        location.id = _CU_DEVICE_CPU
-    elif location_type == "host_numa_current":
-        location.id = _HOST_NUMA_CURRENT_ID
-    else:
-        location.id = location_id
-    return location
-
-
-cdef inline tuple _normalize_managed_advice(object advice):
-    cdef str alias
-    cdef str attr_name
-    if isinstance(advice, str):
-        alias = advice.lower()
-        attr_name = _MANAGED_ADVICE_ALIASES.get(alias)
-        if attr_name is None:
-            raise ValueError(
-                "advice must be one of "
-                f"{tuple(sorted(_MANAGED_ADVICE_ALIASES))!r}, got {advice!r}"
-            )
-        return alias, getattr(driver.CUmem_advise, attr_name)
-
-    if isinstance(advice, driver.CUmem_advise):
-        global _ADVICE_ENUM_TO_ALIAS
-        if _ADVICE_ENUM_TO_ALIAS is None:
-            _ADVICE_ENUM_TO_ALIAS = {}
-            for alias, attr_name in _MANAGED_ADVICE_ALIASES.items():
-                enum_val = getattr(driver.CUmem_advise, attr_name, None)
-                if enum_val is not None:
-                    _ADVICE_ENUM_TO_ALIAS[enum_val] = alias
-        alias = _ADVICE_ENUM_TO_ALIAS.get(advice)
-        if alias is None:
-            raise ValueError(f"Unsupported advice value: {advice!r}")
-        return alias, advice
-
-    raise TypeError(
-        "advice must be a cuda.bindings.driver.CUmem_advise value or a supported string alias"
-    )
-
-
-cdef inline object _normalize_managed_location(
-    object location,
-    object location_type,
-    str what,
-    bint allow_none=False,
-    frozenset allowed_loctypes=_ALL_LOCATION_TYPES,
-):
-    cdef object loc_type
-    cdef int loc_id
-
-    if isinstance(location, Device):
-        location = location.device_id
-
-    if location_type is not None and not isinstance(location_type, str):
-        raise TypeError(f"{what} location_type must be a string or None, got {type(location_type).__name__}")
-
-    loc_type = None if location_type is None else (<str>location_type).lower()
-    if loc_type is not None and loc_type not in _VALID_MANAGED_LOCATION_TYPES:
-        raise ValueError(
-            f"{what} location_type must be one of {_VALID_MANAGED_LOCATION_TYPES!r} "
-            f"or None, got {location_type!r}"
-        )
-
-    if loc_type is not None and loc_type not in allowed_loctypes:
-        raise ValueError(f"{what} does not support location_type='{loc_type}'")
-
-    if loc_type is None:
-        if location is None:
-            if allow_none:
-                return _make_managed_location("host", -1)
-            raise ValueError(f"{what} requires a location")
-        if not isinstance(location, int):
-            raise TypeError(
-                f"{what} location must be a Device, int, or None, got {type(location).__name__}"
-            )
-        loc_id = <int>location
-        if loc_id == -1:
-            if "host" not in allowed_loctypes:
-                raise ValueError(f"{what} does not support host locations")
-            return _make_managed_location("host", -1)
-        elif loc_id >= 0:
-            return _make_managed_location("device", loc_id)
-        else:
-            raise ValueError(
-                f"{what} location must be a device ordinal (>= 0), -1 for host, or None; got {location!r}"
-            )
-    elif loc_type == "device":
-        if isinstance(location, int) and <int>location >= 0:
-            loc_id = <int>location
-        else:
-            raise ValueError(
-                f"{what} location must be a device ordinal (>= 0) when location_type is 'device', got {location!r}"
-            )
-        return _make_managed_location(loc_type, loc_id)
-    elif loc_type == "host":
-        if location not in (None, -1):
-            raise ValueError(
-                f"{what} location must be None or -1 when location_type is 'host', got {location!r}"
-            )
-        return _make_managed_location(loc_type, -1)
-    elif loc_type == "host_numa":
-        if not isinstance(location, int) or <int>location < 0:
-            raise ValueError(
-                f"{what} location must be a NUMA node ID (>= 0) when location_type is 'host_numa', got {location!r}"
-            )
-        return _make_managed_location(loc_type, <int>location)
-    else:
-        if location is not None:
-            raise ValueError(
-                f"{what} location must be None when location_type is 'host_numa_current', got {location!r}"
-            )
-        return _make_managed_location(loc_type, _HOST_NUMA_CURRENT_ID)
-
-
-cdef inline bint _managed_location_uses_v2_bindings():
-    # cuda.bindings 13.x switches these APIs to CUmemLocation-based wrappers.
-    global _V2_BINDINGS
-    if _V2_BINDINGS < 0:
-        _V2_BINDINGS = 1 if get_binding_version() >= (13, 0) else 0
-    return _V2_BINDINGS != 0
-
-
-cdef object _LEGACY_LOC_DEVICE = None
-cdef object _LEGACY_LOC_HOST = None
-
-cdef inline int _managed_location_to_legacy_device(object location, str what):
-    global _LEGACY_LOC_DEVICE, _LEGACY_LOC_HOST
-    if _LEGACY_LOC_DEVICE is None:
-        _LEGACY_LOC_DEVICE = _managed_location_enum("device")
-        _LEGACY_LOC_HOST = _managed_location_enum("host")
-    cdef object loc_type = location.type
-    if loc_type == _LEGACY_LOC_DEVICE or loc_type == _LEGACY_LOC_HOST:
-        return <int>location.id
-    raise RuntimeError(
-        f"{what} requires cuda.bindings 13.x for location_type={loc_type!r}"
-    )
-
-
-cdef inline void _require_managed_buffer(Buffer self, str what):
-    _init_mem_attrs(self)
-    if not self._mem_attrs.is_managed:
-        raise ValueError(f"{what} requires a managed-memory allocation")
-
-
-cdef inline void _require_managed_discard_prefetch_support(str what):
-    global _DISCARD_PREFETCH_SUPPORTED
-    if _DISCARD_PREFETCH_SUPPORTED < 0:
-        _DISCARD_PREFETCH_SUPPORTED = 1 if hasattr(driver, "cuMemDiscardAndPrefetchBatchAsync") else 0
-    if not _DISCARD_PREFETCH_SUPPORTED:
-        raise RuntimeError(
-            f"{what} requires cuda.bindings support for cuMemDiscardAndPrefetchBatchAsync"
-        )
-
-
-cdef inline tuple _managed_range_from_buffer(
-    Buffer buffer,
-    int size,
-    str what,
-):
-    if size != _MANAGED_SIZE_NOT_PROVIDED:
-        raise TypeError(f"{what} does not accept size= when target is a Buffer")
-    _require_managed_buffer(buffer, what)
-    return buffer.handle, buffer._size
-
-
-cdef inline uintptr_t _coerce_raw_pointer(object target, str what) except? 0:
-    cdef object ptr_obj
-    try:
-        ptr_obj = int(target)
-    except Exception as exc:
-        raise TypeError(
-            f"{what} target must be a Buffer or a raw pointer, got {type(target).__name__}"
-        ) from exc
-    if ptr_obj < 0:
-        raise ValueError(f"{what} target pointer must be >= 0, got {target!r}")
-    return <uintptr_t>ptr_obj
-
-
-cdef inline int _require_managed_pointer(uintptr_t ptr, str what) except -1:
-    cdef _MemAttrs mem_attrs
-    with nogil:
-        _query_memory_attrs(mem_attrs, <cydriver.CUdeviceptr>ptr)
-    if not mem_attrs.is_managed:
-        raise ValueError(f"{what} requires a managed-memory allocation")
-    return 0
-
-
-cdef inline tuple _normalize_managed_target_range(
-    object target,
-    int size,
-    str what,
-):
-    cdef uintptr_t ptr
-
-    if isinstance(target, Buffer):
-        return _managed_range_from_buffer(<Buffer>target, size, what)
-
-    if size == _MANAGED_SIZE_NOT_PROVIDED:
-        raise TypeError(f"{what} requires size= when target is a raw pointer")
-    ptr = _coerce_raw_pointer(target, what)
-    _require_managed_pointer(ptr, what)
-    return ptr, <size_t>size
-
-
-def advise(
-    target,
-    advice: driver.CUmem_advise | str,
-    location: Device | int | None = None,
-    *,
-    int size=_MANAGED_SIZE_NOT_PROVIDED,
-    location_type: str | None = None,
-):
-    """Apply managed-memory advice to an allocation range.
-
-    Parameters
-    ----------
-    target : :class:`Buffer` | int | object
-        Managed allocation to operate on. This may be a :class:`Buffer` or a
-        raw pointer (requires ``size=``).
-    advice : :obj:`~driver.CUmem_advise` | str
-        Managed-memory advice to apply. String aliases such as
-        ``"set_read_mostly"``, ``"set_preferred_location"``, and
-        ``"set_accessed_by"`` are accepted.
-    location : :obj:`~_device.Device` | int | None, optional
-        Target location. When ``location_type`` is ``None``, values are
-        interpreted as a device ordinal, ``-1`` for host, or ``None`` for
-        advice values that ignore location.
-    size : int, optional
-        Allocation size in bytes. Required when ``target`` is a raw pointer.
-    location_type : str | None, optional
-        Explicit location kind. Supported values are ``"device"``, ``"host"``,
-        ``"host_numa"``, and ``"host_numa_current"``.
-    """
-    cdef str advice_name
-    cdef object ptr
-    cdef size_t nbytes
-
-    ptr, nbytes = _normalize_managed_target_range(target, size, "advise")
-    advice_name, advice = _normalize_managed_advice(advice)
-    location = _normalize_managed_location(
-        location,
-        location_type,
-        "advise",
-        allow_none=advice_name in _MANAGED_ADVICE_IGNORE_LOCATION,
-        allowed_loctypes=_MANAGED_ADVICE_ALLOWED_LOCTYPES[advice_name],
-    )
-    if _managed_location_uses_v2_bindings():
-        handle_return(driver.cuMemAdvise(ptr, nbytes, advice, location))
-    else:
-        handle_return(
-            driver.cuMemAdvise(
-                ptr,
-                nbytes,
-                advice,
-                _managed_location_to_legacy_device(location, "advise"),
-            )
-        )
-
-
-def prefetch(
-    target,
-    location: Device | int | None = None,
-    *,
-    stream: Stream | GraphBuilder,
-    int size=_MANAGED_SIZE_NOT_PROVIDED,
-    location_type: str | None = None,
-):
-    """Prefetch a managed-memory allocation range to a target location.
-
-    Parameters
-    ----------
-    target : :class:`Buffer` | int | object
-        Managed allocation to operate on. This may be a :class:`Buffer` or a
-        raw pointer (requires ``size=``).
-    location : :obj:`~_device.Device` | int | None, optional
-        Target location. When ``location_type`` is ``None``, values are
-        interpreted as a device ordinal, ``-1`` for host, or ``None``.
-        A location is required for prefetch.
-    stream : :obj:`~_stream.Stream` | :obj:`~_graph.GraphBuilder`
-        Keyword argument specifying the stream for the asynchronous prefetch.
-    size : int, optional
-        Allocation size in bytes. Required when ``target`` is a raw pointer.
-    location_type : str | None, optional
-        Explicit location kind. Supported values are ``"device"``, ``"host"``,
-        ``"host_numa"``, and ``"host_numa_current"``.
-    """
-    cdef Stream s = Stream_accept(stream)
-    cdef object ptr
-    cdef size_t nbytes
-
-    ptr, nbytes = _normalize_managed_target_range(target, size, "prefetch")
-    location = _normalize_managed_location(
-        location,
-        location_type,
-        "prefetch",
-    )
-    if _managed_location_uses_v2_bindings():
-        handle_return(
-            driver.cuMemPrefetchAsync(
-                ptr,
-                nbytes,
-                location,
-                _MANAGED_OPERATION_FLAGS,
-                s.handle,
-            )
-        )
-    else:
-        handle_return(
-            driver.cuMemPrefetchAsync(
-                ptr,
-                nbytes,
-                _managed_location_to_legacy_device(location, "prefetch"),
-                s.handle,
-            )
-        )
-
-
-def discard_prefetch(
-    target,
-    location: Device | int | None = None,
-    *,
-    stream: Stream | GraphBuilder,
-    int size=_MANAGED_SIZE_NOT_PROVIDED,
-    location_type: str | None = None,
-):
-    """Discard a managed-memory allocation range and prefetch it to a target location.
-
-    Parameters
-    ----------
-    target : :class:`Buffer` | int | object
-        Managed allocation to operate on. This may be a :class:`Buffer` or a
-        raw pointer (requires ``size=``).
-    location : :obj:`~_device.Device` | int | None, optional
-        Target location. When ``location_type`` is ``None``, values are
-        interpreted as a device ordinal, ``-1`` for host, or ``None``.
-        A location is required for discard_prefetch.
-    stream : :obj:`~_stream.Stream` | :obj:`~_graph.GraphBuilder`
-        Keyword argument specifying the stream for the asynchronous operation.
-    size : int, optional
-        Allocation size in bytes. Required when ``target`` is a raw pointer.
-    location_type : str | None, optional
-        Explicit location kind. Supported values are ``"device"``, ``"host"``,
-        ``"host_numa"``, and ``"host_numa_current"``.
-    """
-    cdef object ptr
-    cdef object batch_ptr
-    cdef size_t nbytes
-
-    ptr, nbytes = _normalize_managed_target_range(target, size, "discard_prefetch")
-    _require_managed_discard_prefetch_support("discard_prefetch")
-    cdef Stream s = Stream_accept(stream)
-    batch_ptr = driver.CUdeviceptr(int(ptr))
-    location = _normalize_managed_location(
-        location,
-        location_type,
-        "discard_prefetch",
-    )
-    handle_return(
-        driver.cuMemDiscardAndPrefetchBatchAsync(
-            [batch_ptr],
-            [nbytes],
-            _SINGLE_RANGE_COUNT,
-            [location],
-            [_FIRST_PREFETCH_LOCATION_INDEX],
-            _SINGLE_PREFETCH_LOCATION_COUNT,
-            _MANAGED_OPERATION_FLAGS,
-            s.handle,
-        )
-    )
-
 cdef class Buffer:
     """Represent a handle to allocated memory.
 
@@ -864,14 +421,14 @@ cdef class Buffer:
 
 # Memory Attribute Query Helpers
 # ------------------------------
-cdef inline void _init_mem_attrs(Buffer self):
+cdef void _init_mem_attrs(Buffer self):
     """Initialize memory attributes by querying the pointer."""
     if not self._mem_attrs_inited:
         _query_memory_attrs(self._mem_attrs, as_cu(self._h_ptr))
         self._mem_attrs_inited = True
 
 
-cdef inline int _query_memory_attrs(
+cdef int _query_memory_attrs(
     _MemAttrs& out,
     cydriver.CUdeviceptr ptr
 ) except -1 nogil:
diff --git a/cuda_core/cuda/core/_memory/_managed_memory_ops.pxd b/cuda_core/cuda/core/_memory/_managed_memory_ops.pxd
new file mode 100644
index 0000000000..a7019c784d
--- /dev/null
+++ b/cuda_core/cuda/core/_memory/_managed_memory_ops.pxd
@@ -0,0 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Managed-memory operation helpers (advise, prefetch, discard_prefetch).
+# The public API is exposed via def functions; no cdef declarations needed.
diff --git a/cuda_core/cuda/core/_memory/_managed_memory_ops.pyx b/cuda_core/cuda/core/_memory/_managed_memory_ops.pyx
new file mode 100644
index 0000000000..649c2cbe72
--- /dev/null
+++ b/cuda_core/cuda/core/_memory/_managed_memory_ops.pyx
@@ -0,0 +1,458 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import annotations
+
+from libc.stdint cimport uintptr_t
+
+from cuda.bindings cimport cydriver
+from cuda.core._memory._buffer cimport Buffer, _MemAttrs, _init_mem_attrs, _query_memory_attrs
+from cuda.core._stream cimport Stream, Stream_accept
+
+from cuda.core._utils.cuda_utils import driver, get_binding_version, handle_return
+from cuda.core._device import Device
+
+
+cdef tuple _VALID_MANAGED_LOCATION_TYPES = (
+    "device",
+    "host",
+    "host_numa",
+    "host_numa_current",
+)
+
+cdef dict _MANAGED_LOCATION_TYPE_ATTRS = {
+    "device": "CU_MEM_LOCATION_TYPE_DEVICE",
+    "host": "CU_MEM_LOCATION_TYPE_HOST",
+    "host_numa": "CU_MEM_LOCATION_TYPE_HOST_NUMA",
+    "host_numa_current": "CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT",
+}
+
+cdef dict _MANAGED_ADVICE_ALIASES = {
+    "set_read_mostly": "CU_MEM_ADVISE_SET_READ_MOSTLY",
+    "unset_read_mostly": "CU_MEM_ADVISE_UNSET_READ_MOSTLY",
+    "set_preferred_location": "CU_MEM_ADVISE_SET_PREFERRED_LOCATION",
+    "unset_preferred_location": "CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION",
+    "set_accessed_by": "CU_MEM_ADVISE_SET_ACCESSED_BY",
+    "unset_accessed_by": "CU_MEM_ADVISE_UNSET_ACCESSED_BY",
+}
+
+cdef frozenset _MANAGED_ADVICE_IGNORE_LOCATION = frozenset((
+    "set_read_mostly",
+    "unset_read_mostly",
+    "unset_preferred_location",
+))
+
+cdef frozenset _ALL_LOCATION_TYPES = frozenset(("device", "host", "host_numa", "host_numa_current"))
+cdef frozenset _DEVICE_HOST_NUMA = frozenset(("device", "host", "host_numa"))
+cdef frozenset _DEVICE_HOST_ONLY = frozenset(("device", "host"))
+
+cdef dict _MANAGED_ADVICE_ALLOWED_LOCTYPES = {
+    "set_read_mostly": _DEVICE_HOST_NUMA,
+    "unset_read_mostly": _DEVICE_HOST_NUMA,
+    "set_preferred_location": _ALL_LOCATION_TYPES,
+    "unset_preferred_location": _DEVICE_HOST_NUMA,
+    "set_accessed_by": _DEVICE_HOST_ONLY,
+    "unset_accessed_by": _DEVICE_HOST_ONLY,
+}
+
+cdef int _MANAGED_SIZE_NOT_PROVIDED = -1
+cdef int _HOST_NUMA_CURRENT_ID = 0
+cdef int _FIRST_PREFETCH_LOCATION_INDEX = 0
+cdef size_t _SINGLE_RANGE_COUNT = 1
+cdef size_t _SINGLE_PREFETCH_LOCATION_COUNT = 1
+cdef unsigned long long _MANAGED_OPERATION_FLAGS = 0
+
+# Lazily cached values for immutable runtime properties.
+cdef object _CU_DEVICE_CPU = None
+cdef dict _ADVICE_ENUM_TO_ALIAS = None
+_V2_BINDINGS = -1
+cdef int _DISCARD_PREFETCH_SUPPORTED = -1
+
+
+cdef object _managed_location_enum(str location_type):
+    cdef str attr_name = _MANAGED_LOCATION_TYPE_ATTRS[location_type]
+    cdef object result = getattr(driver.CUmemLocationType, attr_name, None)
+    if result is None:
+        raise RuntimeError(
+            f"Managed-memory location type {location_type!r} is not supported by the "
+            f"installed cuda.bindings package."
+        )
+    return result
+
+
+cdef object _make_managed_location(str location_type, int location_id):
+    global _CU_DEVICE_CPU
+    cdef object location = driver.CUmemLocation()
+    location.type = _managed_location_enum(location_type)
+    if location_type == "host":
+        if _CU_DEVICE_CPU is None:
+            _CU_DEVICE_CPU = int(getattr(driver, "CU_DEVICE_CPU", -1))
+        location.id = _CU_DEVICE_CPU
+    elif location_type == "host_numa_current":
+        location.id = _HOST_NUMA_CURRENT_ID
+    else:
+        location.id = location_id
+    return location
+
+
+cdef tuple _normalize_managed_advice(object advice):
+    cdef str alias
+    cdef str attr_name
+    if isinstance(advice, str):
+        alias = advice.lower()
+        attr_name = _MANAGED_ADVICE_ALIASES.get(alias)
+        if attr_name is None:
+            raise ValueError(
+                "advice must be one of "
+                f"{tuple(sorted(_MANAGED_ADVICE_ALIASES))!r}, got {advice!r}"
+            )
+        return alias, getattr(driver.CUmem_advise, attr_name)
+
+    if isinstance(advice, driver.CUmem_advise):
+        global _ADVICE_ENUM_TO_ALIAS
+        if _ADVICE_ENUM_TO_ALIAS is None:
+            _ADVICE_ENUM_TO_ALIAS = {}
+            for alias, attr_name in _MANAGED_ADVICE_ALIASES.items():
+                enum_val = getattr(driver.CUmem_advise, attr_name, None)
+                if enum_val is not None:
+                    _ADVICE_ENUM_TO_ALIAS[enum_val] = alias
+        alias = _ADVICE_ENUM_TO_ALIAS.get(advice)
+        if alias is None:
+            raise ValueError(f"Unsupported advice value: {advice!r}")
+        return alias, advice
+
+    raise TypeError(
+        "advice must be a cuda.bindings.driver.CUmem_advise value or a supported string alias"
+    )
+
+
+cdef object _normalize_managed_location(
+    object location,
+    object location_type,
+    str what,
+    bint allow_none=False,
+    frozenset allowed_loctypes=_ALL_LOCATION_TYPES,
+):
+    cdef object loc_type
+    cdef int loc_id
+
+    if isinstance(location, Device):
+        location = location.device_id
+
+    if location_type is not None and not isinstance(location_type, str):
+        raise TypeError(f"{what} location_type must be a string or None, got {type(location_type).__name__}")
+
+    loc_type = None if location_type is None else (<str>location_type).lower()
+    if loc_type is not None and loc_type not in _VALID_MANAGED_LOCATION_TYPES:
+        raise ValueError(
+            f"{what} location_type must be one of {_VALID_MANAGED_LOCATION_TYPES!r} "
+            f"or None, got {location_type!r}"
+        )
+
+    if loc_type is not None and loc_type not in allowed_loctypes:
+        raise ValueError(f"{what} does not support location_type='{loc_type}'")
+
+    if loc_type is None:
+        if location is None:
+            if allow_none:
+                return _make_managed_location("host", -1)
+            raise ValueError(f"{what} requires a location")
+        if not isinstance(location, int):
+            raise TypeError(
+                f"{what} location must be a Device, int, or None, got {type(location).__name__}"
+            )
+        loc_id = <int>location
+        if loc_id == -1:
+            if "host" not in allowed_loctypes:
+                raise ValueError(f"{what} does not support host locations")
+            return _make_managed_location("host", -1)
+        elif loc_id >= 0:
+            return _make_managed_location("device", loc_id)
+        else:
+            raise ValueError(
+                f"{what} location must be a device ordinal (>= 0), -1 for host, or None; got {location!r}"
+            )
+    elif loc_type == "device":
+        if isinstance(location, int) and <int>location >= 0:
+            loc_id = <int>location
+        else:
+            raise ValueError(
+                f"{what} location must be a device ordinal (>= 0) when location_type is 'device', got {location!r}"
+            )
+        return _make_managed_location(loc_type, loc_id)
+    elif loc_type == "host":
+        if location not in (None, -1):
+            raise ValueError(
+                f"{what} location must be None or -1 when location_type is 'host', got {location!r}"
+            )
+        return _make_managed_location(loc_type, -1)
+    elif loc_type == "host_numa":
+        if not isinstance(location, int) or <int>location < 0:
+            raise ValueError(
+                f"{what} location must be a NUMA node ID (>= 0) when location_type is 'host_numa', got {location!r}"
+            )
+        return _make_managed_location(loc_type, <int>location)
+    else:
+        if location is not None:
+            raise ValueError(
+                f"{what} location must be None when location_type is 'host_numa_current', got {location!r}"
+            )
+        return _make_managed_location(loc_type, _HOST_NUMA_CURRENT_ID)
+
+
+cdef bint _managed_location_uses_v2_bindings():
+    # cuda.bindings 13.x switches these APIs to CUmemLocation-based wrappers.
+    global _V2_BINDINGS
+    if _V2_BINDINGS < 0:
+        _V2_BINDINGS = 1 if get_binding_version() >= (13, 0) else 0
+    return _V2_BINDINGS != 0
+
+
+cdef object _LEGACY_LOC_DEVICE = None
+cdef object _LEGACY_LOC_HOST = None
+
+cdef int _managed_location_to_legacy_device(object location, str what):
+    global _LEGACY_LOC_DEVICE, _LEGACY_LOC_HOST
+    if _LEGACY_LOC_DEVICE is None:
+        _LEGACY_LOC_DEVICE = _managed_location_enum("device")
+        _LEGACY_LOC_HOST = _managed_location_enum("host")
+    cdef object loc_type = location.type
+    if loc_type == _LEGACY_LOC_DEVICE or loc_type == _LEGACY_LOC_HOST:
+        return <int>location.id
+    raise RuntimeError(
+        f"{what} requires cuda.bindings 13.x for location_type={loc_type!r}"
+    )
+
+
+cdef void _require_managed_buffer(Buffer self, str what):
+    _init_mem_attrs(self)
+    if not self._mem_attrs.is_managed:
+        raise ValueError(f"{what} requires a managed-memory allocation")
+
+
+cdef void _require_managed_discard_prefetch_support(str what):
+    global _DISCARD_PREFETCH_SUPPORTED
+    if _DISCARD_PREFETCH_SUPPORTED < 0:
+        _DISCARD_PREFETCH_SUPPORTED = 1 if hasattr(driver, "cuMemDiscardAndPrefetchBatchAsync") else 0
+    if not _DISCARD_PREFETCH_SUPPORTED:
+        raise RuntimeError(
+            f"{what} requires cuda.bindings support for cuMemDiscardAndPrefetchBatchAsync"
+        )
+
+
+cdef tuple _managed_range_from_buffer(
+    Buffer buffer,
+    int size,
+    str what,
+):
+    if size != _MANAGED_SIZE_NOT_PROVIDED:
+        raise TypeError(f"{what} does not accept size= when target is a Buffer")
+    _require_managed_buffer(buffer, what)
+    return buffer.handle, buffer._size
+
+
+cdef uintptr_t _coerce_raw_pointer(object target, str what) except? 0:
+    cdef object ptr_obj
+    try:
+        ptr_obj = int(target)
+    except Exception as exc:
+        raise TypeError(
+            f"{what} target must be a Buffer or a raw pointer, got {type(target).__name__}"
+        ) from exc
+    if ptr_obj < 0:
+        raise ValueError(f"{what} target pointer must be >= 0, got {target!r}")
+    return <uintptr_t>ptr_obj
+
+
+cdef int _require_managed_pointer(uintptr_t ptr, str what) except -1:
+    cdef _MemAttrs mem_attrs
+    with nogil:
+        _query_memory_attrs(mem_attrs, <cydriver.CUdeviceptr>ptr)
+    if not mem_attrs.is_managed:
+        raise ValueError(f"{what} requires a managed-memory allocation")
+    return 0
+
+
+cdef tuple _normalize_managed_target_range(
+    object target,
+    int size,
+    str what,
+):
+    cdef uintptr_t ptr
+
+    if isinstance(target, Buffer):
+        return _managed_range_from_buffer(<Buffer>target, size, what)
+
+    if size == _MANAGED_SIZE_NOT_PROVIDED:
+        raise TypeError(f"{what} requires size= when target is a raw pointer")
+    ptr = _coerce_raw_pointer(target, what)
+    _require_managed_pointer(ptr, what)
+    return ptr, <size_t>size
+
+
+def advise(
+    target,
+    advice: driver.CUmem_advise | str,
+    location: Device | int | None = None,
+    *,
+    int size=_MANAGED_SIZE_NOT_PROVIDED,
+    location_type: str | None = None,
+):
+    """Apply managed-memory advice to an allocation range.
+
+    Parameters
+    ----------
+    target : :class:`Buffer` | int | object
+        Managed allocation to operate on. This may be a :class:`Buffer` or a
+        raw pointer (requires ``size=``).
+    advice : :obj:`~driver.CUmem_advise` | str
+        Managed-memory advice to apply. String aliases such as
+        ``"set_read_mostly"``, ``"set_preferred_location"``, and
+        ``"set_accessed_by"`` are accepted.
+    location : :obj:`~_device.Device` | int | None, optional
+        Target location. When ``location_type`` is ``None``, values are
+        interpreted as a device ordinal, ``-1`` for host, or ``None`` for
+        advice values that ignore location.
+    size : int, optional
+        Allocation size in bytes. Required when ``target`` is a raw pointer.
+    location_type : str | None, optional
+        Explicit location kind. Supported values are ``"device"``, ``"host"``,
+        ``"host_numa"``, and ``"host_numa_current"``.
+    """
+    cdef str advice_name
+    cdef object ptr
+    cdef size_t nbytes
+
+    ptr, nbytes = _normalize_managed_target_range(target, size, "advise")
+    advice_name, advice = _normalize_managed_advice(advice)
+    location = _normalize_managed_location(
+        location,
+        location_type,
+        "advise",
+        allow_none=advice_name in _MANAGED_ADVICE_IGNORE_LOCATION,
+        allowed_loctypes=_MANAGED_ADVICE_ALLOWED_LOCTYPES[advice_name],
+    )
+    if _managed_location_uses_v2_bindings():
+        handle_return(driver.cuMemAdvise(ptr, nbytes, advice, location))
+    else:
+        handle_return(
+            driver.cuMemAdvise(
+                ptr,
+                nbytes,
+                advice,
+                _managed_location_to_legacy_device(location, "advise"),
+            )
+        )
+
+
+def prefetch(
+    target,
+    location: Device | int | None = None,
+    *,
+    stream: Stream | GraphBuilder,
+    int size=_MANAGED_SIZE_NOT_PROVIDED,
+    location_type: str | None = None,
+):
+    """Prefetch a managed-memory allocation range to a target location.
+
+    Parameters
+    ----------
+    target : :class:`Buffer` | int | object
+        Managed allocation to operate on. This may be a :class:`Buffer` or a
+        raw pointer (requires ``size=``).
+    location : :obj:`~_device.Device` | int | None, optional
+        Target location. When ``location_type`` is ``None``, values are
+        interpreted as a device ordinal, ``-1`` for host, or ``None``.
+        A location is required for prefetch.
+    stream : :obj:`~_stream.Stream` | :obj:`~_graph.GraphBuilder`
+        Keyword argument specifying the stream for the asynchronous prefetch.
+    size : int, optional
+        Allocation size in bytes. Required when ``target`` is a raw pointer.
+    location_type : str | None, optional
+        Explicit location kind. Supported values are ``"device"``, ``"host"``,
+        ``"host_numa"``, and ``"host_numa_current"``.
+    """
+    cdef Stream s = Stream_accept(stream)
+    cdef object ptr
+    cdef size_t nbytes
+
+    ptr, nbytes = _normalize_managed_target_range(target, size, "prefetch")
+    location = _normalize_managed_location(
+        location,
+        location_type,
+        "prefetch",
+    )
+    if _managed_location_uses_v2_bindings():
+        handle_return(
+            driver.cuMemPrefetchAsync(
+                ptr,
+                nbytes,
+                location,
+                _MANAGED_OPERATION_FLAGS,
+                s.handle,
+            )
+        )
+    else:
+        handle_return(
+            driver.cuMemPrefetchAsync(
+                ptr,
+                nbytes,
+                _managed_location_to_legacy_device(location, "prefetch"),
+                s.handle,
+            )
+        )
+
+
+def discard_prefetch(
+    target,
+    location: Device | int | None = None,
+    *,
+    stream: Stream | GraphBuilder,
+    int size=_MANAGED_SIZE_NOT_PROVIDED,
+    location_type: str | None = None,
+):
+    """Discard a managed-memory allocation range and prefetch it to a target location.
+
+    Parameters
+    ----------
+    target : :class:`Buffer` | int | object
+        Managed allocation to operate on. This may be a :class:`Buffer` or a
+        raw pointer (requires ``size=``).
+    location : :obj:`~_device.Device` | int | None, optional
+        Target location. When ``location_type`` is ``None``, values are
+        interpreted as a device ordinal, ``-1`` for host, or ``None``.
+        A location is required for discard_prefetch.
+    stream : :obj:`~_stream.Stream` | :obj:`~_graph.GraphBuilder`
+        Keyword argument specifying the stream for the asynchronous operation.
+    size : int, optional
+        Allocation size in bytes. Required when ``target`` is a raw pointer.
+    location_type : str | None, optional
+        Explicit location kind. Supported values are ``"device"``, ``"host"``,
+        ``"host_numa"``, and ``"host_numa_current"``.
+    """
+    cdef object ptr
+    cdef object batch_ptr
+    cdef size_t nbytes
+
+    ptr, nbytes = _normalize_managed_target_range(target, size, "discard_prefetch")
+    _require_managed_discard_prefetch_support("discard_prefetch")
+    cdef Stream s = Stream_accept(stream)
+    batch_ptr = driver.CUdeviceptr(int(ptr))
+    location = _normalize_managed_location(
+        location,
+        location_type,
+        "discard_prefetch",
+    )
+    handle_return(
+        driver.cuMemDiscardAndPrefetchBatchAsync(
+            [batch_ptr],
+            [nbytes],
+            _SINGLE_RANGE_COUNT,
+            [location],
+            [_FIRST_PREFETCH_LOCATION_INDEX],
+            _SINGLE_PREFETCH_LOCATION_COUNT,
+            _MANAGED_OPERATION_FLAGS,
+            s.handle,
+        )
+    )
diff --git a/cuda_core/cuda/core/managed_memory.py b/cuda_core/cuda/core/managed_memory.py
index f5bb09c13d..005c9ec3cf 100644
--- a/cuda_core/cuda/core/managed_memory.py
+++ b/cuda_core/cuda/core/managed_memory.py
@@ -4,6 +4,6 @@
 
 """Managed-memory range operations."""
 
-from cuda.core._memory._buffer import advise, discard_prefetch, prefetch
+from cuda.core._memory._managed_memory_ops import advise, discard_prefetch, prefetch
 
 __all__ = ["advise", "discard_prefetch", "prefetch"]
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 56c505fbe6..544b7afc03 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -44,7 +44,7 @@
     system as ccx_system,
 )
 from cuda.core._dlpack import DLDeviceType
-from cuda.core._memory import IPCBufferDescriptor, _buffer
+from cuda.core._memory import IPCBufferDescriptor, _managed_memory_ops
 from cuda.core._utils.cuda_utils import CUDAError, handle_return
 from cuda.core.utils import StridedMemoryView
 
@@ -1313,9 +1313,9 @@ def fake_cuMemAdvise(ptr, size, advice, location):
         calls.append((ptr, size, advice, location))
         return (driver.CUresult.CUDA_SUCCESS,)
 
-    monkeypatch.setattr(_buffer, "get_binding_version", lambda: _LEGACY_BINDINGS_VERSION)
-    monkeypatch.setattr(_buffer, "_V2_BINDINGS", -1)
-    monkeypatch.setattr(_buffer.driver, "cuMemAdvise", fake_cuMemAdvise)
+    monkeypatch.setattr(_managed_memory_ops, "get_binding_version", lambda: _LEGACY_BINDINGS_VERSION)
+    monkeypatch.setattr(_managed_memory_ops, "_V2_BINDINGS", -1)
+    monkeypatch.setattr(_managed_memory_ops.driver, "cuMemAdvise", fake_cuMemAdvise)
 
     managed_memory.advise(buffer, "set_read_mostly")
 
@@ -1338,9 +1338,9 @@ def fake_cuMemPrefetchAsync(ptr, size, location, hstream):
         calls.append((ptr, size, location, hstream))
         return (driver.CUresult.CUDA_SUCCESS,)
 
-    monkeypatch.setattr(_buffer, "get_binding_version", lambda: _LEGACY_BINDINGS_VERSION)
-    monkeypatch.setattr(_buffer, "_V2_BINDINGS", -1)
-    monkeypatch.setattr(_buffer.driver, "cuMemPrefetchAsync", fake_cuMemPrefetchAsync)
+    monkeypatch.setattr(_managed_memory_ops, "get_binding_version", lambda: _LEGACY_BINDINGS_VERSION)
+    monkeypatch.setattr(_managed_memory_ops, "_V2_BINDINGS", -1)
+    monkeypatch.setattr(_managed_memory_ops.driver, "cuMemPrefetchAsync", fake_cuMemPrefetchAsync)
 
     managed_memory.prefetch(buffer, device, stream=stream)
 

From 90f07117615a25b45baf9722c3c1f0835c85d1c5 Mon Sep 17 00:00:00 2001
From: Rob Parolin <rparolin@nvidia.com>
Date: Wed, 18 Mar 2026 14:16:38 -0700
Subject: [PATCH 14/16] Drop handle_return import from _buffer.pyx (pre-commit fix)

---
 cuda_core/cuda/core/_memory/_buffer.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cuda_core/cuda/core/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx
index 104252a62b..e47f3f4926 100644
--- a/cuda_core/cuda/core/_memory/_buffer.pyx
+++ b/cuda_core/cuda/core/_memory/_buffer.pyx
@@ -35,7 +35,7 @@ else:
     BufferProtocol = object
 
 from cuda.core._dlpack import DLDeviceType, make_py_capsule
-from cuda.core._utils.cuda_utils import driver, handle_return
+from cuda.core._utils.cuda_utils import driver
 from cuda.core._device import Device
 
 

From b4d252cdb5a8899d775db185d0cc9ec92c9cd474 Mon Sep 17 00:00:00 2001
From: Rob Parolin <rparolin@nvidia.com>
Date: Thu, 19 Mar 2026 11:07:46 -0700
Subject: [PATCH 15/16] Remove comment-only _managed_memory_ops.pxd

---
 cuda_core/cuda/core/_memory/_managed_memory_ops.pxd | 6 ------
 1 file changed, 6 deletions(-)
 delete mode 100644 cuda_core/cuda/core/_memory/_managed_memory_ops.pxd

diff --git a/cuda_core/cuda/core/_memory/_managed_memory_ops.pxd b/cuda_core/cuda/core/_memory/_managed_memory_ops.pxd
deleted file mode 100644
index a7019c784d..0000000000
--- a/cuda_core/cuda/core/_memory/_managed_memory_ops.pxd
+++ /dev/null
@@ -1,6 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# Managed-memory operation helpers (advise, prefetch, discard_prefetch).
-# The public API is exposed via def functions; no cdef declarations needed.

From faaa1d881363eb4ea5d3d13cf0a21b433cdcd61f Mon Sep 17 00:00:00 2001
From: Rob Parolin <rparolin@nvidia.com>
Date: Thu, 19 Mar 2026 13:15:08 -0700
Subject: [PATCH 16/16] Restrict managed-memory ops to Buffer targets; drop raw-pointer/size= support

---
 .../cuda/core/_memory/_managed_memory_ops.pyx | 117 +++++-------------
 cuda_core/tests/test_memory.py                |  42 -------
 2 files changed, 29 insertions(+), 130 deletions(-)

diff --git a/cuda_core/cuda/core/_memory/_managed_memory_ops.pyx b/cuda_core/cuda/core/_memory/_managed_memory_ops.pyx
index 649c2cbe72..04dc33ed75 100644
--- a/cuda_core/cuda/core/_memory/_managed_memory_ops.pyx
+++ b/cuda_core/cuda/core/_memory/_managed_memory_ops.pyx
@@ -4,10 +4,7 @@
 
 from __future__ import annotations
 
-from libc.stdint cimport uintptr_t
-
-from cuda.bindings cimport cydriver
-from cuda.core._memory._buffer cimport Buffer, _MemAttrs, _init_mem_attrs, _query_memory_attrs
+from cuda.core._memory._buffer cimport Buffer, _init_mem_attrs
 from cuda.core._stream cimport Stream, Stream_accept
 
 from cuda.core._utils.cuda_utils import driver, get_binding_version, handle_return
@@ -56,7 +53,6 @@ cdef dict _MANAGED_ADVICE_ALLOWED_LOCTYPES = {
     "unset_accessed_by": _DEVICE_HOST_ONLY,
 }
 
-cdef int _MANAGED_SIZE_NOT_PROVIDED = -1
 cdef int _HOST_NUMA_CURRENT_ID = 0
 cdef int _FIRST_PREFETCH_LOCATION_INDEX = 0
 cdef size_t _SINGLE_RANGE_COUNT = 1
@@ -241,71 +237,19 @@ cdef void _require_managed_discard_prefetch_support(str what):
         )
 
 
-cdef tuple _managed_range_from_buffer(
-    Buffer buffer,
-    int size,
-    str what,
-):
-    if size != _MANAGED_SIZE_NOT_PROVIDED:
-        raise TypeError(f"{what} does not accept size= when target is a Buffer")
-    _require_managed_buffer(buffer, what)
-    return buffer.handle, buffer._size
-
-
-cdef uintptr_t _coerce_raw_pointer(object target, str what) except? 0:
-    cdef object ptr_obj
-    try:
-        ptr_obj = int(target)
-    except Exception as exc:
-        raise TypeError(
-            f"{what} target must be a Buffer or a raw pointer, got {type(target).__name__}"
-        ) from exc
-    if ptr_obj < 0:
-        raise ValueError(f"{what} target pointer must be >= 0, got {target!r}")
-    return <uintptr_t>ptr_obj
-
-
-cdef int _require_managed_pointer(uintptr_t ptr, str what) except -1:
-    cdef _MemAttrs mem_attrs
-    with nogil:
-        _query_memory_attrs(mem_attrs, <cydriver.CUdeviceptr>ptr)
-    if not mem_attrs.is_managed:
-        raise ValueError(f"{what} requires a managed-memory allocation")
-    return 0
-
-
-cdef tuple _normalize_managed_target_range(
-    object target,
-    int size,
-    str what,
-):
-    cdef uintptr_t ptr
-
-    if isinstance(target, Buffer):
-        return _managed_range_from_buffer(<Buffer>target, size, what)
-
-    if size == _MANAGED_SIZE_NOT_PROVIDED:
-        raise TypeError(f"{what} requires size= when target is a raw pointer")
-    ptr = _coerce_raw_pointer(target, what)
-    _require_managed_pointer(ptr, what)
-    return ptr, <size_t>size
-
-
 def advise(
-    target,
+    target: Buffer,
     advice: driver.CUmem_advise | str,
     location: Device | int | None = None,
     *,
-    int size=_MANAGED_SIZE_NOT_PROVIDED,
     location_type: str | None = None,
 ):
     """Apply managed-memory advice to an allocation range.
 
     Parameters
     ----------
-    target : :class:`Buffer` | int | object
-        Managed allocation to operate on. This may be a :class:`Buffer` or a
-        raw pointer (requires ``size=``).
+    target : :class:`Buffer`
+        Managed allocation to operate on.
     advice : :obj:`~driver.CUmem_advise` | str
         Managed-memory advice to apply. String aliases such as
         ``"set_read_mostly"``, ``"set_preferred_location"``, and
@@ -314,17 +258,18 @@ def advise(
         Target location. When ``location_type`` is ``None``, values are
         interpreted as a device ordinal, ``-1`` for host, or ``None`` for
         advice values that ignore location.
-    size : int, optional
-        Allocation size in bytes. Required when ``target`` is a raw pointer.
     location_type : str | None, optional
         Explicit location kind. Supported values are ``"device"``, ``"host"``,
         ``"host_numa"``, and ``"host_numa_current"``.
     """
+    if not isinstance(target, Buffer):
+        raise TypeError(f"advise target must be a Buffer, got {type(target).__name__}")
+    cdef Buffer buf = <Buffer>target
+    _require_managed_buffer(buf, "advise")
     cdef str advice_name
-    cdef object ptr
-    cdef size_t nbytes
+    cdef object ptr = buf.handle
+    cdef size_t nbytes = buf._size
 
-    ptr, nbytes = _normalize_managed_target_range(target, size, "advise")
     advice_name, advice = _normalize_managed_advice(advice)
     location = _normalize_managed_location(
         location,
@@ -347,37 +292,36 @@ def advise(
 
 
 def prefetch(
-    target,
+    target: Buffer,
     location: Device | int | None = None,
     *,
     stream: Stream | GraphBuilder,
-    int size=_MANAGED_SIZE_NOT_PROVIDED,
     location_type: str | None = None,
 ):
     """Prefetch a managed-memory allocation range to a target location.
 
     Parameters
     ----------
-    target : :class:`Buffer` | int | object
-        Managed allocation to operate on. This may be a :class:`Buffer` or a
-        raw pointer (requires ``size=``).
+    target : :class:`Buffer`
+        Managed allocation to operate on.
     location : :obj:`~_device.Device` | int | None, optional
         Target location. When ``location_type`` is ``None``, values are
         interpreted as a device ordinal, ``-1`` for host, or ``None``.
         A location is required for prefetch.
     stream : :obj:`~_stream.Stream` | :obj:`~_graph.GraphBuilder`
         Keyword argument specifying the stream for the asynchronous prefetch.
-    size : int, optional
-        Allocation size in bytes. Required when ``target`` is a raw pointer.
     location_type : str | None, optional
         Explicit location kind. Supported values are ``"device"``, ``"host"``,
         ``"host_numa"``, and ``"host_numa_current"``.
     """
+    if not isinstance(target, Buffer):
+        raise TypeError(f"prefetch target must be a Buffer, got {type(target).__name__}")
+    cdef Buffer buf = <Buffer>target
+    _require_managed_buffer(buf, "prefetch")
     cdef Stream s = Stream_accept(stream)
-    cdef object ptr
-    cdef size_t nbytes
+    cdef object ptr = buf.handle
+    cdef size_t nbytes = buf._size
 
-    ptr, nbytes = _normalize_managed_target_range(target, size, "prefetch")
     location = _normalize_managed_location(
         location,
         location_type,
@@ -405,40 +349,37 @@ def prefetch(
 
 
 def discard_prefetch(
-    target,
+    target: Buffer,
     location: Device | int | None = None,
     *,
     stream: Stream | GraphBuilder,
-    int size=_MANAGED_SIZE_NOT_PROVIDED,
     location_type: str | None = None,
 ):
     """Discard a managed-memory allocation range and prefetch it to a target location.
 
     Parameters
     ----------
-    target : :class:`Buffer` | int | object
-        Managed allocation to operate on. This may be a :class:`Buffer` or a
-        raw pointer (requires ``size=``).
+    target : :class:`Buffer`
+        Managed allocation to operate on.
     location : :obj:`~_device.Device` | int | None, optional
         Target location. When ``location_type`` is ``None``, values are
         interpreted as a device ordinal, ``-1`` for host, or ``None``.
         A location is required for discard_prefetch.
     stream : :obj:`~_stream.Stream` | :obj:`~_graph.GraphBuilder`
         Keyword argument specifying the stream for the asynchronous operation.
-    size : int, optional
-        Allocation size in bytes. Required when ``target`` is a raw pointer.
     location_type : str | None, optional
         Explicit location kind. Supported values are ``"device"``, ``"host"``,
         ``"host_numa"``, and ``"host_numa_current"``.
     """
-    cdef object ptr
-    cdef object batch_ptr
-    cdef size_t nbytes
-
-    ptr, nbytes = _normalize_managed_target_range(target, size, "discard_prefetch")
+    if not isinstance(target, Buffer):
+        raise TypeError(f"discard_prefetch target must be a Buffer, got {type(target).__name__}")
+    cdef Buffer buf = <Buffer>target
+    _require_managed_buffer(buf, "discard_prefetch")
     _require_managed_discard_prefetch_support("discard_prefetch")
     cdef Stream s = Stream_accept(stream)
-    batch_ptr = driver.CUdeviceptr(int(ptr))
+    cdef object ptr = buf.handle
+    cdef size_t nbytes = buf._size
+    cdef object batch_ptr = driver.CUdeviceptr(int(ptr))
     location = _normalize_managed_location(
         location,
         location_type,
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 544b7afc03..dbb5ac6d8c 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -1441,20 +1441,6 @@ def test_managed_memory_advise_accepts_enum_value(init_cuda):
     buffer.close()
 
 
-def test_managed_memory_advise_size_rejected_for_buffer(init_cuda):
-    """advise() raises TypeError when size= is given with a Buffer target."""
-    device = Device()
-    _skip_if_managed_allocation_unsupported(device)
-    device.set_current()
-
-    buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE)
-
-    with pytest.raises(TypeError, match="does not accept size="):
-        managed_memory.advise(buffer, "set_read_mostly", size=1024)
-
-    buffer.close()
-
-
 def test_managed_memory_advise_invalid_advice_values(init_cuda):
     """advise() rejects invalid advice strings and wrong types."""
     device = Device()
@@ -1472,34 +1458,6 @@ def test_managed_memory_advise_invalid_advice_values(init_cuda):
     buffer.close()
 
 
-def test_managed_memory_functions_accept_raw_pointer_ranges(init_cuda):
-    device = Device()
-    _skip_if_managed_location_ops_unsupported(device)
-    device.set_current()
-
-    buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE)
-    stream = device.create_stream()
-
-    managed_memory.advise(buffer.handle, "set_read_mostly", size=buffer.size)
-    assert (
-        _get_int_mem_range_attr(
-            buffer,
-            driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY,
-        )
-        == _READ_MOSTLY_ENABLED
-    )
-
-    managed_memory.prefetch(buffer.handle, device, size=buffer.size, stream=stream)
-    stream.sync()
-    last_location = _get_int_mem_range_attr(
-        buffer,
-        driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION,
-    )
-    assert last_location == device.device_id
-
-    buffer.close()
-
-
 def test_managed_memory_resource_host_numa_auto_resolve_failure(init_cuda):
     """host_numa with None raises RuntimeError when NUMA ID cannot be determined."""
     from unittest.mock import MagicMock, patch