diff --git a/cuda_bindings/pixi.lock b/cuda_bindings/pixi.lock index b01d6eec69..237a169580 100644 --- a/cuda_bindings/pixi.lock +++ b/cuda_bindings/pixi.lock @@ -1081,21 +1081,21 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.4-h3394656_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/conda-gcc-specs-15.2.0-h53410ce_16.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cccl_linux-64-13.2.27-ha770c72_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-crt-dev_linux-64-13.2.51-ha770c72_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-cudart-13.2.51-hecca717_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-cudart-dev-13.2.51-hecca717_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-dev_linux-64-13.2.51-h376f20c_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-cudart-static-13.2.51-hecca717_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-static_linux-64-13.2.51-h376f20c_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart_linux-64-13.2.51-h376f20c_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvrtc-13.2.51-hecca717_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvvm-13.2.51-h69a702a_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-nvvm-dev_linux-64-13.2.51-ha770c72_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvvm-impl-13.2.51-h4bc722e_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvvm-tools-13.2.51-h4bc722e_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-profiler-api-13.2.20-h7938cbb_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.2-he2cc418_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cccl_linux-64-12.9.27-ha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-crt-dev_linux-64-12.9.86-ha770c72_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-cudart-12.9.79-h5888daf_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-cudart-dev-12.9.79-h5888daf_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-dev_linux-64-12.9.79-h3f2d84a_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-cudart-static-12.9.79-h5888daf_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-static_linux-64-12.9.79-h3f2d84a_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart_linux-64-12.9.79-h3f2d84a_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvrtc-12.9.86-hecca717_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvvm-12.9.86-h69a702a_6.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-nvvm-dev_linux-64-12.9.86-ha770c72_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvvm-impl-12.9.86-h4bc722e_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvvm-tools-12.9.86-h4bc722e_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-profiler-api-12.9.79-h7938cbb_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.9-h4f385c5_3.conda - conda: 
https://conda.anaconda.org/conda-forge/linux-64/cython-3.2.3-py314h1807b08_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/dav1d-1.2.1-hd590300_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/dbus-1.16.2-h24cb091_1.conda @@ -1134,7 +1134,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-5_h4a7cf45_openblas.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libcap-2.77-h3ff7636_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-5_h0358290_openblas.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufile-1.17.0.44-h85c024f_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufile-1.14.1.1-hbc026e6_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.25-h17f619e_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libdrm-2.4.125-hb03c661_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libegl-1.7.0-ha4b6fd6_2.conda @@ -1160,8 +1160,8 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-hb9d3cd8_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libnl-3.11.0-hb9d3cd8_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libnvfatbin-13.2.51-hecca717_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libnvjitlink-13.2.51-hecca717_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libnvfatbin-12.9.82-hecca717_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libnvjitlink-12.9.86-hecca717_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.5-hd0c01bc_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.30-pthreads_h94d23a6_4.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libopenvino-2025.2.0-hb617929_1.conda @@ -1264,7 +1264,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.0-pyhcf101f3_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb78ec9c_6.conda - conda: . 
- build: py314hb727236_0 + build: py314ha6d028f_0 - conda: ../cuda_pathfinder linux-aarch64: - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/_openmp_mutex-4.5-2_gnu.tar.bz2 @@ -1460,21 +1460,21 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/cairo-1.18.4-h5782bbf_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/conda-gcc-specs-15.2.0-hd546029_16.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cccl_win-64-12.9.27-h57928b3_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-crt-dev_win-64-12.9.86-h57928b3_2.conda - - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-cudart-12.9.79-he0c23c2_0.conda - - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-cudart-dev-12.9.79-he0c23c2_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-dev_win-64-12.9.79-he0c23c2_0.conda - - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-cudart-static-12.9.79-he0c23c2_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-static_win-64-12.9.79-he0c23c2_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart_win-64-12.9.79-he0c23c2_0.conda - - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvrtc-12.9.86-hac47afa_1.conda - - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvvm-12.9.86-h719f0c7_6.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-nvvm-dev_win-64-12.9.86-h57928b3_2.conda - - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvvm-impl-12.9.86-h2466b09_2.conda - - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvvm-tools-12.9.86-h2466b09_2.conda - - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-profiler-api-12.9.79-h57928b3_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.9-h4f385c5_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cccl_win-64-13.2.27-h57928b3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-crt-dev_win-64-13.2.51-h57928b3_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-cudart-13.2.51-hac47afa_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-cudart-dev-13.2.51-hac47afa_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-dev_win-64-13.2.51-hac47afa_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-cudart-static-13.2.51-hac47afa_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-static_win-64-13.2.51-hac47afa_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart_win-64-13.2.51-hac47afa_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvrtc-13.2.51-hac47afa_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvvm-13.2.51-h719f0c7_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-nvvm-dev_win-64-13.2.51-h57928b3_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvvm-impl-13.2.51-h2466b09_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvvm-tools-13.2.51-h2466b09_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-profiler-api-13.2.20-h57928b3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.2-he2cc418_3.conda - conda: https://conda.anaconda.org/conda-forge/win-64/cython-3.2.3-py314h344ed54_0.conda - 
conda: https://conda.anaconda.org/conda-forge/win-64/dav1d-1.2.1-hcfcfb64_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda @@ -1520,8 +1520,8 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/liblapack-3.11.0-5_hf9ab0e9_mkl.conda - conda: https://conda.anaconda.org/conda-forge/win-64/liblzma-5.8.1-h2466b09_2.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libmpdec-4.0.0-h2466b09_0.conda - - conda: https://conda.anaconda.org/conda-forge/win-64/libnvfatbin-12.9.82-hac47afa_1.conda - - conda: https://conda.anaconda.org/conda-forge/win-64/libnvjitlink-12.9.86-hac47afa_2.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libnvfatbin-13.2.51-hac47afa_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libnvjitlink-13.2.51-hac47afa_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libogg-1.3.5-h2466b09_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libopus-1.6-h6a83c73_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libpng-1.6.53-h7351971_0.conda @@ -1583,7 +1583,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.0-pyhcf101f3_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda - conda: . - build: py314h5e6f764_0 + build: py314h356c398_0 - conda: ../cuda_pathfinder packages: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -2154,7 +2154,7 @@ packages: subdir: win-64 variants: c_compiler: vs2022 - cuda-version: 13.2.* + cuda_version: 13.2.* cxx_compiler: vs2022 python: 3.14.* target_platform: win-64 @@ -2182,7 +2182,7 @@ packages: subdir: win-64 variants: c_compiler: vs2022 - cuda-version: 12.* + cuda_version: 12.* cxx_compiler: vs2022 python: 3.14.* target_platform: win-64 @@ -2209,7 +2209,7 @@ packages: build: py314h9a28ecd_0 subdir: linux-aarch64 variants: - cuda-version: 13.2.* + cuda_version: 13.2.* python: 3.14.* target_platform: linux-aarch64 depends: @@ -2237,7 +2237,7 @@ packages: build: py314ha6d028f_0 subdir: linux-64 variants: - cuda-version: 12.* + cuda_version: 12.* python: 3.14.* target_platform: linux-64 depends: @@ -2265,7 +2265,7 @@ packages: build: py314hb727236_0 subdir: linux-64 variants: - cuda-version: 13.2.* + cuda_version: 13.2.* python: 3.14.* target_platform: linux-64 depends: @@ -2293,7 +2293,7 @@ packages: build: py314he8946ed_0 subdir: linux-aarch64 variants: - cuda-version: 12.* + cuda_version: 12.* python: 3.14.* target_platform: linux-aarch64 depends: diff --git a/cuda_core/cuda/core/__init__.py b/cuda_core/cuda/core/__init__.py index 139078e86e..c55c0786ed 100644 --- a/cuda_core/cuda/core/__init__.py +++ b/cuda_core/cuda/core/__init__.py @@ -28,7 +28,7 @@ finally: del bindings, importlib, subdir, cuda_major, cuda_minor -from cuda.core import system, utils +from cuda.core import managed_memory, system, utils from cuda.core._device import Device from cuda.core._event import Event, EventOptions from cuda.core._graph import ( diff --git a/cuda_core/cuda/core/_memory/_buffer.pxd b/cuda_core/cuda/core/_memory/_buffer.pxd index 91c0cfe24a..9065da77eb 100644 --- a/cuda_core/cuda/core/_memory/_buffer.pxd +++ b/cuda_core/cuda/core/_memory/_buffer.pxd @@ -4,6 +4,7 @@ from libc.stdint cimport uintptr_t +from cuda.bindings cimport cydriver from cuda.core._resource_handles cimport DevicePtrHandle from cuda.core._stream cimport Stream @@ -12,6 +13,7 @@ cdef struct _MemAttrs: int device_id bint 
is_device_accessible bint is_host_accessible + bint is_managed cdef class Buffer: @@ -37,3 +39,10 @@ cdef Buffer Buffer_from_deviceptr_handle( MemoryResource mr, object ipc_descriptor = * ) + +# Memory attribute query helpers (used by _managed_memory_ops) +cdef void _init_mem_attrs(Buffer self) +cdef int _query_memory_attrs( + _MemAttrs& out, + cydriver.CUdeviceptr ptr, +) except -1 nogil diff --git a/cuda_core/cuda/core/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx index b836972f5f..e47f3f4926 100644 --- a/cuda_core/cuda/core/_memory/_buffer.pyx +++ b/cuda_core/cuda/core/_memory/_buffer.pyx @@ -71,6 +71,7 @@ A type union of :obj:`~driver.CUdeviceptr`, `int` and `None` for hinting :attr:`Buffer.handle`. """ + cdef class Buffer: """Represent a handle to allocated memory. @@ -420,14 +421,14 @@ cdef class Buffer: # Memory Attribute Query Helpers # ------------------------------ -cdef inline void _init_mem_attrs(Buffer self): +cdef void _init_mem_attrs(Buffer self): """Initialize memory attributes by querying the pointer.""" if not self._mem_attrs_inited: _query_memory_attrs(self._mem_attrs, as_cu(self._h_ptr)) self._mem_attrs_inited = True -cdef inline int _query_memory_attrs( +cdef int _query_memory_attrs( _MemAttrs& out, cydriver.CUdeviceptr ptr ) except -1 nogil: @@ -459,6 +460,7 @@ cdef inline int _query_memory_attrs( out.is_host_accessible = True out.is_device_accessible = False out.device_id = -1 + out.is_managed = False elif ( is_managed or memory_type == cydriver.CUmemorytype.CU_MEMORYTYPE_HOST @@ -467,10 +469,12 @@ cdef inline int _query_memory_attrs( out.is_host_accessible = True out.is_device_accessible = True out.device_id = device_id + out.is_managed = is_managed != 0 elif memory_type == cydriver.CUmemorytype.CU_MEMORYTYPE_DEVICE: out.is_host_accessible = False out.is_device_accessible = True out.device_id = device_id + out.is_managed = False else: with cython.gil: raise ValueError(f"Unsupported memory type: {memory_type}") diff --git a/cuda_core/cuda/core/_memory/_managed_memory_ops.pxd b/cuda_core/cuda/core/_memory/_managed_memory_ops.pxd new file mode 100644 index 0000000000..a7019c784d --- /dev/null +++ b/cuda_core/cuda/core/_memory/_managed_memory_ops.pxd @@ -0,0 +1,6 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# Managed-memory operation helpers (advise, prefetch, discard_prefetch). +# The public API is exposed via def functions; no cdef declarations needed. diff --git a/cuda_core/cuda/core/_memory/_managed_memory_ops.pyx b/cuda_core/cuda/core/_memory/_managed_memory_ops.pyx new file mode 100644 index 0000000000..649c2cbe72 --- /dev/null +++ b/cuda_core/cuda/core/_memory/_managed_memory_ops.pyx @@ -0,0 +1,458 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from libc.stdint cimport uintptr_t + +from cuda.bindings cimport cydriver +from cuda.core._memory._buffer cimport Buffer, _MemAttrs, _init_mem_attrs, _query_memory_attrs +from cuda.core._stream cimport Stream, Stream_accept + +from cuda.core._utils.cuda_utils import driver, get_binding_version, handle_return +from cuda.core._device import Device + + +cdef tuple _VALID_MANAGED_LOCATION_TYPES = ( + "device", + "host", + "host_numa", + "host_numa_current", +) + +cdef dict _MANAGED_LOCATION_TYPE_ATTRS = { + "device": "CU_MEM_LOCATION_TYPE_DEVICE", + "host": "CU_MEM_LOCATION_TYPE_HOST", + "host_numa": "CU_MEM_LOCATION_TYPE_HOST_NUMA", + "host_numa_current": "CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT", +} + +cdef dict _MANAGED_ADVICE_ALIASES = { + "set_read_mostly": "CU_MEM_ADVISE_SET_READ_MOSTLY", + "unset_read_mostly": "CU_MEM_ADVISE_UNSET_READ_MOSTLY", + "set_preferred_location": "CU_MEM_ADVISE_SET_PREFERRED_LOCATION", + "unset_preferred_location": "CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION", + "set_accessed_by": "CU_MEM_ADVISE_SET_ACCESSED_BY", + "unset_accessed_by": "CU_MEM_ADVISE_UNSET_ACCESSED_BY", +} + +cdef frozenset _MANAGED_ADVICE_IGNORE_LOCATION = frozenset(( + "set_read_mostly", + "unset_read_mostly", + "unset_preferred_location", +)) + +cdef frozenset _ALL_LOCATION_TYPES = frozenset(("device", "host", "host_numa", "host_numa_current")) +cdef frozenset _DEVICE_HOST_NUMA = frozenset(("device", "host", "host_numa")) +cdef frozenset _DEVICE_HOST_ONLY = frozenset(("device", "host")) + +cdef dict _MANAGED_ADVICE_ALLOWED_LOCTYPES = { + "set_read_mostly": _DEVICE_HOST_NUMA, + "unset_read_mostly": _DEVICE_HOST_NUMA, + "set_preferred_location": _ALL_LOCATION_TYPES, + "unset_preferred_location": _DEVICE_HOST_NUMA, + "set_accessed_by": _DEVICE_HOST_ONLY, + "unset_accessed_by": _DEVICE_HOST_ONLY, +} + +cdef int _MANAGED_SIZE_NOT_PROVIDED = -1 +cdef int _HOST_NUMA_CURRENT_ID = 0 +cdef int _FIRST_PREFETCH_LOCATION_INDEX = 0 +cdef size_t _SINGLE_RANGE_COUNT = 1 +cdef size_t _SINGLE_PREFETCH_LOCATION_COUNT = 1 +cdef unsigned long long _MANAGED_OPERATION_FLAGS = 0 + +# Lazily cached values for immutable runtime properties. +cdef object _CU_DEVICE_CPU = None +cdef dict _ADVICE_ENUM_TO_ALIAS = None +_V2_BINDINGS = -1 +cdef int _DISCARD_PREFETCH_SUPPORTED = -1 + + +cdef object _managed_location_enum(str location_type): + cdef str attr_name = _MANAGED_LOCATION_TYPE_ATTRS[location_type] + cdef object result = getattr(driver.CUmemLocationType, attr_name, None) + if result is None: + raise RuntimeError( + f"Managed-memory location type {location_type!r} is not supported by the " + f"installed cuda.bindings package." 
+ ) + return result + + +cdef object _make_managed_location(str location_type, int location_id): + global _CU_DEVICE_CPU + cdef object location = driver.CUmemLocation() + location.type = _managed_location_enum(location_type) + if location_type == "host": + if _CU_DEVICE_CPU is None: + _CU_DEVICE_CPU = int(getattr(driver, "CU_DEVICE_CPU", -1)) + location.id = _CU_DEVICE_CPU + elif location_type == "host_numa_current": + location.id = _HOST_NUMA_CURRENT_ID + else: + location.id = location_id + return location + + +cdef tuple _normalize_managed_advice(object advice): + cdef str alias + cdef str attr_name + if isinstance(advice, str): + alias = advice.lower() + attr_name = _MANAGED_ADVICE_ALIASES.get(alias) + if attr_name is None: + raise ValueError( + "advice must be one of " + f"{tuple(sorted(_MANAGED_ADVICE_ALIASES))!r}, got {advice!r}" + ) + return alias, getattr(driver.CUmem_advise, attr_name) + + if isinstance(advice, driver.CUmem_advise): + global _ADVICE_ENUM_TO_ALIAS + if _ADVICE_ENUM_TO_ALIAS is None: + _ADVICE_ENUM_TO_ALIAS = {} + for alias, attr_name in _MANAGED_ADVICE_ALIASES.items(): + enum_val = getattr(driver.CUmem_advise, attr_name, None) + if enum_val is not None: + _ADVICE_ENUM_TO_ALIAS[enum_val] = alias + alias = _ADVICE_ENUM_TO_ALIAS.get(advice) + if alias is None: + raise ValueError(f"Unsupported advice value: {advice!r}") + return alias, advice + + raise TypeError( + "advice must be a cuda.bindings.driver.CUmem_advise value or a supported string alias" + ) + + +cdef object _normalize_managed_location( + object location, + object location_type, + str what, + bint allow_none=False, + frozenset allowed_loctypes=_ALL_LOCATION_TYPES, +): + cdef object loc_type + cdef int loc_id + + if isinstance(location, Device): + location = location.device_id + + if location_type is not None and not isinstance(location_type, str): + raise TypeError(f"{what} location_type must be a string or None, got {type(location_type).__name__}") + + loc_type = None if location_type is None else (location_type).lower() + if loc_type is not None and loc_type not in _VALID_MANAGED_LOCATION_TYPES: + raise ValueError( + f"{what} location_type must be one of {_VALID_MANAGED_LOCATION_TYPES!r} " + f"or None, got {location_type!r}" + ) + + if loc_type is not None and loc_type not in allowed_loctypes: + raise ValueError(f"{what} does not support location_type='{loc_type}'") + + if loc_type is None: + if location is None: + if allow_none: + return _make_managed_location("host", -1) + raise ValueError(f"{what} requires a location") + if not isinstance(location, int): + raise TypeError( + f"{what} location must be a Device, int, or None, got {type(location).__name__}" + ) + loc_id = location + if loc_id == -1: + if "host" not in allowed_loctypes: + raise ValueError(f"{what} does not support host locations") + return _make_managed_location("host", -1) + elif loc_id >= 0: + return _make_managed_location("device", loc_id) + else: + raise ValueError( + f"{what} location must be a device ordinal (>= 0), -1 for host, or None; got {location!r}" + ) + elif loc_type == "device": + if isinstance(location, int) and location >= 0: + loc_id = location + else: + raise ValueError( + f"{what} location must be a device ordinal (>= 0) when location_type is 'device', got {location!r}" + ) + return _make_managed_location(loc_type, loc_id) + elif loc_type == "host": + if location not in (None, -1): + raise ValueError( + f"{what} location must be None or -1 when location_type is 'host', got {location!r}" + ) + return 
_make_managed_location(loc_type, -1) + elif loc_type == "host_numa": + if not isinstance(location, int) or location < 0: + raise ValueError( + f"{what} location must be a NUMA node ID (>= 0) when location_type is 'host_numa', got {location!r}" + ) + return _make_managed_location(loc_type, location) + else: + if location is not None: + raise ValueError( + f"{what} location must be None when location_type is 'host_numa_current', got {location!r}" + ) + return _make_managed_location(loc_type, _HOST_NUMA_CURRENT_ID) + + +cdef bint _managed_location_uses_v2_bindings(): + # cuda.bindings 13.x switches these APIs to CUmemLocation-based wrappers. + global _V2_BINDINGS + if _V2_BINDINGS < 0: + _V2_BINDINGS = 1 if get_binding_version() >= (13, 0) else 0 + return _V2_BINDINGS != 0 + + +cdef object _LEGACY_LOC_DEVICE = None +cdef object _LEGACY_LOC_HOST = None + +cdef int _managed_location_to_legacy_device(object location, str what): + global _LEGACY_LOC_DEVICE, _LEGACY_LOC_HOST + if _LEGACY_LOC_DEVICE is None: + _LEGACY_LOC_DEVICE = _managed_location_enum("device") + _LEGACY_LOC_HOST = _managed_location_enum("host") + cdef object loc_type = location.type + if loc_type == _LEGACY_LOC_DEVICE or loc_type == _LEGACY_LOC_HOST: + return location.id + raise RuntimeError( + f"{what} requires cuda.bindings 13.x for location_type={loc_type!r}" + ) + + +cdef void _require_managed_buffer(Buffer self, str what): + _init_mem_attrs(self) + if not self._mem_attrs.is_managed: + raise ValueError(f"{what} requires a managed-memory allocation") + + +cdef void _require_managed_discard_prefetch_support(str what): + global _DISCARD_PREFETCH_SUPPORTED + if _DISCARD_PREFETCH_SUPPORTED < 0: + _DISCARD_PREFETCH_SUPPORTED = 1 if hasattr(driver, "cuMemDiscardAndPrefetchBatchAsync") else 0 + if not _DISCARD_PREFETCH_SUPPORTED: + raise RuntimeError( + f"{what} requires cuda.bindings support for cuMemDiscardAndPrefetchBatchAsync" + ) + + +cdef tuple _managed_range_from_buffer( + Buffer buffer, + int size, + str what, +): + if size != _MANAGED_SIZE_NOT_PROVIDED: + raise TypeError(f"{what} does not accept size= when target is a Buffer") + _require_managed_buffer(buffer, what) + return buffer.handle, buffer._size + + +cdef uintptr_t _coerce_raw_pointer(object target, str what) except? 0: + cdef object ptr_obj + try: + ptr_obj = int(target) + except Exception as exc: + raise TypeError( + f"{what} target must be a Buffer or a raw pointer, got {type(target).__name__}" + ) from exc + if ptr_obj < 0: + raise ValueError(f"{what} target pointer must be >= 0, got {target!r}") + return ptr_obj + + +cdef int _require_managed_pointer(uintptr_t ptr, str what) except -1: + cdef _MemAttrs mem_attrs + with nogil: + _query_memory_attrs(mem_attrs, ptr) + if not mem_attrs.is_managed: + raise ValueError(f"{what} requires a managed-memory allocation") + return 0 + + +cdef tuple _normalize_managed_target_range( + object target, + int size, + str what, +): + cdef uintptr_t ptr + + if isinstance(target, Buffer): + return _managed_range_from_buffer(target, size, what) + + if size == _MANAGED_SIZE_NOT_PROVIDED: + raise TypeError(f"{what} requires size= when target is a raw pointer") + ptr = _coerce_raw_pointer(target, what) + _require_managed_pointer(ptr, what) + return ptr, size + + +def advise( + target, + advice: driver.CUmem_advise | str, + location: Device | int | None = None, + *, + int size=_MANAGED_SIZE_NOT_PROVIDED, + location_type: str | None = None, +): + """Apply managed-memory advice to an allocation range. 
+ + Parameters + ---------- + target : :class:`Buffer` | int | object + Managed allocation to operate on. This may be a :class:`Buffer` or a + raw pointer (requires ``size=``). + advice : :obj:`~driver.CUmem_advise` | str + Managed-memory advice to apply. String aliases such as + ``"set_read_mostly"``, ``"set_preferred_location"``, and + ``"set_accessed_by"`` are accepted. + location : :obj:`~_device.Device` | int | None, optional + Target location. When ``location_type`` is ``None``, values are + interpreted as a device ordinal, ``-1`` for host, or ``None`` for + advice values that ignore location. + size : int, optional + Allocation size in bytes. Required when ``target`` is a raw pointer. + location_type : str | None, optional + Explicit location kind. Supported values are ``"device"``, ``"host"``, + ``"host_numa"``, and ``"host_numa_current"``. + """ + cdef str advice_name + cdef object ptr + cdef size_t nbytes + + ptr, nbytes = _normalize_managed_target_range(target, size, "advise") + advice_name, advice = _normalize_managed_advice(advice) + location = _normalize_managed_location( + location, + location_type, + "advise", + allow_none=advice_name in _MANAGED_ADVICE_IGNORE_LOCATION, + allowed_loctypes=_MANAGED_ADVICE_ALLOWED_LOCTYPES[advice_name], + ) + if _managed_location_uses_v2_bindings(): + handle_return(driver.cuMemAdvise(ptr, nbytes, advice, location)) + else: + handle_return( + driver.cuMemAdvise( + ptr, + nbytes, + advice, + _managed_location_to_legacy_device(location, "advise"), + ) + ) + + +def prefetch( + target, + location: Device | int | None = None, + *, + stream: Stream | GraphBuilder, + int size=_MANAGED_SIZE_NOT_PROVIDED, + location_type: str | None = None, +): + """Prefetch a managed-memory allocation range to a target location. + + Parameters + ---------- + target : :class:`Buffer` | int | object + Managed allocation to operate on. This may be a :class:`Buffer` or a + raw pointer (requires ``size=``). + location : :obj:`~_device.Device` | int | None, optional + Target location. When ``location_type`` is ``None``, values are + interpreted as a device ordinal, ``-1`` for host, or ``None``. + A location is required for prefetch. + stream : :obj:`~_stream.Stream` | :obj:`~_graph.GraphBuilder` + Keyword argument specifying the stream for the asynchronous prefetch. + size : int, optional + Allocation size in bytes. Required when ``target`` is a raw pointer. + location_type : str | None, optional + Explicit location kind. Supported values are ``"device"``, ``"host"``, + ``"host_numa"``, and ``"host_numa_current"``. + """ + cdef Stream s = Stream_accept(stream) + cdef object ptr + cdef size_t nbytes + + ptr, nbytes = _normalize_managed_target_range(target, size, "prefetch") + location = _normalize_managed_location( + location, + location_type, + "prefetch", + ) + if _managed_location_uses_v2_bindings(): + handle_return( + driver.cuMemPrefetchAsync( + ptr, + nbytes, + location, + _MANAGED_OPERATION_FLAGS, + s.handle, + ) + ) + else: + handle_return( + driver.cuMemPrefetchAsync( + ptr, + nbytes, + _managed_location_to_legacy_device(location, "prefetch"), + s.handle, + ) + ) + + +def discard_prefetch( + target, + location: Device | int | None = None, + *, + stream: Stream | GraphBuilder, + int size=_MANAGED_SIZE_NOT_PROVIDED, + location_type: str | None = None, +): + """Discard a managed-memory allocation range and prefetch it to a target location. + + Parameters + ---------- + target : :class:`Buffer` | int | object + Managed allocation to operate on. 
This may be a :class:`Buffer` or a + raw pointer (requires ``size=``). + location : :obj:`~_device.Device` | int | None, optional + Target location. When ``location_type`` is ``None``, values are + interpreted as a device ordinal, ``-1`` for host, or ``None``. + A location is required for discard_prefetch. + stream : :obj:`~_stream.Stream` | :obj:`~_graph.GraphBuilder` + Keyword argument specifying the stream for the asynchronous operation. + size : int, optional + Allocation size in bytes. Required when ``target`` is a raw pointer. + location_type : str | None, optional + Explicit location kind. Supported values are ``"device"``, ``"host"``, + ``"host_numa"``, and ``"host_numa_current"``. + """ + cdef object ptr + cdef object batch_ptr + cdef size_t nbytes + + ptr, nbytes = _normalize_managed_target_range(target, size, "discard_prefetch") + _require_managed_discard_prefetch_support("discard_prefetch") + cdef Stream s = Stream_accept(stream) + batch_ptr = driver.CUdeviceptr(int(ptr)) + location = _normalize_managed_location( + location, + location_type, + "discard_prefetch", + ) + handle_return( + driver.cuMemDiscardAndPrefetchBatchAsync( + [batch_ptr], + [nbytes], + _SINGLE_RANGE_COUNT, + [location], + [_FIRST_PREFETCH_LOCATION_INDEX], + _SINGLE_PREFETCH_LOCATION_COUNT, + _MANAGED_OPERATION_FLAGS, + s.handle, + ) + ) diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py index e7989f0f26..83fb1c7581 100644 --- a/cuda_core/cuda/core/experimental/__init__.py +++ b/cuda_core/cuda/core/experimental/__init__.py @@ -38,9 +38,10 @@ def _warn_deprecated(): _warn_deprecated() -from cuda.core import system, utils +from cuda.core import managed_memory, system, utils # Make utils accessible as a submodule for backward compatibility +__import__("sys").modules[__spec__.name + ".managed_memory"] = managed_memory __import__("sys").modules[__spec__.name + ".utils"] = utils diff --git a/cuda_core/cuda/core/managed_memory.py b/cuda_core/cuda/core/managed_memory.py new file mode 100644 index 0000000000..005c9ec3cf --- /dev/null +++ b/cuda_core/cuda/core/managed_memory.py @@ -0,0 +1,9 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Managed-memory range operations.""" + +from cuda.core._memory._managed_memory_ops import advise, discard_prefetch, prefetch + +__all__ = ["advise", "discard_prefetch", "prefetch"] diff --git a/cuda_core/docs/source/api.rst b/cuda_core/docs/source/api.rst index 86a83c4e86..29fd9bf62a 100644 --- a/cuda_core/docs/source/api.rst +++ b/cuda_core/docs/source/api.rst @@ -62,6 +62,21 @@ CUDA runtime on other non-blocking streams. +.. module:: cuda.core.managed_memory + +Managed memory +-------------- + +.. autosummary:: + :toctree: generated/ + + advise + prefetch + discard_prefetch + +.. module:: cuda.core + :no-index: + CUDA compilation toolchain -------------------------- diff --git a/cuda_core/docs/source/release/0.7.x-notes.rst b/cuda_core/docs/source/release/0.7.x-notes.rst index 98551603b6..186e3181f1 100644 --- a/cuda_core/docs/source/release/0.7.x-notes.rst +++ b/cuda_core/docs/source/release/0.7.x-notes.rst @@ -35,6 +35,13 @@ New features preference, or a tuple such as ``("device", 0)``, ``("host", None)``, or ``("host_numa", 3)``. +- Added managed-memory range operations under :mod:`cuda.core.managed_memory`: + ``advise()``, ``prefetch()``, and ``discard_prefetch()``. 
These free + functions accept either a managed :class:`Buffer` or a raw pointer plus + ``size=``, validate that the target allocation is managed memory, and then + forward to the corresponding CUDA driver operations for range advice and + migration. + - Added ``numa_id`` option to :class:`PinnedMemoryResourceOptions` for explicit control over host NUMA node placement. When ``ipc_enabled=True`` and ``numa_id`` is not set, the NUMA node is automatically derived from the diff --git a/cuda_core/pixi.lock b/cuda_core/pixi.lock index 78da9addb5..e2f8b7b0c2 100644 --- a/cuda_core/pixi.lock +++ b/cuda_core/pixi.lock @@ -2598,7 +2598,7 @@ packages: subdir: win-64 variants: c_compiler: vs2022 - cuda-version: 13.2.* + cuda_version: 13.2.* cxx_compiler: vs2022 python: 3.14.* target_platform: win-64 @@ -2625,7 +2625,7 @@ packages: build: py314h9a28ecd_0 subdir: linux-aarch64 variants: - cuda-version: 13.2.* + cuda_version: 13.2.* python: 3.14.* target_platform: linux-aarch64 depends: @@ -2653,7 +2653,7 @@ packages: build: py314hb727236_0 subdir: linux-64 variants: - cuda-version: 13.2.* + cuda_version: 13.2.* python: 3.14.* target_platform: linux-64 depends: @@ -2794,7 +2794,7 @@ packages: subdir: win-64 variants: c_compiler: vs2022 - cuda-version: 13.2.* + cuda_version: 13.2.* cxx_compiler: vs2022 python: 3.14.* target_platform: win-64 @@ -2817,7 +2817,7 @@ packages: subdir: win-64 variants: c_compiler: vs2022 - cuda-version: 12.* + cuda_version: 12.* cxx_compiler: vs2022 python: 3.14.* target_platform: win-64 @@ -2840,7 +2840,7 @@ packages: build: py314h9a28ecd_0 subdir: linux-aarch64 variants: - cuda-version: 13.2.* + cuda_version: 13.2.* python: 3.14.* target_platform: linux-aarch64 depends: @@ -2862,7 +2862,7 @@ packages: build: py314ha6d028f_0 subdir: linux-64 variants: - cuda-version: 12.* + cuda_version: 12.* python: 3.14.* target_platform: linux-64 depends: @@ -2884,7 +2884,7 @@ packages: build: py314hb727236_0 subdir: linux-64 variants: - cuda-version: 13.2.* + cuda_version: 13.2.* python: 3.14.* target_platform: linux-64 depends: @@ -2906,7 +2906,7 @@ packages: build: py314he8946ed_0 subdir: linux-aarch64 variants: - cuda-version: 12.* + cuda_version: 12.* python: 3.14.* target_platform: linux-aarch64 depends: diff --git a/cuda_core/tests/test_experimental_backward_compat.py b/cuda_core/tests/test_experimental_backward_compat.py index c3215b056a..82e2cdd5be 100644 --- a/cuda_core/tests/test_experimental_backward_compat.py +++ b/cuda_core/tests/test_experimental_backward_compat.py @@ -38,6 +38,7 @@ def test_experimental_backward_compatibility(): assert hasattr(cuda.core.experimental, "Device") assert hasattr(cuda.core.experimental, "Stream") assert hasattr(cuda.core.experimental, "Buffer") + assert hasattr(cuda.core.experimental, "managed_memory") assert hasattr(cuda.core.experimental, "system") # Test 2: Direct imports - should emit deprecation warning @@ -73,6 +74,7 @@ def test_experimental_backward_compatibility(): assert cuda.core.experimental.Linker is cuda.core.Linker # Compare singletons + assert cuda.core.experimental.managed_memory is cuda.core.managed_memory assert cuda.core.experimental.system is cuda.core.system # Test 4: Utils module works @@ -88,6 +90,11 @@ def test_experimental_backward_compatibility(): assert StridedMemoryView is not None assert args_viewable_as_strided_memory is not None + from cuda.core.experimental.managed_memory import advise, discard_prefetch, prefetch + + assert advise is not None + assert prefetch is not None + assert discard_prefetch is not None # Test 
5: Options classes are accessible assert hasattr(cuda.core.experimental, "EventOptions") diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py index 8005d3ce6c..544b7afc03 100644 --- a/cuda_core/tests/test_memory.py +++ b/cuda_core/tests/test_memory.py @@ -38,16 +38,23 @@ PinnedMemoryResourceOptions, VirtualMemoryResource, VirtualMemoryResourceOptions, + managed_memory, ) from cuda.core import ( system as ccx_system, ) from cuda.core._dlpack import DLDeviceType -from cuda.core._memory import IPCBufferDescriptor +from cuda.core._memory import IPCBufferDescriptor, _managed_memory_ops from cuda.core._utils.cuda_utils import CUDAError, handle_return from cuda.core.utils import StridedMemoryView POOL_SIZE = 2097152 # 2MB size +_MANAGED_TEST_ALLOCATION_SIZE = 4096 +_MEM_RANGE_ATTRIBUTE_VALUE_SIZE = 4 +_READ_MOSTLY_ENABLED = 1 +_HOST_LOCATION_ID = -1 +_INVALID_HOST_DEVICE_ORDINAL = 0 +_LEGACY_BINDINGS_VERSION = (12, 9) class DummyDeviceMemoryResource(MemoryResource): @@ -1134,6 +1141,365 @@ def test_managed_memory_resource_preferred_location_validation(init_cuda): ) +def _get_mem_range_attr(buffer, attribute, data_size): + # cuMemRangeGetAttribute returns a raw integer when data_size <= 4. + return handle_return(driver.cuMemRangeGetAttribute(data_size, attribute, buffer.handle, buffer.size)) + + +def _get_int_mem_range_attr(buffer, attribute): + return _get_mem_range_attr(buffer, attribute, _MEM_RANGE_ATTRIBUTE_VALUE_SIZE) + + +def _skip_if_managed_allocation_unsupported(device): + try: + if not device.properties.managed_memory: + pytest.skip("Device does not support managed memory operations") + except AttributeError: + pytest.skip("Managed-memory buffer operations require CUDA support") + + +def _skip_if_managed_location_ops_unsupported(device): + _skip_if_managed_allocation_unsupported(device) + try: + if not device.properties.concurrent_managed_access: + pytest.skip("Device does not support concurrent managed memory access") + except AttributeError: + pytest.skip("Managed-memory location operations require CUDA support") + + +def _skip_if_managed_discard_prefetch_unsupported(device): + _skip_if_managed_location_ops_unsupported(device) + if not hasattr(driver, "cuMemDiscardAndPrefetchBatchAsync"): + pytest.skip("discard-prefetch requires cuda.bindings support") + + visible_devices = Device.get_all_devices() + if not all(dev.properties.concurrent_managed_access for dev in visible_devices): + pytest.skip("discard-prefetch requires concurrent managed access on all visible devices") + + +def test_managed_memory_prefetch_supports_managed_pool_allocations(init_cuda): + device = Device() + skip_if_managed_memory_unsupported(device) + device.set_current() + + mr = create_managed_memory_resource_or_skip() + buffer = mr.allocate(_MANAGED_TEST_ALLOCATION_SIZE) + stream = device.create_stream() + + managed_memory.prefetch(buffer, _HOST_LOCATION_ID, stream=stream) + stream.sync() + last_location = _get_int_mem_range_attr( + buffer, + driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION, + ) + assert last_location == _HOST_LOCATION_ID + + managed_memory.prefetch(buffer, device, stream=stream) + stream.sync() + last_location = _get_int_mem_range_attr( + buffer, + driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION, + ) + assert last_location == device.device_id + + buffer.close() + + +def test_managed_memory_advise_supports_external_managed_allocations(init_cuda): + device = Device() + _skip_if_managed_location_ops_unsupported(device) + 
device.set_current() + + buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE) + + managed_memory.advise(buffer, "set_read_mostly") + assert ( + _get_int_mem_range_attr( + buffer, + driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY, + ) + == _READ_MOSTLY_ENABLED + ) + + # cuda.bindings currently exposes the combined location attributes for + # cuMemRangeGetAttribute, so use the legacy location query here. + managed_memory.advise(buffer, "set_preferred_location", location_type="host") + preferred_location = _get_int_mem_range_attr( + buffer, + driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION, + ) + assert preferred_location == _HOST_LOCATION_ID + + buffer.close() + + +def test_managed_memory_prefetch_supports_external_managed_allocations(init_cuda): + device = Device() + _skip_if_managed_location_ops_unsupported(device) + device.set_current() + + buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE) + stream = device.create_stream() + + managed_memory.prefetch(buffer, device, stream=stream) + stream.sync() + + last_location = _get_int_mem_range_attr( + buffer, + driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION, + ) + assert last_location == device.device_id + + buffer.close() + + +def test_managed_memory_discard_prefetch_supports_managed_pool_allocations(init_cuda): + device = Device() + skip_if_managed_memory_unsupported(device) + _skip_if_managed_discard_prefetch_unsupported(device) + device.set_current() + + mr = create_managed_memory_resource_or_skip() + buffer = mr.allocate(_MANAGED_TEST_ALLOCATION_SIZE) + stream = device.create_stream() + + managed_memory.prefetch(buffer, _HOST_LOCATION_ID, stream=stream) + stream.sync() + + managed_memory.discard_prefetch(buffer, device, stream=stream) + stream.sync() + + last_location = _get_int_mem_range_attr( + buffer, + driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION, + ) + assert last_location == device.device_id + + buffer.close() + + +def test_managed_memory_discard_prefetch_supports_external_managed_allocations(init_cuda): + device = Device() + _skip_if_managed_discard_prefetch_unsupported(device) + device.set_current() + + buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE) + stream = device.create_stream() + + managed_memory.prefetch(buffer, _HOST_LOCATION_ID, stream=stream) + stream.sync() + + managed_memory.discard_prefetch(buffer, device, stream=stream) + stream.sync() + + last_location = _get_int_mem_range_attr( + buffer, + driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION, + ) + assert last_location == device.device_id + + buffer.close() + + +def test_managed_memory_advise_uses_legacy_bindings_signature(monkeypatch, init_cuda): + device = Device() + _skip_if_managed_allocation_unsupported(device) + device.set_current() + + buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE) + calls = [] + + def fake_cuMemAdvise(ptr, size, advice, location): + calls.append((ptr, size, advice, location)) + return (driver.CUresult.CUDA_SUCCESS,) + + monkeypatch.setattr(_managed_memory_ops, "get_binding_version", lambda: _LEGACY_BINDINGS_VERSION) + monkeypatch.setattr(_managed_memory_ops, "_V2_BINDINGS", -1) + monkeypatch.setattr(_managed_memory_ops.driver, "cuMemAdvise", fake_cuMemAdvise) + + managed_memory.advise(buffer, "set_read_mostly") + + assert len(calls) == 1 + assert calls[0][3] == int(getattr(driver, 
"CU_DEVICE_CPU", _HOST_LOCATION_ID)) + + buffer.close() + + +def test_managed_memory_prefetch_uses_legacy_bindings_signature(monkeypatch, init_cuda): + device = Device() + _skip_if_managed_location_ops_unsupported(device) + device.set_current() + + buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE) + stream = device.create_stream() + calls = [] + + def fake_cuMemPrefetchAsync(ptr, size, location, hstream): + calls.append((ptr, size, location, hstream)) + return (driver.CUresult.CUDA_SUCCESS,) + + monkeypatch.setattr(_managed_memory_ops, "get_binding_version", lambda: _LEGACY_BINDINGS_VERSION) + monkeypatch.setattr(_managed_memory_ops, "_V2_BINDINGS", -1) + monkeypatch.setattr(_managed_memory_ops.driver, "cuMemPrefetchAsync", fake_cuMemPrefetchAsync) + + managed_memory.prefetch(buffer, device, stream=stream) + + assert len(calls) == 1 + assert calls[0][2] == device.device_id + assert int(calls[0][3]) == int(stream.handle) + + buffer.close() + + +def test_managed_memory_operations_reject_non_managed_allocations(init_cuda): + device = Device() + device.set_current() + + buffer = DummyDeviceMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE) + stream = device.create_stream() + + with pytest.raises(ValueError, match="managed-memory allocation"): + managed_memory.advise(buffer, "set_read_mostly") + with pytest.raises(ValueError, match="managed-memory allocation"): + managed_memory.prefetch(buffer, device, stream=stream) + with pytest.raises(ValueError, match="managed-memory allocation"): + managed_memory.discard_prefetch(buffer, device, stream=stream) + + buffer.close() + + +def test_managed_memory_operation_validation(init_cuda): + device = Device() + skip_if_managed_memory_unsupported(device) + device.set_current() + + mr = create_managed_memory_resource_or_skip() + buffer = mr.allocate(_MANAGED_TEST_ALLOCATION_SIZE) + stream = device.create_stream() + + with pytest.raises(ValueError, match="requires a location"): + managed_memory.prefetch(buffer, stream=stream) + with pytest.raises(ValueError, match="does not support location_type='host_numa'"): + managed_memory.advise(buffer, "set_accessed_by", _INVALID_HOST_DEVICE_ORDINAL, location_type="host_numa") + with pytest.raises(ValueError, match="location must be None or -1"): + managed_memory.prefetch(buffer, _INVALID_HOST_DEVICE_ORDINAL, stream=stream, location_type="host") + + buffer.close() + + +def test_managed_memory_advise_location_validation(init_cuda): + """Verify doc-specified location constraints for each advice kind.""" + device = Device() + _skip_if_managed_location_ops_unsupported(device) + device.set_current() + + buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE) + + # set_read_mostly works without a location (location is ignored) + managed_memory.advise(buffer, "set_read_mostly") + + # set_preferred_location requires a location; device ordinal works + managed_memory.advise(buffer, "set_preferred_location", device.device_id) + + # set_preferred_location with host location_type + managed_memory.advise(buffer, "set_preferred_location", location_type="host") + + # set_accessed_by with host_numa raises ValueError (INVALID per CUDA docs) + with pytest.raises(ValueError, match="does not support location_type='host_numa'"): + managed_memory.advise(buffer, "set_accessed_by", 0, location_type="host_numa") + + # set_accessed_by with host_numa_current also raises ValueError + with pytest.raises(ValueError, match="does not support location_type='host_numa_current'"): + 
managed_memory.advise(buffer, "set_accessed_by", location_type="host_numa_current") + + # Inferred location from int: -1 maps to host, 0 maps to device + managed_memory.advise(buffer, "set_preferred_location", -1) + managed_memory.advise(buffer, "set_preferred_location", 0) + + buffer.close() + + +def test_managed_memory_advise_accepts_enum_value(init_cuda): + """advise() accepts CUmem_advise enum values directly, not just string aliases.""" + device = Device() + _skip_if_managed_location_ops_unsupported(device) + device.set_current() + + buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE) + + advice_enum = driver.CUmem_advise.CU_MEM_ADVISE_SET_READ_MOSTLY + managed_memory.advise(buffer, advice_enum) + + assert ( + _get_int_mem_range_attr( + buffer, + driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY, + ) + == _READ_MOSTLY_ENABLED + ) + + buffer.close() + + +def test_managed_memory_advise_size_rejected_for_buffer(init_cuda): + """advise() raises TypeError when size= is given with a Buffer target.""" + device = Device() + _skip_if_managed_allocation_unsupported(device) + device.set_current() + + buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE) + + with pytest.raises(TypeError, match="does not accept size="): + managed_memory.advise(buffer, "set_read_mostly", size=1024) + + buffer.close() + + +def test_managed_memory_advise_invalid_advice_values(init_cuda): + """advise() rejects invalid advice strings and wrong types.""" + device = Device() + _skip_if_managed_allocation_unsupported(device) + device.set_current() + + buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE) + + with pytest.raises(ValueError, match="advice must be one of"): + managed_memory.advise(buffer, "not_a_real_advice") + + with pytest.raises(TypeError, match="advice must be"): + managed_memory.advise(buffer, 42) + + buffer.close() + + +def test_managed_memory_functions_accept_raw_pointer_ranges(init_cuda): + device = Device() + _skip_if_managed_location_ops_unsupported(device) + device.set_current() + + buffer = DummyUnifiedMemoryResource(device).allocate(_MANAGED_TEST_ALLOCATION_SIZE) + stream = device.create_stream() + + managed_memory.advise(buffer.handle, "set_read_mostly", size=buffer.size) + assert ( + _get_int_mem_range_attr( + buffer, + driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY, + ) + == _READ_MOSTLY_ENABLED + ) + + managed_memory.prefetch(buffer.handle, device, size=buffer.size, stream=stream) + stream.sync() + last_location = _get_int_mem_range_attr( + buffer, + driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION, + ) + assert last_location == device.device_id + + buffer.close() + + def test_managed_memory_resource_host_numa_auto_resolve_failure(init_cuda): """host_numa with None raises RuntimeError when NUMA ID cannot be determined.""" from unittest.mock import MagicMock, patch