diff --git a/transformer_engine/common/nvshmem_api/CMakeLists.txt b/transformer_engine/common/nvshmem_api/CMakeLists.txt index 1e72e42b0a..3d9b6b5ec4 100644 --- a/transformer_engine/common/nvshmem_api/CMakeLists.txt +++ b/transformer_engine/common/nvshmem_api/CMakeLists.txt @@ -16,7 +16,8 @@ set(NVSHMEMAPI_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}" PARENT_SCOPE) target_link_directories(nvshmemapi PUBLIC ${NVSHMEM_HOME}/lib) target_link_libraries(nvshmemapi PUBLIC -static-libstdc++ nvshmem_device nvshmem_host CUDA::nvml CUDA::cublas CUDA::cuda_driver) target_include_directories(nvshmemapi PRIVATE - ${NVSHMEM_HOME}/include/) + ${NVSHMEM_HOME}/include/ + ${CMAKE_CURRENT_SOURCE_DIR}/../include) target_include_directories(nvshmemapi PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} "${CMAKE_CURRENT_SOURCE_DIR}") @@ -24,4 +25,4 @@ target_include_directories(nvshmemapi PUBLIC set_target_properties(nvshmemapi PROPERTIES CUDA_STANDARD 17 POSITION_INDEPENDENT_CODE ON - CUDA_SEPARABLE_COMPILATION ON) \ No newline at end of file + CUDA_SEPARABLE_COMPILATION ON) diff --git a/transformer_engine/common/nvshmem_api/nvshmem_waitkernel.cu b/transformer_engine/common/nvshmem_api/nvshmem_waitkernel.cu index efa7d0d53a..f81062d63b 100644 --- a/transformer_engine/common/nvshmem_api/nvshmem_waitkernel.cu +++ b/transformer_engine/common/nvshmem_api/nvshmem_waitkernel.cu @@ -15,6 +15,7 @@ #include #include +#include "../util/cuda_driver.h" #include "../util/logging.h" #include "nvshmem_waitkernel.h"