cmake_minimum_required(VERSION 3.16)
project(vllm_ascend_C)
set(CMAKE_CXX_STANDARD 17)
include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
set(ignoreMe "${VLLM_PYTHON_PATH}")
set(PYTHON_SUPPORTED_VERSIONS "3.9" "3.10" "3.11")
find_package(pybind11 REQUIRED)
append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path")
set(VLLM_ASCEND_INSTALL_PATH "${CMAKE_INSTALL_PREFIX}")
find_package(Torch REQUIRED)
run_python(TORCH_VERSION
"import torch; print(torch.__version__)" "Failed to locate torch path")
if(NOT ${TORCH_VERSION} VERSION_EQUAL "2.10.0")
message(FATAL_ERROR "Expected PyTorch version 2.10.0, but found ${TORCH_VERSION}")
endif()
set(RUN_MODE "npu" CACHE STRING "cpu/sim/npu")
set(SOC_VERSION ${SOC_VERSION})
message(STATUS "Detected SOC version: ${SOC_VERSION}")
if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRINGS "Build type Release/Debug (default Release)" FORCE)
endif()
if (CMAKE_INSTALL_PREFIX STREQUAL /usr/local)
set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRINGS "path to install()")
endif()
set(ASCEND_CANN_PACKAGE_PATH ${ASCEND_HOME_PATH})
if(EXISTS ${ASCEND_HOME_PATH}/tools/tikcpp/ascendc_kernel_cmake)
set(ASCENDC_CMAKE_DIR ${ASCEND_HOME_PATH}/tools/tikcpp/ascendc_kernel_cmake)
elseif(EXISTS ${ASCEND_HOME_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
set(ASCENDC_CMAKE_DIR ${ASCEND_HOME_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
elseif(EXISTS ${ASCEND_HOME_PATH}/ascendc_devkit/tikcpp/samples/cmake)
set(ASCENDC_CMAKE_DIR ${ASCEND_HOME_PATH}/ascendc_devkit/tikcpp/samples/cmake)
else()
message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the cann package is installed.")
endif()
include(${ASCENDC_CMAKE_DIR}/ascendc.cmake)
file(GLOB KERNEL_FILES
${CMAKE_CURRENT_SOURCE_DIR}/csrc/kernels/*.cpp)
set(VLLM_ASCEND_CUSTOM_OP
${KERNEL_FILES}
${CMAKE_CURRENT_SOURCE_DIR}/csrc/mla_preprocess/op_kernel/mla_preprocess_kernel.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csrc/batch_matmul_transpose/op_kernel/batch_matmul_transpose_kernel.cpp
)
set(VLLM_ASCEND_CUSTOM_OP_EXCLUDE_ASCEND950
${CMAKE_CURRENT_SOURCE_DIR}/csrc/mla_preprocess/op_kernel/mla_preprocess_kernel.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csrc/batch_matmul_transpose/op_kernel/batch_matmul_transpose_kernel.cpp
)
if(SOC_VERSION MATCHES "ascend950")
message(STATUS "A5 hardware detected: disabling MLAPO operators")
message(STATUS "A5 hardware detected: excluding batch_matmul_transpose operators")
list(REMOVE_ITEM VLLM_ASCEND_CUSTOM_OP ${VLLM_ASCEND_CUSTOM_OP_EXCLUDE_ASCEND950})
endif()
if(SOC_VERSION MATCHES "ascend310p.*|ascend950")
message(STATUS "Hardware ${SOC_VERSION} detected: skip vllm_ascend_kernels compile")
else()
ascendc_library(vllm_ascend_kernels SHARED
${VLLM_ASCEND_CUSTOM_OP}
)
endif()
message("TORCH_NPU_PATH is ${TORCH_NPU_PATH}")
if(SOC_VERSION MATCHES "ascend310p.*")
file(GLOB VLLM_ASCEND_SRC
${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csrc/aclnn_torch_adapter/*.cpp)
else()
file(GLOB VLLM_ASCEND_SRC
${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csrc/aclnn_torch_adapter/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csrc/batch_matmul_transpose/op_host/tiling/tiling_data.cpp)
endif()
include_directories(
${pybind11_INCLUDE_DIRS}
${PYTHON_INCLUDE_PATH}
${TORCH_INCLUDE_DIRS}
${TORCH_NPU_PATH}/include
${ASCEND_HOME_PATH}/include
${CMAKE_CURRENT_SOURCE_DIR}/csrc/batch_matmul_transpose/op_host
)
set(
INCLUDES
${TORCH_INCLUDE_DIRS}
${TORCH_NPU_INCLUDE_DIRS}
${ASCEND_HOME_PATH}/include
)
pybind11_add_module(vllm_ascend_C ${VLLM_ASCEND_SRC})
include(CheckCXXSourceCompiles)
set(CMAKE_REQUIRED_INCLUDES ${ASCEND_HOME_PATH}/include)
set(CMAKE_REQUIRED_LIBRARIES ascendcl)
set(CMAKE_REQUIRED_LINK_OPTIONS "-L${ASCEND_HOME_PATH}/lib64")
if(DEFINED VLLM_ASCEND_ENABLE_BATCH_MEMCPY)
if("${VLLM_ASCEND_ENABLE_BATCH_MEMCPY}" STREQUAL "1")
message(STATUS "aclrtMemcpyBatchAsync: force enabled via VLLM_ASCEND_ENABLE_BATCH_MEMCPY=1")
target_compile_definitions(vllm_ascend_C PRIVATE CANN_MEMCPY_BATCH_ASYNC)
else()
message(STATUS "aclrtMemcpyBatchAsync: force disabled via VLLM_ASCEND_ENABLE_BATCH_MEMCPY=0")
endif()
else()
check_cxx_source_compiles("
#include <acl/acl_rt.h>
int main() {
aclrtMemLocation loc = {};
loc.type = ACL_MEM_LOCATION_TYPE_HOST;
loc.id = 0;
aclrtMemcpyBatchAttr attr = {};
attr.srcLoc = loc;
attr.dstLoc = loc;
(void)aclrtMemcpyBatchAsync;
return 0;
}
" HAVE_ACLRT_MEMCPY_BATCH_ASYNC)
if(HAVE_ACLRT_MEMCPY_BATCH_ASYNC)
message(STATUS "aclrtMemcpyBatchAsync: detected in CANN headers, enabling batch memcpy path")
target_compile_definitions(vllm_ascend_C PRIVATE CANN_MEMCPY_BATCH_ASYNC)
else()
message(STATUS "aclrtMemcpyBatchAsync: not found in CANN headers, using fallback aclrtMemcpyAsync loop")
endif()
endif()
if(SOC_VERSION MATCHES "ascend310p.*")
target_compile_definitions(vllm_ascend_C PRIVATE -DASCEND_PLATFORM_310P)
endif()
if(NOT (SOC_VERSION MATCHES "ascend310p.*|ascend950"))
target_compile_definitions(vllm_ascend_C PRIVATE -DVLLM_ENABLE_ATB_AND_DIRECT_KERNELS)
endif()
target_link_directories(
vllm_ascend_C
PRIVATE
${TORCH_LIBRARY_DIRS}
${TORCH_NPU_PATH}/lib/
${ASCEND_HOME_PATH}/lib64
)
set(VLLM_ASCEND_C_COMMON_LIBS
${TORCH_LIBRARIES}
torch_npu
ascendcl
tiling_api
register
platform
ascendalog
dl
opapi
)
if(SOC_VERSION MATCHES "ascend310p.*|ascend950")
target_link_libraries(
vllm_ascend_C
PUBLIC
${VLLM_ASCEND_C_COMMON_LIBS}
)
else()
target_link_libraries(
vllm_ascend_C
PUBLIC
vllm_ascend_kernels
${VLLM_ASCEND_C_COMMON_LIBS}
)
endif()
target_link_options(vllm_ascend_C PRIVATE "-Wl,-rpath,$ORIGIN:$ORIGIN/lib:$ORIGIN/_cann_ops_custom/vendors/custom_transformer/op_api/lib")
if(SOC_VERSION MATCHES "ascend310p.*|ascend950")
install(TARGETS vllm_ascend_C DESTINATION ${VLLM_ASCEND_INSTALL_PATH})
else()
install(TARGETS vllm_ascend_C vllm_ascend_kernels DESTINATION ${VLLM_ASCEND_INSTALL_PATH})
endif()