# Skip link dependencies on shared library files for targets declared below.
set(CMAKE_LINK_DEPENDS_NO_SHARED 1)

# Device-side headers, added at directory scope so that every target declared
# in this directory and its subdirectories can include them.
include_directories(
  ${PROJECT_SOURCE_DIR}/src/device/
  ${PROJECT_SOURCE_DIR}/src/device_simt/
)

# CATLASS_ARCH is 3510 when SOC_TYPE is "Ascend950" and 2201 for any other value.
if(NOT SOC_TYPE STREQUAL "Ascend950")
  add_compile_definitions(CATLASS_ARCH=2201)
else()
  add_compile_definitions(CATLASS_ARCH=3510)
endif()

# Accumulator for the kernel shared-library targets registered by the helper
# functions in this file. It is reset to empty every time this file is processed.
set(ACLSHMEM_ALL_KERNEL_TARGETS "" CACHE INTERNAL "All collective kernel shared library targets")

# Example names whose kernel library is registered and installed when
# PYEXPAND_EXAMPLE is enabled.
set(PYEXPAND_SUPPORTED_TORCH
  kv_shuffle
  allgather
  allgather_matmul
)

# Optional AscendC dump support: adds the dump definitions and links
# ascend_dump for everything declared at or below this directory.
if(ENABLE_ASCENDC_DUMP)
  add_compile_definitions(
    ENABLE_ASCENDC_DUMP
    ASCENDC_DUMP=1
    ASCENDC_DEBUG
  )
  link_libraries(ascend_dump)
endif()
# aclshmem_add_fusion_example(<NAME> <source>...)
#
# Declares the build targets of one fusion example.
#
# Arguments:
#   NAME - example name; used as the executable target name and as the prefix
#          of the kernel library target (<NAME>_kernel).
#   ARGN - source files; the same list is used for both targets.
#
# Targets created:
#   <NAME>         Executable, only when USE_EXAMPLES is true.
#   <NAME>_kernel  Shared library, only when PYEXPAND_EXAMPLE is true and NAME
#                  is listed in PYEXPAND_SUPPORTED_TORCH. The target is appended
#                  to the ACLSHMEM_ALL_KERNEL_TARGETS cache entry and installed
#                  to torch_binding/kernels.
function(aclshmem_add_fusion_example NAME)
  # Include directories shared by the executable and the kernel library.
  set(aclshmem_fusion_include_dirs
    ${PROJECT_SOURCE_DIR}/include
    ${PROJECT_SOURCE_DIR}/3rdparty/catlass/include
    ${PROJECT_SOURCE_DIR}/3rdparty/catlass/examples/common
    ${PROJECT_SOURCE_DIR}/examples/${NAME}
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${PROJECT_SOURCE_DIR}/examples/templates/include
    ${PROJECT_SOURCE_DIR}/examples/utils
    ${PROJECT_SOURCE_DIR}/src/device/
    ${PROJECT_SOURCE_DIR}/src/device_simt/
    ${PROJECT_SOURCE_DIR}/src/
    ${MPI_INCLUDE_PATH}
  )

  if(USE_EXAMPLES)
    add_executable(${NAME} ${ARGN})
    if(ENABLE_ASCENDC_DUMP)
      target_compile_definitions(${NAME} PRIVATE ENABLE_ASCENDC_DUMP ASCENDC_DUMP=1 ASCENDC_DEBUG)
    endif()
    target_compile_options(${NAME} PRIVATE
      ${CMAKE_CCE_COMPILE_OPTIONS}
      ${CCE_AICORE_ARCH}
      ${SANITIZER_FLAGS}
    )
    target_include_directories(${NAME} PRIVATE
      ${aclshmem_fusion_include_dirs}
      $ENV{ASCEND_HOME_PATH}/tools/mssanitizer/include
    )
    target_link_options(${NAME} PRIVATE --cce-fatobj-link ${CCE_AICORE_ARCH} ${SANITIZER_FLAGS})
    if(ENABLE_ASCENDC_DUMP)
      target_link_libraries(${NAME} PRIVATE ascend_dump)
    endif()
    target_link_libraries(${NAME} PRIVATE shmem)
    target_compile_options(${NAME} PRIVATE ${MPI_CXX_COMPILE_FLAGS})
  endif()

  # NAME is passed by variable name (not as ${NAME}) so that if() dereferences
  # it exactly once, even if a variable happens to share the example's name.
  if(PYEXPAND_EXAMPLE AND NAME IN_LIST PYEXPAND_SUPPORTED_TORCH)
    add_library(${NAME}_kernel SHARED ${ARGN})
    if(ENABLE_ASCENDC_DUMP)
      target_compile_definitions(${NAME}_kernel PRIVATE ENABLE_ASCENDC_DUMP ASCENDC_DUMP=1 ASCENDC_DEBUG)
    endif()
    # NOTE(review): the AI-core architecture is hard-coded to dav-c220 here,
    # whereas the executable above uses ${CCE_AICORE_ARCH} — confirm this is
    # intentional.
    target_compile_options(${NAME}_kernel PRIVATE
      ${CMAKE_CCE_COMPILE_OPTIONS}
      --cce-aicore-arch=dav-c220
      ${SANITIZER_FLAGS}
    )
    # NOTE(review): unlike the executable, the mssanitizer include directory is
    # not added for the kernel library — confirm whether it is needed when
    # SANITIZER_FLAGS is set.
    target_include_directories(${NAME}_kernel PRIVATE ${aclshmem_fusion_include_dirs})
    target_link_options(${NAME}_kernel PRIVATE --cce-fatobj-link --cce-aicore-arch=dav-c220 ${SANITIZER_FLAGS})
    if(ENABLE_ASCENDC_DUMP)
      target_link_libraries(${NAME}_kernel PRIVATE ascend_dump)
    endif()
    target_link_libraries(${NAME}_kernel PRIVATE shmem)
    target_compile_options(${NAME}_kernel PRIVATE ${MPI_CXX_COMPILE_FLAGS})

    # NOTE(review): dispatch_gmm_combine is not part of PYEXPAND_SUPPORTED_TORCH
    # as set at the top of this file, so this branch only takes effect if that
    # list is extended.
    if(NAME STREQUAL "dispatch_gmm_combine")
      target_compile_options(${NAME}_kernel PRIVATE --cce-auto-sync)
      target_include_directories(${NAME}_kernel PRIVATE ${PROJECT_SOURCE_DIR}/examples/${NAME}/include)
    endif()

    # Register the kernel target: append locally, then write the result back to
    # the cache entry so it outlives this function's scope.
    list(APPEND ACLSHMEM_ALL_KERNEL_TARGETS ${NAME}_kernel)
    set(ACLSHMEM_ALL_KERNEL_TARGETS ${ACLSHMEM_ALL_KERNEL_TARGETS} CACHE INTERNAL "Update kernel targets")
    install(TARGETS ${NAME}_kernel
      LIBRARY DESTINATION torch_binding/kernels)
  endif()
endfunction()
# aclshmem_add_collective_example(<NAME>)
#
# Declares the build targets of one collective example. Source file names are
# fixed and relative to the directory that calls this function.
#
# Arguments:
#   NAME - example name; used as the executable target name and as the prefix
#          of the kernel library target (<NAME>_kernel).
#
# Targets created:
#   <NAME>_kernel  Shared library built from <NAME>_kernel.cpp; always created.
#                  Links unified_dlog when find_library() locates it.
#   <NAME>         Executable built from main.cpp and linked against shmem and
#                  <NAME>_kernel, only when USE_EXAMPLES is true.
#
# When PYEXPAND_EXAMPLE is true and NAME is listed in PYEXPAND_SUPPORTED_TORCH,
# the kernel target is appended to the ACLSHMEM_ALL_KERNEL_TARGETS cache entry
# and installed to torch_binding/kernels.
function(aclshmem_add_collective_example NAME)
  add_library(${NAME}_kernel SHARED ${NAME}_kernel.cpp)
  target_compile_options(${NAME}_kernel PRIVATE
    ${CMAKE_CCE_COMPILE_OPTIONS}
    ${CCE_AICORE_ARCH_VEC}
    ${SANITIZER_FLAGS}
  )
  target_include_directories(${NAME}_kernel PRIVATE
    ${PROJECT_SOURCE_DIR}/include
    ${PROJECT_SOURCE_DIR}/3rdparty/catlass/include
    ${PROJECT_SOURCE_DIR}/3rdparty/catlass/examples/common
    ${PROJECT_SOURCE_DIR}/examples/${NAME}
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${PROJECT_SOURCE_DIR}/examples/utils
    ${PROJECT_SOURCE_DIR}/src/device/
    ${PROJECT_SOURCE_DIR}/src/device_simt/
    ${PROJECT_SOURCE_DIR}/src/
    $ENV{ASCEND_HOME_PATH}/tools/mssanitizer/include
  )
  target_link_options(${NAME}_kernel PRIVATE --cce-fatobj-link ${CCE_AICORE_ARCH_VEC} ${SANITIZER_FLAGS})

  # unified_dlog is optional. Link the path that find_library() located rather
  # than the bare library name, so the link does not depend on
  # $ENV{ASCEND_HOME_PATH}/lib64 being on the linker's search path.
  find_library(UNIFIED_DLOG_LIB NAMES unified_dlog PATHS $ENV{ASCEND_HOME_PATH}/lib64)
  if(UNIFIED_DLOG_LIB)
    target_link_libraries(${NAME}_kernel PRIVATE "${UNIFIED_DLOG_LIB}")
  endif()

  if(USE_EXAMPLES)
    add_executable(${NAME} main.cpp)
    target_compile_options(${NAME} PRIVATE ${CMAKE_CPP_COMPILE_OPTIONS})
    target_include_directories(${NAME} PRIVATE
      ${PROJECT_SOURCE_DIR}/include
      ${PROJECT_SOURCE_DIR}/3rdparty/catlass/examples/common
      ${PROJECT_SOURCE_DIR}/examples/${NAME}
      ${CMAKE_CURRENT_SOURCE_DIR}
      ${PROJECT_SOURCE_DIR}/examples/templates/include
      ${PROJECT_SOURCE_DIR}/examples/utils
      ${PROJECT_SOURCE_DIR}/src/device/
      ${PROJECT_SOURCE_DIR}/src/device_simt/
      ${PROJECT_SOURCE_DIR}/src/host
      ${PROJECT_SOURCE_DIR}/src/host/bootstrap/config_store
      ${PROJECT_SOURCE_DIR}/src/host/bootstrap/config_store/acc_links/include
      ${PROJECT_SOURCE_DIR}/src/host/mem/heap
      ${MPI_INCLUDE_PATH}
    )
    target_link_libraries(${NAME} PRIVATE shmem ${NAME}_kernel)
    target_compile_options(${NAME} PRIVATE ${MPI_CXX_COMPILE_FLAGS})
  endif()

  # NAME is passed by variable name (not as ${NAME}) so that if() dereferences
  # it exactly once, even if a variable happens to share the example's name.
  if(PYEXPAND_EXAMPLE AND NAME IN_LIST PYEXPAND_SUPPORTED_TORCH)
    # Append locally, then write the result back to the cache entry so it
    # outlives this function's scope.
    list(APPEND ACLSHMEM_ALL_KERNEL_TARGETS ${NAME}_kernel)
    set(ACLSHMEM_ALL_KERNEL_TARGETS ${ACLSHMEM_ALL_KERNEL_TARGETS} CACHE INTERNAL "Update kernel targets")
    install(TARGETS ${NAME}_kernel
      LIBRARY DESTINATION torch_binding/kernels)
  endif()
endfunction()
# Examples that are skipped when SOC_TYPE is "Ascend950" (see the loop below).
set(ACLSHMEM_CATLASS_FUSION_EXAMPLES
  allgather_matmul
  allgather_matmul_with_gather_result
  allgather_matmul_padding
  dispatch_gmm_combine
  matmul_allreduce
  matmul_reduce_scatter
  matmul_reduce_scatter_padding
  dynamic_tiling
)

# Add every example subdirectory, except the catlass fusion examples on Ascend950.
foreach(EXAMPLE IN ITEMS
    combine
    dispatch
    kv_shuffle
    aclgraph_demo
    allgather
    allgather_matmul
    allgather_matmul_with_gather_result
    allgather_matmul_padding
    dispatch_gmm_combine
    matmul_allreduce
    matmul_reduce_scatter
    matmul_reduce_scatter_padding
    multi_instance
    dynamic_tiling
    rma_d2h_demo
    sdma
    notifywait
    cmo
    shmem_perftest
    udma_demo
    udma_atomic_add
)
  # EXAMPLE is passed by variable name (not as ${EXAMPLE}) so that if()
  # dereferences it exactly once, even if a variable happens to share the
  # example's name.
  if(SOC_TYPE STREQUAL "Ascend950" AND EXAMPLE IN_LIST ACLSHMEM_CATLASS_FUSION_EXAMPLES)
    message(STATUS "Skip example '${EXAMPLE}' on Ascend950 (catlass not yet supported)")
    continue()
  endif()
  add_subdirectory(${EXAMPLE})
endforeach()
# Example directory added only when ACLSHMEM_HCCS_SIO_LINK is enabled.
if(ACLSHMEM_HCCS_SIO_LINK)
  add_subdirectory(hccs_sio_link)
endif()

# Example directories added only when ACLSHMEM_SIMT_SUPPORT is enabled.
if(ACLSHMEM_SIMT_SUPPORT)
  foreach(EXAMPLE IN ITEMS simt_rma simt_rma_scalar simt_rma_perftest)
    add_subdirectory(${EXAMPLE})
  endforeach()
endif()

# Example directories added only when ACLSHMEM_RDMA_SUPPORT is enabled.
if(ACLSHMEM_RDMA_SUPPORT)
  foreach(EXAMPLE IN ITEMS
      rdma_perftest
      rdma_demo
      rdma_aclgraph_demo
      rdma_handlewait_test/unuse_handlewait
      rdma_handlewait_test/use_handlewait
  )
    add_subdirectory(${EXAMPLE})
  endforeach()
endif()

# The torch binding directory is added last, after all example directories
# above have been processed.
if(PYEXPAND_EXAMPLE)
  add_subdirectory(torch_binding)
endif()