mc2tiling::A5_CCU_ENGINE (值为5)
0
mc2_tiling_utils.h
5
isSuccess()
SetHcclTiling()
withCommEngine(0)
mc2tiling::A5_CCU_ENGINE
int
uint8_t
mc2tiling::A5_CCU_ENGINE = 5
A5_CCU_ENGINE
ENV_MC2_COMM_MODE_AICPU
Mc2Comm::GetCommModeFromEnv()
mc2/common/utils/mc2_comm_utils.h
mc2/allto_allv_quant_grouped_mat_mul/op_kernel/mc2_templates/communication/hccl_a2av_op.h
mc2/allto_allv_grouped_mat_mul/op_kernel/allto_allv_grouped_mat_mul_coarse_grained.h
mc2/allto_allv_quant_grouped_mat_mul/op_host/op_tiling/arch35/allto_allv_quant_grouped_mat_mul_tiling_common.cpp
mc2/allto_allv_grouped_mat_mul/op_host/op_tiling/allto_allv_grouped_mat_mul_tiling.cpp
ASC_DEVKIT_MAJOR
getenv()
static EsTransposeFunc func = GetEsTransposeFunc();
func(...)
mc2/matmul_allto_all/op_graph/fusion_pass/matmul_all_to_all_transpose_a5_fusion_pass.cpp
dlopen("libes_math.so", RTLD_LAZY | RTLD_GLOBAL)
RTLD_LAZY | RTLD_GLOBAL
mc2/common/utils/mc2_exception_dump.h
static
thread_local
func = nullptr
initialized = false
%s
dlerror()
%d
aclError
RTLD_NOLOAD | RTLD_LAZY
if (func == nullptr)
mc2/common/utils/mc2_log.cpp
mc2/matmul_reduce_scatter/op_host/op_tiling/matmul_reduce_scatter_tiling.cpp
mc2/matmul_reduce_scatter_v2/op_host/op_tiling/arch22/matmul_reduce_scatter_v2_aiv_mode_tiling.cpp
mc2/matmul_reduce_scatter_v2/op_host/op_tiling/arch35/matmul_reduce_scatter_v2_tiling.cpp
mc2/matmul_reduce_scatter_v2/op_host/op_tiling/arch35/quant_bmm_reduce_scatter_tiling.cpp
mc2/matmul_reduce_scatter_v2/op_host/op_tiling/matmul_reduce_scatter_tiling_base.cpp
OP_LOGD(opName, " xxx")
OP_LOGD(opName, "xxx")
matmul_reduce_scatter_tiling.cpp
matmul_reduce_scatter_v2_aiv_mode_tiling.cpp
matmul_reduce_scatter_v2_tiling.cpp
quant_bmm_reduce_scatter_tiling.cpp
matmul_reduce_scatter_tiling_base.cpp
%u
unsigned
%lu
unsigned long
opName]
]
[opName] message
matmul_reduce_scatter_tiling.cpp:152
mc2/moe_distribute_combine_v2/op_api/aclnn_moe_distribute_combine_v3.h:10
./build.sh && ./run_tests.sh
[适用: All]
common/include/kernel/
common/include/op_kernel/
common/include/tiling_base/
common/src/tiling_base/
common/include/op_host/
common/src/op_host/
#include
bash build.sh --pkg --soc=ascend910b --ops=all_gather_matmul_v2 -j16
.run
2610fdae7
FracDiv(remainingSpace, baseDynamic, 32U, 3U)
remainingSpace * 32
baseDynamic * 32 + 3
baseDynamic * num + den
sizeof
CAST_NONE
blockLen = scaleNum * sizeof(T)
xPerBlock
alignBlock
alignedXSize_
xSliceSize_
scaleNumsPerBlock_{0}
SetScaleNums()
vecComp_.InitBuffer()
ReadDataBlockReduceSum
scaleInQue_
scaleNumsPerBlcok_
scaleNumsPerBlock_