mc2tiling::A5_CCU_ENGINE (值为5)
0
mc2_tiling_utils.h
5
isSuccess()
SetHcclTiling()
withCommEngine(0)
mc2tiling::A5_CCU_ENGINE
int
uint8_t
mc2tiling::A5_CCU_ENGINE = 5
A5_CCU_ENGINE
matmul_allto_all
args.rankDim
formulaicTiling.GetTiling()
matmul_allto_all_fit_balance_tiling.h:64
tilingM_.cutRes
mc2/matmul_allto_all/op_host/CMakeLists.txt
mc2/matmul_allto_all/op_host/op_tiling/arch35/fp_matmul_allto_all_tiling_base.cpp
GetTilingResult()
mc2/matmul_allto_all/op_host/op_tiling/arch35/fp_matmul_allto_all_tiling_base.h
mc2/matmul_allto_all/op_host/op_tiling/arch35/kc_quant_matmul_allto_all_tiling_base.cpp
mc2/matmul_allto_all/op_host/op_tiling/arch35/kc_quant_matmul_allto_all_tiling_base.h
mc2/matmul_allto_all/op_host/op_tiling/arch35/mx_quant_matmul_allto_all_tiling_base.cpp
mc2/matmul_allto_all/op_host/op_tiling/arch35/mx_quant_matmul_allto_all_tiling_base.h
mc2/matmul_allto_all/op_host/op_tiling/arch35/matmul_allto_all_fit_balance_tiling.h
GetArch35TilingResult()
mc2/matmul_allto_all/op_host/op_tiling/common/allto_all_tiling_factory.cpp
mc2/matmul_allto_all/op_host/op_tiling/common/allto_all_tiling_factory.h
mc2/matmul_allto_all/op_host/op_tiling/matmul_allto_all_tiling_base.cpp
mc2/matmul_allto_all/op_host/op_tiling/matmul_allto_all_tiling_base.h
mc2/common/op_host/op_tiling/hccl_performance_arch35.cpp
mc2/matmul_allto_all/op_host/op_tiling/arch35/matmul_allto_all_fit_balance_tiling.cpp
mc2/matmul_allto_all/op_host/op_tiling/common/matmul_allto_all_util_tiling.h
CreateTiling()
CutResult
TilingArgs
GetInputTensor
MxQuantMatmulAllToAllTilingBase
attrs->GetAttrPointer
GetAttrPointer<bool>(ATTR_X2_TRANSPOSE_INDEX)
if (isX2TransX2)
std::min/max
mmInfo_.mValue <= SMALL_M_BAR_FOR_OVERHEAD
uint64_t
constexpr static uint64_t L2_CACHE_SIZE = 128 * ONE_MBYTE
double totalMatmulTime = matmulPerf_.MatmulTime(...)
uint64_t totalLen = ...
constexpr static
ratioCalcComm_ = max / min
std::min(totalTpTime, totalMatmulTime)
mValue / OVERHEAD_AWARE_MAX_TILES
OVERHEAD_AWARE_MAX_TILES = 3
minTileForOverhead / alignLen
alignLen
GetAlignLength()
totalLen / alignedLongLen
alignedLongLen == 0
[]
GetAttrPointer
kValue ≤ 65535
mValue/nValue ≤ INT32_MAX
(x + align - 1) / align * align
new/malloc
ops/pr_4667_review_report.md