* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
* \file kernel_operator_fixpipe_impl.h
* \brief
*/
#if !defined(__ASCENDC_INCLUDE_INTERNAL_HEADERS__)
#pragma message("impl/basic_api/dav_l311/kernel_operator_fixpipe_impl.h is an internal header file and must not be used directly. Functions or variables defined in this file may be removed in the future. Please use \"#include \"basic_api/kernel_tensor.h\"\" and use public functions or variables defined in interface headers files.")
#define __ASCENDC_INCLUDE_INTERNAL_HEADERS__
#define __UNDEF_ASCENDC_INCLUDE_INTERNAL_HEADERS_KERNEL_OPERATOR_FIXPIPE_IMPL_H__
#endif
#ifndef ASCENDC_MODULE_OPERATOR_FIXPIPE_IMPL_H
#define ASCENDC_MODULE_OPERATOR_FIXPIPE_IMPL_H
#include "kernel_operator_set_spr_impl.h"
namespace AscendC {
__aicore__ inline void SetFixPipeClipReluImpl(uint64_t config)
{
(void)(config);
ASCENDC_REPORT_NOT_SUPPORT(false, "SetFixPipeClipRelu");
}
template <typename T>
__aicore__ inline void SetFixPipeAddrImpl(const LocalTensor<T> &eleWiseTensor, uint16_t c0ChStride)
{
ASCENDC_REPORT_NOT_SUPPORT(false, "SetFixPipeAddr");
}
* SPR *
* ************************************************************************************************* */
template <typename T>
__aicore__ inline void SetFixPipeConfigImpl(const LocalTensor<T> &reluPre, const LocalTensor<T> &quantPre,
bool isUnitFlag = false)
{
uint64_t config = 0;
config = config | ((uint64_t)reluPre.GetPhyAddr() >> 6);
config = config | (((uint64_t)quantPre.GetPhyAddr() >> 7) << 8);
config = config | (static_cast<uint64_t>(isUnitFlag) << 63);
set_fpc(config);
}
template <typename T, bool setRelu = false>
__aicore__ inline void SetFixPipeConfigImpl(const LocalTensor<T> &preTensor, bool isUnitFlag = false)
{
uint64_t config = 0;
if constexpr (setRelu) {
config = config | ((uint64_t)preTensor.GetPhyAddr() >> 6);
} else {
config =
config | (((uint64_t)preTensor.GetPhyAddr() >> 7) << 8);
}
config = config | (static_cast<uint64_t>(isUnitFlag) << 63);
set_fpc(config);
}
__aicore__ inline void SetFixpipeNz2ndFlagImpl(uint16_t ndNum, uint16_t srcNdStride, uint32_t dstNdStride)
{
uint64_t config = (static_cast<uint64_t>(dstNdStride) << 32) | (static_cast<uint64_t>(srcNdStride) << 16) |
(static_cast<uint64_t>(ndNum));
set_loop3_para(config);
}
__aicore__ inline void SetFixpipePreQuantFlagImpl(uint64_t config)
{
set_quant_pre(config);
}
* Fixpipe *
* ************************************************************************************************* */
template <CO2Layout format>
struct TransformParamsL311 {};
template <>
struct TransformParamsL311<CO2Layout::NZ> {
__aicore__ inline TransformParamsL311(){};
using PARAMS = uint8_t;
};
template <>
struct TransformParamsL311<CO2Layout::ROW_MAJOR> {
__aicore__ inline TransformParamsL311(){};
using PARAMS = Nz2NdParams;
};
template <>
struct TransformParamsL311<CO2Layout::COLUMN_MAJOR> {
__aicore__ inline TransformParamsL311(){};
using PARAMS = uint8_t;
};
template <CO2Layout format = CO2Layout::ROW_MAJOR>
struct FixpipeParamsL311 {
__aicore__ FixpipeParamsL311() {}
__aicore__ FixpipeParamsL311(const uint16_t nSizeIn_, const uint16_t mSizeIn_, const uint16_t srcStrideIn_,
const uint32_t dstStrideIn_)
{
nSize = nSizeIn_;
mSize = mSizeIn_;
srcStride = srcStrideIn_;
dstStride = dstStrideIn_;
}
uint16_t nSize = 0;
uint16_t mSize = 0;
uint16_t srcStride = 0;
uint32_t dstStride = 0;
uint64_t deqScalar;
QuantMode_t quantPre = QuantMode_t::NoQuant;
bool reluEn = false;
uint8_t unitFlag = 0;
bool subBlockId = false;
uint8_t dualDstCtl = 0;
typename TransformParamsL311<format>::PARAMS params;
bool isChannelSplit = false;
};
struct FixpipeTiling {
uint16_t nIterNum = 0;
uint16_t nSize = 0;
bool isDb = false;
uint16_t tailNSize = 0;
};
__aicore__ inline FixpipeTiling GenFixpipeTiling(uint16_t n)
{
FixpipeTiling tiling;
uint16_t maxDeqNums = 1024;
if (n <= maxDeqNums) {
tiling.nIterNum = 1;
tiling.nSize = n;
tiling.isDb = false;
tiling.tailNSize = 0;
} else {
tiling.isDb = true;
uint16_t dbMaxDeqNums = maxDeqNums / 2;
tiling.nIterNum = n / dbMaxDeqNums;
tiling.nSize = dbMaxDeqNums;
tiling.tailNSize = n % dbMaxDeqNums;
}
return tiling;
}
__aicore__ inline bool IsVectorQuantMode(QuantMode_t quantPre)
{
return (quantPre == QuantMode_t::VDEQF16 || quantPre == QuantMode_t::VQF162B8_PRE ||
quantPre == QuantMode_t::VREQ8 || quantPre == QuantMode_t::VQF162S4_PRE ||
quantPre == QuantMode_t::VREQ4 || quantPre == QuantMode_t::VDEQS16 ||
quantPre == QuantMode_t::VQF162S16_PRE);
}
__aicore__ inline bool IsScalarQuantMode(QuantMode_t quantPre)
{
return (quantPre == QuantMode_t::DEQF16 || quantPre == QuantMode_t::QF162B8_PRE ||
quantPre == QuantMode_t::REQ8 || quantPre == QuantMode_t::QF162S4_PRE ||
quantPre == QuantMode_t::REQ4 || quantPre == QuantMode_t::DEQS16 ||
quantPre == QuantMode_t::QF162S16_PRE);
}
__aicore__ inline void CopyDeqTensorToFbuf(
__cbuf__ uint64_t *cbufWorkspace, const FixpipeTiling &fixpipeTiling, uint16_t calNSize, uint16_t nIterIndex)
{
constexpr uint32_t deqTensorAddrAlignValue = 128;
uint16_t deqDataSize = DivCeil(calNSize * sizeof(uint64_t), deqTensorAddrAlignValue) * deqTensorAddrAlignValue;
__fbuf__ uint64_t *deqTensorTempBuf =
AscendCUtils::GetTemporaryFbBufferAddr<uint64_t>(0, deqDataSize / sizeof(uint64_t));
uint32_t deqValueOffset = nIterIndex * fixpipeTiling.nSize;
constexpr uint16_t fbufBurstLenUnit = 64;
uint16_t fbufBurstLen = deqDataSize / fbufBurstLenUnit;
copy_cbuf_to_fbuf(deqTensorTempBuf, cbufWorkspace + deqValueOffset, 1, fbufBurstLen, 0, 0);
uint64_t deqTensorAddr = ((uint64_t)deqTensorTempBuf >> static_cast<uint64_t>(7)) << 8;
set_fpc(deqTensorAddr);
AscendCUtils::FreeTemporaryFbBuffer<uint64_t>(deqTensorTempBuf);
}
template <const FixpipeConfig& config>
__aicore__ inline void SetLoop3Para(const FixpipeParamsL311<config.format>& intriParams)
{
if constexpr (config.format == CO2Layout::ROW_MAJOR) {
ASCENDC_ASSERT((intriParams.params.ndNum > 0), { KERNEL_LOG(KERNEL_ERROR, "ndNum must larger than 0"); });
uint64_t loop3Para = static_cast<uint64_t>(intriParams.params.dstNdStride) << 32;
loop3Para |= static_cast<uint64_t>(intriParams.params.ndNum);
loop3Para |= static_cast<uint64_t>(intriParams.params.srcNdStride) << 16;
set_loop3_para(loop3Para);
} else if constexpr (config.format == CO2Layout::COLUMN_MAJOR) {
ASCENDC_ASSERT((intriParams.params.dnNum > 0), { KERNEL_LOG(KERNEL_ERROR, "dnNum must larger than 0"); });
uint64_t loop3Para = static_cast<uint64_t>(intriParams.params.dstDnMatrixStride) << 32;
loop3Para |= static_cast<uint64_t>(intriParams.params.dnNum);
loop3Para |= static_cast<uint64_t>(intriParams.params.srcNzMatrixStride) << 16;
set_loop3_para(loop3Para);
}
}
template <typename DstT, typename SrcT, const FixpipeConfig& config>
__aicore__ inline void FixpipeL0cToL1(__cbuf__ DstT* dst, __cc__ SrcT* src,
const FixpipeParamsL311<config.format>& intriParams, const FixpipeTiling& fixpipeTiling, uint16_t calNSize,
uint16_t nIterIndex = 0)
{
ASCENDC_DEBUG_ASSERT((!(intriParams.isChannelSplit)), KERNEL_LOG_INTERNAL(KERNEL_ERROR, "Failed to check isChannelSplit in Fixpipe, when src position is "
"CO1 and dst position is C1, isChannelSplit must be set as false \n"));
uint16_t cburstNum = fixpipeTiling.nSize / BLOCK_CUBE;
uint32_t srcOffset = cburstNum * nIterIndex * intriParams.srcStride * BLOCK_CUBE;
uint32_t dstOffset = 0;
bool nz2ndEn = false;
bool nz2dnEn = false;
if constexpr (config.format == CO2Layout::ROW_MAJOR) {
dstOffset = nIterIndex * fixpipeTiling.nSize;
nz2ndEn = true;
} else if constexpr (config.format == CO2Layout::COLUMN_MAJOR) {
dstOffset = nIterIndex * fixpipeTiling.nSize * intriParams.dstStride;
nz2dnEn = true;
} else {
dstOffset = cburstNum * nIterIndex * intriParams.dstStride * DEFAULT_C0_SIZE / sizeof(DstT);
}
return copy_matrix_cc_to_cbuf((__cbuf__ DstT *)(dst + dstOffset), (__cc__ SrcT *)(src + srcOffset),
0, calNSize, intriParams.mSize, intriParams.dstStride, intriParams.srcStride,
0, intriParams.unitFlag, static_cast<uint64_t>(intriParams.quantPre),
static_cast<uint8_t>(intriParams.reluEn), false, nz2ndEn,
static_cast<uint64_t>(QuantMode_post::NoConv), 0, false, false, 0, false, false, false,
false, false, nz2dnEn);
}
template <const FixpipeConfig& config>
__aicore__ inline uint64_t GetGMLen(const FixpipeParamsL311<config.format>& intriParams,
const uint16_t calNSize, const uint16_t dstEleSize)
{
constexpr uint16_t fracNsize = 16;
uint64_t cburstNum = calNSize / fracNsize;
uint64_t gmLen = (cburstNum - 1) * intriParams.dstStride * dstEleSize +
intriParams.mSize * fracNsize * dstEleSize;
if constexpr (config.format == CO2Layout::ROW_MAJOR) {
gmLen = (static_cast<uint64_t>(intriParams.params.ndNum) - 1) * dstEleSize * intriParams.params.dstNdStride +
(intriParams.mSize - 1) * intriParams.dstStride * dstEleSize +
cburstNum * fracNsize * dstEleSize;
} else if constexpr (config.format == CO2Layout::COLUMN_MAJOR) {
gmLen = (static_cast<uint64_t>(intriParams.params.dnNum) - 1) * dstEleSize * intriParams.params.dstDnMatrixStride +
(intriParams.nSize - 1) * intriParams.dstStride * dstEleSize +
intriParams.mSize / fracNsize * fracNsize * dstEleSize;
}
return gmLen;
}
template <typename DstT, typename SrcT, const FixpipeConfig& config>
__aicore__ inline void FixpipeL0cToOut(__gm__ DstT* dst, __cc__ SrcT* src,
const FixpipeParamsL311<config.format>& intriParams, const FixpipeTiling& fixpipeTiling, uint16_t calNSize,
uint16_t nIterIndex = 0)
{
if (intriParams.isChannelSplit) {
ASCENDC_DEBUG_ASSERT((IsSameType<SrcT, float>::value && IsSameType<DstT, float>::value), KERNEL_LOG_INTERNAL(KERNEL_ERROR, "Failed to check "
"isChannelSplit value in Fixpipe, isChannelSplit can be set true only when src and dst are both float \n"));
ASCENDC_DEBUG_ASSERT((config.format == CO2Layout::NZ), KERNEL_LOG_INTERNAL(KERNEL_ERROR, "Failed to check format value in Fixpipe, "
"when isChannelSplit is set true, format must be set as NZ \n"));
}
uint16_t cburstNum = fixpipeTiling.nSize / BLOCK_CUBE;
uint32_t srcOffset = cburstNum * nIterIndex * intriParams.srcStride * BLOCK_CUBE;
uint32_t dstOffset = 0;
bool nz2ndEn = false;
bool nz2dnEn = false;
if constexpr (config.format == CO2Layout::ROW_MAJOR) {
dstOffset = nIterIndex * fixpipeTiling.nSize;
nz2ndEn = true;
} else if constexpr (config.format == CO2Layout::COLUMN_MAJOR) {
dstOffset = nIterIndex * fixpipeTiling.nSize * intriParams.dstStride;
nz2dnEn = true;
} else {
dstOffset = cburstNum * nIterIndex * intriParams.dstStride * DEFAULT_C0_SIZE / sizeof(DstT);
}
if constexpr (g_gm_overflow_check) {
bool isSrc = false;
uint16_t dstEleSize = sizeof(DstT);
uint64_t gmLen = GetGMLen<config>(intriParams, calNSize, dstEleSize);
AscendCUtils::CheckGmMemOverflow((__gm__ DstT*)(dst + dstOffset), isSrc, gmLen);
}
if constexpr (!(IsSameType<SrcT, float>::value || IsSameType<DstT, float>::value)) {
return copy_matrix_cc_to_gm((__gm__ DstT *)(dst + dstOffset), (__cc__ SrcT *)(src + srcOffset),
0, calNSize, intriParams.mSize, intriParams.dstStride, intriParams.srcStride,
0, intriParams.unitFlag, static_cast<uint64_t>(intriParams.quantPre),
static_cast<uint8_t>(intriParams.reluEn), intriParams.isChannelSplit, nz2ndEn,
static_cast<uint64_t>(QuantMode_post::NoConv), 0, false, false, 0, false, false, false,
false, false, nz2dnEn);
}
ASCENDC_DEBUG_ASSERT(!(IsSameType<SrcT, float>::value || IsSameType<DstT, float>::value), KERNEL_LOG_INTERNAL(KERNEL_ERROR,
"unsupported float L0c 2 GM "));
}
template <typename DstT, typename SrcT, const FixpipeConfig& config>
__aicore__ inline void FixpipeL0C2L1ImplN(__cbuf__ DstT* dst, __cc__ SrcT* src, __cbuf__ uint64_t* cbufWorkspace,
const FixpipeParamsL311<config.format>& intriParams, const FixpipeTiling& fixpipeTiling, uint16_t calNSize,
uint16_t nIterIndex)
{
CopyDeqTensorToFbuf(cbufWorkspace, fixpipeTiling, calNSize, nIterIndex);
PipeBarrier<PIPE_FIX>();
FixpipeL0cToL1<DstT, SrcT, config>(dst, src, intriParams, fixpipeTiling, calNSize, nIterIndex);
}
template <typename DstT, typename SrcT, const FixpipeConfig& config>
__aicore__ inline void FixpipeL0C2GMImplN(__gm__ DstT* dst, __cc__ SrcT* src, __cbuf__ uint64_t* cbufWorkspace,
const FixpipeParamsL311<config.format>& intriParams, const FixpipeTiling& fixpipeTiling, uint16_t calNSize,
uint16_t nIterIndex)
{
CopyDeqTensorToFbuf(cbufWorkspace, fixpipeTiling, calNSize, nIterIndex);
PipeBarrier<PIPE_FIX>();
FixpipeL0cToOut<DstT, SrcT, config>(dst, src, intriParams, fixpipeTiling, calNSize, nIterIndex);
}
template <typename DstT, typename SrcT, const FixpipeConfig& config>
__aicore__ inline void FixpipeL0C2L1Impl(
__cbuf__ DstT* dst, __cc__ SrcT* src, const FixpipeParamsL311<config.format>& intriParams)
{
SetLoop3Para<config>(intriParams);
if constexpr (config.format == CO2Layout::COLUMN_MAJOR) {
uint64_t channelPara = static_cast<uint64_t>(intriParams.params.srcNzC0Stride)
<< 48;
set_channel_para(channelPara);
}
make code for scalar quant mode:
1. copy deq scalar u64 immediate
2. code gen: move data from l0c to l1
*/
if (IsScalarQuantMode(intriParams.quantPre)) {
set_quant_pre(intriParams.deqScalar);
}
PipeBarrier<PIPE_FIX>();
FixpipeTiling fixpipeTiling;
FixpipeL0cToL1<DstT, SrcT, config>(dst, src, intriParams, fixpipeTiling, intriParams.nSize);
}
template <typename DstT, typename SrcT, const FixpipeConfig& config>
__aicore__ inline void FixpipeL0C2L1Impl(__cbuf__ DstT* dst, __cc__ SrcT* src, __cbuf__ uint64_t* cbufWorkspace,
const FixpipeParamsL311<config.format> &intriParams)
{
SetLoop3Para<config>(intriParams);
if constexpr (config.format == CO2Layout::COLUMN_MAJOR) {
uint64_t channelPara = static_cast<uint64_t>(intriParams.params.srcNzC0Stride)
<< 48;
set_channel_para(channelPara);
}
make code for vector quant mode:
1. generate fixpipeTiling
2. copy deq tensor from l1 to fb1 (l1 -> fb1)
3. code gen: move data from l0c to l1
*/
if (IsVectorQuantMode(intriParams.quantPre)) {
FixpipeTiling fixpipeTiling = GenFixpipeTiling(intriParams.nSize);
for (uint16_t i = 0; i < fixpipeTiling.nIterNum; ++i) {
FixpipeL0C2L1ImplN<DstT, SrcT, config>(
dst, src, cbufWorkspace, intriParams, fixpipeTiling, fixpipeTiling.nSize, i);
}
if (fixpipeTiling.tailNSize > 0) {
FixpipeL0C2L1ImplN<DstT, SrcT, config>(dst, src, cbufWorkspace, intriParams,
fixpipeTiling, fixpipeTiling.tailNSize, fixpipeTiling.nIterNum);
}
return;
}
}
template <typename DstT, typename SrcT, const FixpipeConfig& config>
__aicore__ inline void FixpipeL0C2GMImpl(__gm__ DstT* dst, __cc__ SrcT* src,
const FixpipeParamsL311<config.format>& intriParams)
{
SetLoop3Para<config>(intriParams);
if constexpr (config.format == CO2Layout::COLUMN_MAJOR) {
uint64_t channelPara = static_cast<uint64_t>(intriParams.params.srcNzC0Stride)
<< 48;
set_channel_para(channelPara);
}
make code for scalar quant mode:
1. copy deq scalar u64 immediate
2. code gen: move data from l0c to gm
*/
if (IsScalarQuantMode(intriParams.quantPre)) {
set_quant_pre(intriParams.deqScalar);
}
PipeBarrier<PIPE_FIX>();
FixpipeTiling fixpipeTiling;
FixpipeL0cToOut<DstT, SrcT, config>(dst, src, intriParams, fixpipeTiling, intriParams.nSize);
}
template <typename DstT, typename SrcT, const FixpipeConfig& config>
__aicore__ inline void FixpipeL0C2GMImpl(__gm__ DstT* dst, __cc__ SrcT* src, __cbuf__ uint64_t* cbufWorkspace,
const FixpipeParamsL311<config.format>& intriParams)
{
SetLoop3Para<config>(intriParams);
if constexpr (config.format == CO2Layout::COLUMN_MAJOR) {
uint64_t channelPara = static_cast<uint64_t>(intriParams.params.srcNzC0Stride)
<< 48;
set_channel_para(channelPara);
}
make code for vector quant mode:
1. generate fixpipeTiling
2. copy deq tensor from l1 to fb1 (l1 -> fb1)
3. code gen: move data from l0c to gm
*/
FixpipeTiling fixpipeTiling = GenFixpipeTiling(intriParams.nSize);
if (IsVectorQuantMode(intriParams.quantPre)) {
for (uint16_t i = 0; i < fixpipeTiling.nIterNum; ++i) {
FixpipeL0C2GMImplN<DstT, SrcT, config>(
dst, src, cbufWorkspace, intriParams, fixpipeTiling, fixpipeTiling.nSize, i);
}
if (fixpipeTiling.tailNSize > 0) {
FixpipeL0C2GMImplN<DstT, SrcT, config>(dst, src, cbufWorkspace, intriParams,
fixpipeTiling, fixpipeTiling.tailNSize, fixpipeTiling.nIterNum);
}
return;
}
}
template <CO2Layout format>
__aicore__ inline void TransFixpipeParamsV220ToFixpipeParamsL311(
const FixpipeParamsV220 &intriParams, FixpipeParamsL311<format> &dstParams)
{
dstParams.nSize = intriParams.nSize;
dstParams.mSize = intriParams.mSize;
dstParams.srcStride = intriParams.srcStride;
dstParams.dstStride = intriParams.dstStride;
dstParams.deqScalar = intriParams.deqScalar;
dstParams.quantPre = intriParams.quantPre;
dstParams.reluEn = intriParams.reluEn;
dstParams.unitFlag = intriParams.unitFlag;
dstParams.isChannelSplit = intriParams.isChannelSplit;
if constexpr (format == CO2Layout::ROW_MAJOR) {
dstParams.params.ndNum = intriParams.ndNum;
dstParams.params.dstNdStride = intriParams.dstNdStride;
dstParams.params.srcNdStride = intriParams.srcNdStride;
} else if constexpr(format == CO2Layout::COLUMN_MAJOR) {
dstParams.params.ndNum = intriParams.ndNum;
dstParams.params.dstNdStride = intriParams.srcNdStride;
dstParams.params.srcNdStride = intriParams.srcNdStride;
}
}
template <typename DstT, typename SrcT, const FixpipeConfig& config>
__aicore__ inline void FixpipeL0C2L1Impl(__cbuf__ DstT *dst, __cc__ SrcT *src, const FixpipeParamsV220 &intriParams)
{
FixpipeParamsL311<config.format> params;
TransFixpipeParamsV220ToFixpipeParamsL311(intriParams, params);
FixpipeL0C2L1Impl<DstT, SrcT, config>(dst, src, params);
}
template <typename DstT, typename SrcT, const FixpipeConfig& config>
__aicore__ inline void FixpipeL0C2L1Impl(
__cbuf__ DstT *dst, __cc__ SrcT *src, __cbuf__ uint64_t *cbufWorkspace, const FixpipeParamsV220 &intriParams)
{
FixpipeParamsL311<config.format> params;
TransFixpipeParamsV220ToFixpipeParamsL311(intriParams, params);
FixpipeL0C2L1Impl<DstT, SrcT, config>(dst, src, cbufWorkspace, params);
}
template <typename DstT, typename SrcT, const FixpipeConfig& config>
__aicore__ inline void FixpipeL0C2GMImpl(__gm__ DstT *dst, __cc__ SrcT *src, const FixpipeParamsV220 &intriParams)
{
FixpipeParamsL311<config.format> params;
TransFixpipeParamsV220ToFixpipeParamsL311(intriParams, params);
FixpipeL0C2GMImpl<DstT, SrcT, config>(dst, src, params);
}
template <typename DstT, typename SrcT, const FixpipeConfig &config>
__aicore__ inline void FixpipeL0C2GMImpl(
__gm__ DstT *dst, __cc__ SrcT *src, __cbuf__ uint64_t *cbufWorkspace, const FixpipeParamsV220 &intriParams)
{
FixpipeParamsL311<config.format> params;
TransFixpipeParamsV220ToFixpipeParamsL311(intriParams, params);
FixpipeL0C2GMImpl<DstT, SrcT, config>(dst, src, cbufWorkspace, params);
}
template <typename T, typename U, const FixpipeConfig &config>
__aicore__ inline void FixpipeL0C2UBImpl(__ubuf__ T *dst, __cc__ U *src, const FixpipeParamsV220 &intriParams)
{
ASCENDC_ASSERT(false, { KERNEL_LOG(KERNEL_ERROR, "Fixpipe doesn't support L0C to UB on current device\n"); });
}
template <typename T, typename U, const FixpipeConfig &config>
__aicore__ inline void FixpipeL0C2UBImpl(
__ubuf__ T *dst, __cc__ U *src, __cbuf__ uint64_t *cbufWorkspace, const FixpipeParamsV220 &intriParams)
{
ASCENDC_ASSERT(false, { KERNEL_LOG(KERNEL_ERROR, "Fixpipe doesn't support L0C to UB on current device\n"); });
}
}
#endif
#if defined(__UNDEF_ASCENDC_INCLUDE_INTERNAL_HEADERS_KERNEL_OPERATOR_FIXPIPE_IMPL_H__)
#undef __ASCENDC_INCLUDE_INTERNAL_HEADERS__
#undef __UNDEF_ASCENDC_INCLUDE_INTERNAL_HEADERS_KERNEL_OPERATOR_FIXPIPE_IMPL_H__
#endif