* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#include <mki/base/bishengir_kernel_base.h>
#include <mki/utils/const/op_const.h>
#include <mki/utils/log/log.h>
#include <mki_loader/op_register.h>
#include "mixkernels/utils/common.h"
#include "tiling/fusion_tiling.h"
#include "tiling/tiling_data.h"
#include "atbops/params/params.h"
static constexpr uint32_t TENSOR_INPUT_NUM_MATMUL_ADD = 3;
static constexpr uint32_t TENSOR_INPUT_NUM_MATMUL_ACTIVATE = 2;
static constexpr uint32_t TENSOR_OUTPUT_NUM = 1;
namespace AtbOps {
using namespace Mki;
class FusionKernel : public BishengIRKernelBase {
public:
explicit FusionKernel(const std::string &kernelName, const BinHandle *handle) noexcept
: BishengIRKernelBase(kernelName, handle)
{
}
bool CanSupport(const LaunchParam &launchParam) const override
{
OpParam::Fusion fusionType = launchParam.GetParam<OpParam::Fusion>();
if (fusionType.fusionType == OpParam::Fusion::MATMUL_ADD) {
MKI_CHECK(launchParam.GetInTensorCount() == TENSOR_INPUT_NUM_MATMUL_ADD, "in tensor num invalid",
return false);
MKI_CHECK(launchParam.GetOutTensorCount() == TENSOR_OUTPUT_NUM, "out tensor num invalid", return false);
auto inTensor0 = launchParam.GetInTensor(DIM_0);
MKI_CHECK(inTensor0.desc.dtype == TENSOR_DTYPE_FLOAT16, "in tensor 0 dtype invalid", return false);
auto inTensor1 = launchParam.GetInTensor(DIM_1);
MKI_CHECK(inTensor1.desc.dtype == TENSOR_DTYPE_FLOAT16, "in tensor 1 dtype invalid", return false);
auto inTensor2 = launchParam.GetInTensor(DIM_2);
MKI_CHECK(inTensor2.desc.dtype == TENSOR_DTYPE_FLOAT16, "in tensor 2 dtype invalid", return false);
} else if (fusionType.fusionType == OpParam::Fusion::MATMUL_GELU) {
MKI_CHECK(launchParam.GetInTensorCount() == TENSOR_INPUT_NUM_MATMUL_ACTIVATE, "in tensor num invalid",
return false);
MKI_CHECK(launchParam.GetOutTensorCount() == TENSOR_OUTPUT_NUM, "out tensor num invalid", return false);
auto inTensor0 = launchParam.GetInTensor(DIM_0);
MKI_CHECK(inTensor0.desc.dtype == TENSOR_DTYPE_FLOAT16, "in tensor 0 dtype invalid", return false);
auto inTensor1 = launchParam.GetInTensor(DIM_1);
MKI_CHECK(inTensor1.desc.dtype == TENSOR_DTYPE_FLOAT16, "in tensor 1 dtype invalid", return false);
}
for (size_t i = 0; i < launchParam.GetInTensorCount(); i++) {
auto inTensor = launchParam.GetInTensor(i);
MKI_CHECK(inTensor.desc.dtype == TENSOR_DTYPE_FLOAT16, "in tensor dtype invalid", return false);
MKI_CHECK(inTensor.desc.format == TENSOR_FORMAT_ND, "in tensor " << i << " format invalid", return false);
MKI_CHECK(inTensor.desc.dims.size() == DIM_2, "in tensor " << i << " dim num invalid", return false);
}
for (size_t i = 0; i < TENSOR_OUTPUT_NUM; i++) {
auto outTensor = launchParam.GetOutTensor(i);
MKI_CHECK(outTensor.desc.dtype == TENSOR_DTYPE_FLOAT16, "in tensor dtype invalid", return false);
MKI_CHECK(outTensor.desc.format == TENSOR_FORMAT_ND, "out tensor " << i << " format invalid", return false);
MKI_CHECK(outTensor.desc.dims.size() == DIM_2, "out tensor " << i << " dim num invalid", return false);
}
return true;
}
uint64_t GetTilingSize(const LaunchParam &launchParam) const override
{
(void)launchParam;
return sizeof(FusionTilingData);
}
Status InitImpl(const LaunchParam &launchParam) override
{
kernelInfo_.SetHwsyncIdx(0);
return FusionTiling(launchParam, kernelInfo_);
}
};
using FusionMatmulAddKernel = FusionKernel;
using FusionMatmulGeluKernel = FusionKernel;
using FusionMatmulSigmoidKernel = FusionKernel;
using FusionMatmulSwiGluKernel = FusionKernel;
using FusionErasedKernel = FusionKernel;
REG_KERNEL_BASE(FusionMatmulAddKernel);
REG_KERNEL_BASE(FusionMatmulGeluKernel);
REG_KERNEL_BASE(FusionMatmulSigmoidKernel);
REG_KERNEL_BASE(FusionMatmulSwiGluKernel);
REG_KERNEL_BASE(FusionErasedKernel);
}