* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
* \file mla_prolog_def.cpp
* \brief
*/
#include "register/op_def_registry.h"
namespace ops {
class MlaProlog : public OpDef {
public:
MlaProlog(const char *name) : OpDef(name)
{
this->Input("token_x")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_INT8, ge::DT_INT8})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
this->Input("weight_dq")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_INT8, ge::DT_INT8})
.FormatList({ge::FORMAT_FRACTAL_NZ})
.AutoContiguous();
this->Input("weight_uq_qr")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_INT8, ge::DT_INT8, ge::DT_INT8, ge::DT_INT8})
.FormatList({ge::FORMAT_FRACTAL_NZ})
.AutoContiguous();
this->Input("weight_uk")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
this->Input("weight_dkv_kr")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_INT8, ge::DT_INT8})
.FormatList({ge::FORMAT_FRACTAL_NZ})
.AutoContiguous();
this->Input("rmsnorm_gamma_cq")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
this->Input("rmsnorm_gamma_ckv")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
this->Input("rope_sin")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
this->Input("rope_cos")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
this->Input("cache_index")
.ParamType(REQUIRED)
.DataTypeList({ge::DT_INT64})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
this->Input("kv_cache")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_INT8, ge::DT_BF16, ge::DT_INT8, ge::DT_BF16})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
this->Input("kr_cache")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_INT8, ge::DT_BF16, ge::DT_INT8, ge::DT_BF16})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
this->Input("dequant_scale_x")
.ParamType(OPTIONAL)
.DataTypeList({ge::DT_FLOAT})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
this->Input("dequant_scale_w_dq")
.ParamType(OPTIONAL)
.DataTypeList({ge::DT_FLOAT})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
this->Input("dequant_scale_w_uq_qr")
.ParamType(OPTIONAL)
.DataTypeList({ge::DT_FLOAT})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
this->Input("dequant_scale_w_dkv_kr")
.ParamType(OPTIONAL)
.DataTypeList({ge::DT_FLOAT})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
this->Input("quant_scale_ckv")
.ParamType(OPTIONAL)
.DataTypeList({ge::DT_FLOAT})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
this->Input("quant_scale_ckr")
.ParamType(OPTIONAL)
.DataTypeList({ge::DT_FLOAT})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
this->Input("smooth_scales_cq")
.ParamType(OPTIONAL)
.DataTypeList({ge::DT_FLOAT})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
this->Output("query")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16})
.FormatList({ge::FORMAT_ND});
this->Output("query_rope")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16})
.FormatList({ge::FORMAT_ND});
this->Output("kv_cache")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_INT8, ge::DT_BF16, ge::DT_INT8, ge::DT_BF16})
.FormatList({ge::FORMAT_ND});
this->Output("kr_cache")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_INT8, ge::DT_BF16, ge::DT_INT8, ge::DT_BF16})
.FormatList({ge::FORMAT_ND});
this->Attr("rmsnorm_epsilon_cq").AttrType(OPTIONAL).Float(1e-05f);
this->Attr("rmsnorm_epsilon_ckv").AttrType(OPTIONAL).Float(1e-05f);
this->Attr("cache_mode").AttrType(OPTIONAL).String("BNSD");
OpAICoreConfig aicore_config;
aicore_config.DynamicCompileStaticFlag(true)
.DynamicFormatFlag(true)
.DynamicRankSupportFlag(true)
.DynamicShapeSupportFlag(true)
.NeedCheckSupportFlag(false)
.PrecisionReduceFlag(true)
.ExtendCfgInfo("aclnnSupport.value", "support_aclnn");
this->AICore().AddConfig("ascend910b", aicore_config);
this->AICore().AddConfig("ascend910_93", aicore_config);
OpAICoreConfig aicore_config_95;
aicore_config_95.Input("token_x")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_INT8, ge::DT_INT8, ge::DT_FLOAT8_E4M3FN, ge::DT_FLOAT8_E4M3FN})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
aicore_config_95.Input("weight_dq")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_INT8, ge::DT_INT8, ge::DT_FLOAT8_E4M3FN, ge::DT_FLOAT8_E4M3FN})
.FormatList({ge::FORMAT_FRACTAL_NZ})
.AutoContiguous();
aicore_config_95.Input("weight_uq_qr")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_INT8, ge::DT_INT8, ge::DT_INT8, ge::DT_INT8, ge::DT_FLOAT8_E4M3FN, ge::DT_FLOAT8_E4M3FN})
.FormatList({ge::FORMAT_FRACTAL_NZ})
.AutoContiguous();
aicore_config_95.Input("weight_uk")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
aicore_config_95.Input("weight_dkv_kr")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_INT8, ge::DT_INT8, ge::DT_FLOAT8_E4M3FN, ge::DT_FLOAT8_E4M3FN})
.FormatList({ge::FORMAT_FRACTAL_NZ})
.AutoContiguous();
aicore_config_95.Input("rmsnorm_gamma_cq")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
aicore_config_95.Input("rmsnorm_gamma_ckv")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
aicore_config_95.Input("rope_sin")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
aicore_config_95.Input("rope_cos")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
aicore_config_95.Input("cache_index")
.ParamType(REQUIRED)
.DataTypeList({ge::DT_INT64})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
aicore_config_95.Input("kv_cache")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_INT8, ge::DT_BF16, ge::DT_INT8, ge::DT_BF16, ge::DT_FLOAT8_E4M3FN, ge::DT_BF16})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
aicore_config_95.Input("kr_cache")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_INT8, ge::DT_BF16, ge::DT_INT8, ge::DT_BF16, ge::DT_FLOAT8_E4M3FN, ge::DT_BF16})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
aicore_config_95.Input("dequant_scale_x")
.ParamType(OPTIONAL)
.DataTypeList({ge::DT_FLOAT})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
aicore_config_95.Input("dequant_scale_w_dq")
.ParamType(OPTIONAL)
.DataTypeList({ge::DT_FLOAT})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
aicore_config_95.Input("dequant_scale_w_uq_qr")
.ParamType(OPTIONAL)
.DataTypeList({ge::DT_FLOAT})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
aicore_config_95.Input("dequant_scale_w_dkv_kr")
.ParamType(OPTIONAL)
.DataTypeList({ge::DT_FLOAT})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
aicore_config_95.Input("quant_scale_ckv")
.ParamType(OPTIONAL)
.DataTypeList({ge::DT_FLOAT})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
aicore_config_95.Input("quant_scale_ckr")
.ParamType(OPTIONAL)
.DataTypeList({ge::DT_FLOAT})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
aicore_config_95.Input("smooth_scales_cq")
.ParamType(OPTIONAL)
.DataTypeList({ge::DT_FLOAT})
.FormatList({ge::FORMAT_ND})
.AutoContiguous();
aicore_config_95.Output("query")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16})
.FormatList({ge::FORMAT_ND});
aicore_config_95.Output("query_rope")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16, ge::DT_BF16})
.FormatList({ge::FORMAT_ND});
aicore_config_95.Output("kv_cache")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_INT8, ge::DT_BF16, ge::DT_INT8, ge::DT_BF16, ge::DT_FLOAT8_E4M3FN, ge::DT_BF16})
.FormatList({ge::FORMAT_ND});
aicore_config_95.Output("kr_cache")
.ParamType(REQUIRED)
.DataType({ge::DT_BF16, ge::DT_INT8, ge::DT_BF16, ge::DT_INT8, ge::DT_BF16, ge::DT_FLOAT8_E4M3FN, ge::DT_BF16})
.FormatList({ge::FORMAT_ND});
aicore_config_95.DynamicCompileStaticFlag(true)
.DynamicFormatFlag(true)
.DynamicRankSupportFlag(true)
.DynamicShapeSupportFlag(true)
.NeedCheckSupportFlag(false)
.PrecisionReduceFlag(true)
.ExtendCfgInfo("aclnnSupport.value", "support_aclnn");
this->AICore().AddConfig("ascend950", aicore_config_95);
}
};
OP_ADD(MlaProlog, optiling::MlaPrologCompileInfo);
}