* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
* \file test_cost_model.cpp
* \brief
*/
#include "gtest/gtest.h"
#include <dlfcn.h>
#include "interface/configs/config_manager.h"
#include "operator/models/llama/llama_def.h"
#include "cost_model/simulation/common/CommonType.h"
#include "test_common.h"
#include "test_dev_func_runner.h"
#include "cost_model/simulation/cost_model_launcher.h"
using namespace npu::tile_fwk;
namespace CostModel {
class CostModelTest : public testing::Test {
public:
static void SetUpTestCase() {}
static void TearDownTestCase() {}
void SetUp() override
{
oriEnableCostModel = config::GetPlatformConfig(KEY_ENABLE_COST_MODEL, oriEnableCostModel);
config::SetPlatformConfig(KEY_ENABLE_COST_MODEL, true);
oriEnableAihacBackend = config::GetPlatformConfig(KEY_ENABLE_AIHAC_BACKEND, oriEnableAihacBackend);
config::SetPlatformConfig(KEY_ENABLE_AIHAC_BACKEND, true);
oriEnableBinaryCache = config::GetPlatformConfig(KEY_ENABLE_BINARY_CACHE, oriEnableBinaryCache);
config::SetPlatformConfig(KEY_ENABLE_BINARY_CACHE, false);
Program::GetInstance().Reset();
RuntimeSetDevice(GetDeviceIdByEnvVar());
}
void TearDown() override
{
config::SetPlatformConfig(KEY_ENABLE_COST_MODEL, oriEnableCostModel);
config::SetPlatformConfig(KEY_ENABLE_AIHAC_BACKEND, oriEnableAihacBackend);
config::SetPlatformConfig(KEY_ENABLE_BINARY_CACHE, oriEnableBinaryCache);
}
protected:
bool oriEnableAihacBackend = false;
bool oriEnableCostModel = false;
bool oriEnableBinaryCache = false;
};
class CostModelDynTest : public testing::Test {
public:
static void SetUpTestCase() {}
static void TearDownTestCase() {}
void SetUp() override
{
cacheEnable = config::GetPassGlobalConfig(KEY_ENABLE_BINARY_CACHE, false);
config::SetPassGlobalConfig(KEY_ENABLE_BINARY_CACHE, false);
oriEnableAihacBackend = config::GetPlatformConfig(KEY_ENABLE_AIHAC_BACKEND, oriEnableAihacBackend);
config::SetPlatformConfig(KEY_ENABLE_AIHAC_BACKEND, true);
Program::GetInstance().Reset();
}
void TearDown() override
{
config::SetPassGlobalConfig(KEY_ENABLE_BINARY_CACHE, cacheEnable);
config::SetPlatformConfig(KEY_ENABLE_AIHAC_BACKEND, oriEnableAihacBackend);
}
protected:
bool oriEnableAihacBackend = false;
bool cacheEnable = false;
};
void CostModelTestLoopViewAssemble(const Tensor& t0, const Tensor& t1, const Tensor& blockTable, Tensor& out, int s)
{
FUNCTION("main", {t0, t1, blockTable}, {out})
{
LOOP("L0", FunctionType::DYNAMIC_LOOP, i, LoopRange(GetInputShape(t0, 0) / s))
{
SymbolicScalar idx = GetTensorData(blockTable, {i, 0});
Tensor t0s = View(t0, {s, s}, {idx * s, 0});
Tensor qi(DT_FP32, {s, 2 * s}, "qi");
Assemble(t1, {0, 0}, qi);
Assemble(t0s, {0, s}, qi);
Tensor ki(DT_FP32, {s, 2 * s}, "ki");
Assemble(t0s, {0, 0}, ki);
Assemble(t1, {0, s}, ki);
Tensor t2 = Matrix::Matmul(DataType::DT_FP32, qi, ki, false, true);
Assemble(t2, {idx * s, 0}, out);
}
}
}
TEST_F(CostModelDynTest, TestDD)
{
config::SetRuntimeOption(CFG_RUN_MODE, CFG_RUN_MODE_SIM);
constexpr int tilingX = 32;
constexpr int tilingY = 32;
TileShape::Current().SetVecTile(tilingX, tilingY);
constexpr int tilingM = 32;
constexpr int tilingN = 32;
constexpr int tilingK = 32;
TileShape::Current().SetCubeTile({tilingM, tilingM}, {tilingN, tilingN}, {tilingK, tilingK});
std::vector<uint8_t> devProgBinary;
int s = 32;
int n = 8;
Tensor t0(DT_FP32, {n * s, s}, "t0");
Tensor t1(DT_FP32, {s, s}, "t1");
Tensor blockTable{DT_INT32, {n, 1}, "blockTable"};
Tensor out(DT_FP32, {n * s, s}, "out");
CostModelTestLoopViewAssemble(t0, t1, blockTable, out, s);
std::vector<int> tblData;
for (int i = 0; i < n; i++)
tblData.push_back(i);
ProgramData::GetInstance().AppendInputs({
RawTensorData::CreateConstantTensor<float>(t0, 1.0), RawTensorData::CreateConstantTensor<float>(t1, 2.0),
RawTensorData::CreateTensor<int>(blockTable, tblData),
});
ProgramData::GetInstance().AppendOutputs({
RawTensorData::CreateConstantTensor<float>(out, 0.0f),
});
auto func = Program::GetInstance().GetLastFunction();
#ifdef BUILD_WITH_CANN
CostModelLauncher::CostModelRunOnce(func);
std::vector<float> golden(n * s * s, 128.0f);
auto outs = npu::tile_fwk::ProgramData::GetInstance().GetOutputData(0);
EXPECT_TRUE(resultCmp(golden, (float*)outs->data(), 0.001f));
#endif
}
TEST_F(CostModelDynTest, TestGG)
{
config::SetRuntimeOption(CFG_RUN_MODE, CFG_RUN_MODE_SIM);
config::SetCodeGenConfig(KEY_CODEGEN_SUPPORT_TILE_TENSOR, false);
constexpr int tilingX = 32;
constexpr int tilingY = 32;
TileShape::Current().SetVecTile(tilingX, tilingY);
constexpr int tilingM = 32;
constexpr int tilingN = 32;
constexpr int tilingK = 32;
TileShape::Current().SetCubeTile({tilingM, tilingM}, {tilingN, tilingN}, {tilingK, tilingK});
Tensor t0(DT_FP32, {32, 32}, "t0");
Tensor t1(DT_FP32, {32, 32}, "t1");
Tensor t2(DT_FP32, {32, 32}, "t2");
Tensor t3(DT_FP32, {32, 32}, "t3");
FUNCTION("main", {t0, t1}, {t3}, {{t2, t0}})
{
LOOP("l0", FunctionType::DYNAMIC_LOOP, i, LoopRange(1))
{
UNUSED(i);
t3 = Add(t0, t1);
Assemble(t3, {0, 0}, t2);
}
}
ProgramData::GetInstance().AppendInputs({
RawTensorData::CreateConstantTensor<float>(t0, 1.0),
RawTensorData::CreateConstantTensor<float>(t1, 2.0),
});
ProgramData::GetInstance().AppendOutputs({
RawTensorData::CreateConstantTensor<float>(t3, 0.0f),
});
auto func = Program::GetInstance().GetLastFunction();
#ifdef BUILD_WITH_CANN
CostModelLauncher::CostModelRunOnce(func);
std::vector<float> golden(tilingX * tilingY, 3.0f);
auto outs = npu::tile_fwk::ProgramData::GetInstance().GetOutputData(0);
EXPECT_TRUE(resultCmp(golden, (float*)outs->data(), 0.001f));
#endif
}
}