* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
/*!
 * \file test_n_buffer_merge.cpp
 * \brief Unit test for n_buffer_merge pass.
 */
#include <gtest/gtest.h>
#include "symbolic_scalar_test_utils.h"
#include "interface/function/function.h"
#include "tilefwk/tilefwk.h"
#include "tilefwk/platform.h"
#include "interface/inner/tilefwk.h"
#include "passes/pass_mgr/pass_manager.h"
#include "interface/configs/config_manager.h"
#include "passes/tile_graph_pass/graph_partition/n_buffer_merge.h"
#include <fstream>
#include <vector>
#include <string>
#include "computational_graph_builder.h"
#include "interface/tensor/irbuilder.h"
namespace npu {
namespace tile_fwk {
class NBufferMergeTest : public testing::Test {
public:
void SetUp() override
{
Program::GetInstance().Reset();
config::Reset();
config::SetHostOption(COMPILE_STAGE, CS_EXECUTE_GRAPH);
}
void TearDown() override {}
static void SetUpTestCase() {}
static void TearDownTestCase() {}
Status TestNBufferMergeWithDifferentVecBufferSetting(std::map<int64_t, int64_t> vecNBufferSetting);
};
namespace {
std::shared_ptr<Function> BuildMultiInputOutputFunction()
{
auto function = std::make_shared<Function>(Program::GetInstance(), "TestNBufferMerge", "TestNBufferMerge", nullptr);
EXPECT_TRUE(function != nullptr);
std::vector<int64_t> shape = {8, 16};
auto incast1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
auto incast2 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
auto tensor1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
auto tensor2 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
auto tensor3 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
auto tensor4 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
auto outcast1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
auto outcast2 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
auto outcast3 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
auto outcast4 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
auto& mulOp1 = PassOperationUtils::AddOperation(*function, Opcode::OP_MUL, {incast1, incast2}, {tensor1});
auto& expOp1 = PassOperationUtils::AddOperation(*function, Opcode::OP_EXP, {tensor1}, {outcast1});
mulOp1.UpdateSubgraphID(0);
expOp1.UpdateSubgraphID(0);
auto& mulOp2 = PassOperationUtils::AddOperation(*function, Opcode::OP_MUL, {incast1, incast2}, {tensor2});
auto& expOp2 = PassOperationUtils::AddOperation(*function, Opcode::OP_EXP, {tensor2}, {outcast2});
mulOp2.UpdateSubgraphID(1);
expOp2.UpdateSubgraphID(1);
auto& mulOp3 = PassOperationUtils::AddOperation(*function, Opcode::OP_MUL, {incast1, incast2}, {tensor3});
auto& expOp3 = PassOperationUtils::AddOperation(*function, Opcode::OP_EXP, {tensor3}, {outcast3});
mulOp3.UpdateSubgraphID(2);
expOp3.UpdateSubgraphID(2);
auto& mulOp4 = PassOperationUtils::AddOperation(*function, Opcode::OP_MUL, {incast1, incast2}, {tensor4});
auto& expOp4 = PassOperationUtils::AddOperation(*function, Opcode::OP_EXP, {tensor4}, {outcast4});
mulOp4.UpdateSubgraphID(3);
expOp4.UpdateSubgraphID(3);
function->inCasts_.push_back(incast1);
function->inCasts_.push_back(incast2);
function->outCasts_.push_back(outcast1);
function->outCasts_.push_back(outcast2);
function->outCasts_.push_back(outcast3);
function->outCasts_.push_back(outcast4);
return function;
}
}
// Smoke test: drives PreCheck / Run / PostCheck through the Pass interface on a function
// with two independent COPY_IN -> COPY_OUT chains (subgraph ids 0 and 1).
// Note: the results of the three pass calls are not asserted here.
TEST_F(NBufferMergeTest, TestNBufferMerge)
{
    auto currFunctionPtr =
        std::make_shared<Function>(Program::GetInstance(), "TestNBufferMerge", "TestNBufferMerge", nullptr);
    EXPECT_TRUE(currFunctionPtr != nullptr);

    constexpr int kFirstSubgraph = 0;
    constexpr int kSecondSubgraph = 1;

    std::vector<int64_t> shape = {8, 16};
    auto newTensor = [&shape]() {
        return npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    };
    // Creation order: both incasts, then (staged, out) for chain one, then (staged, out) for chain two.
    auto firstIn = newTensor();
    auto secondIn = newTensor();
    auto firstStaged = newTensor();
    auto firstOut = newTensor();
    auto secondStaged = newTensor();
    auto secondOut = newTensor();

    // Adds a single-input, single-output operation and tags it with a subgraph id.
    auto addOp = [&currFunctionPtr](Opcode opcode, const auto& src, const auto& dst, int subgraphId) {
        auto& op = PassOperationUtils::AddOperation(*currFunctionPtr, opcode, {src}, {dst});
        op.UpdateSubgraphID(subgraphId);
    };
    addOp(Opcode::OP_COPY_IN, firstIn, firstStaged, kFirstSubgraph);
    addOp(Opcode::OP_COPY_IN, secondIn, secondStaged, kSecondSubgraph);
    addOp(Opcode::OP_COPY_OUT, firstStaged, firstOut, kFirstSubgraph);
    addOp(Opcode::OP_COPY_OUT, secondStaged, secondOut, kSecondSubgraph);

    currFunctionPtr->inCasts_.push_back(firstIn);
    currFunctionPtr->inCasts_.push_back(secondIn);
    currFunctionPtr->outCasts_.push_back(firstOut);
    currFunctionPtr->outCasts_.push_back(secondOut);

    // The pass is invoked through a reference to its Pass base class.
    NBufferMerge mergePass;
    Pass& passInterface = mergePass;
    passInterface.PreCheck(*currFunctionPtr);
    passInterface.Run(*currFunctionPtr, "", "");
    passInterface.PostCheck(*currFunctionPtr);
}
// With vecNBufferSetting {{-2, 0}} and mgVecParallelLb 2, the four-subgraph function is
// expected to end up with two subgraphs while keeping its 2 incasts and 4 outcasts.
// NOTE(review): presumably {-2, 0} selects "mode 3" as the test name suggests - confirm against NBufferMerge.
TEST_F(NBufferMergeTest, TestMulityInputOutputMode3)
{
    auto func = BuildMultiInputOutputFunction();
    NBufferMerge mergePass;

    constexpr int kVecParallelLb = 2;
    auto& configs = func->paramConfigs_;
    configs.mgVecParallelLb = kVecParallelLb;
    configs.vecNBufferSetting = {{-2, 0}};
    func->SetTotalSubGraphCount(4);

    const auto status = mergePass.RunOnFunction(*func);
    EXPECT_EQ(status, SUCCESS);

    EXPECT_EQ(func->GetTotalSubGraphCount(), 2);
    EXPECT_EQ(func->inCasts_.size(), 2);
    EXPECT_EQ(func->outCasts_.size(), 4);
}
// Configures merging through vecNBufferSettingByFunc using the key "func<magic>_0" and
// expects the four subgraphs to collapse to two, with incasts/outcasts untouched.
TEST_F(NBufferMergeTest, TestMulityInputOutputByFuncHashOrder)
{
    auto func = BuildMultiInputOutputFunction();
    NBufferMerge mergePass;

    const int kVecParallelNum = 2;
    auto& configs = func->paramConfigs_;
    configs.mgVecParallelLb = kVecParallelNum;

    // Key is derived from this function's magic number.
    const auto funcMagic = func->GetFuncMagic();
    const std::string funcKey = "func" + std::to_string(funcMagic) + "_0";
    configs.vecNBufferSettingByFunc = {
        {funcKey, kVecParallelNum}
    };
    func->SetTotalSubGraphCount(4);

    const auto status = mergePass.RunOnFunction(*func);
    EXPECT_EQ(status, SUCCESS);

    EXPECT_EQ(func->inCasts_.size(), 2);
    EXPECT_EQ(func->outCasts_.size(), 4);
    EXPECT_EQ(func->GetTotalSubGraphCount(), 2);
}
// Builds one COPY_IN subgraph plus 19 identical ABS -> EXP -> ADDS -> ASSEMBLE subgraphs,
// runs the pass with vecNBufferSetting {{-2, 1}, {-1, 4}, {1, 2}} and expects 11 subgraphs afterwards.
TEST_F(NBufferMergeTest, TestMode4)
{
    constexpr int kSubGraphNum = 20;
    constexpr int kVecParallelNum = 6;
    constexpr int kExpectedSubGraphCount = 11;

    ComputationalGraphBuilder G;
    std::vector<int64_t> tileShape{16, 16};

    // Subgraph 0: a single COPY_IN feeding "incast1".
    EXPECT_EQ(G.AddTensors(DataType::DT_FP32, tileShape, {"incast0", "incast1", "outcast"}), true);
    EXPECT_EQ(G.AddOps({Opcode::OP_COPY_IN}, {{"incast0"}}, {{"incast1"}}, {"copy_in"}, true), true);
    G.GetOp("copy_in")->UpdateSubgraphID(0);

    // Subgraphs 1..19: each reads "incast1" and assembles into the shared "outcast".
    const std::vector<Opcode> chainOpcodes{Opcode::OP_ABS, Opcode::OP_EXP, Opcode::OP_ADDS, Opcode::OP_ASSEMBLE};
    for (int id = 1; id < kSubGraphNum; ++id) {
        const std::string suffix = std::to_string(id);
        const std::string absOut = "tensor1" + suffix;
        const std::string expOut = "tensor2" + suffix;
        const std::string addsOut = "tensor3" + suffix;
        EXPECT_EQ(G.AddTensors(DataType::DT_FP32, tileShape, {absOut, expOut, addsOut}), true);

        const std::vector<std::vector<std::string>> inputs{{"incast1"}, {absOut}, {expOut}, {addsOut}};
        const std::vector<std::vector<std::string>> outputs{{absOut}, {expOut}, {addsOut}, {"outcast"}};
        const std::vector<std::string> opNames{"ABS_" + suffix, "EXP_" + suffix, "ADDS_" + suffix, "ASSEMBLE_" + suffix};
        EXPECT_EQ(G.AddOps(chainOpcodes, inputs, outputs, opNames, true), true);

        // Tag all four ops of the chain with this subgraph id (ABS, EXP, ADDS, ASSEMBLE order).
        for (const auto& opName : opNames) {
            G.GetOp(opName)->UpdateSubgraphID(id);
        }
    }
    EXPECT_EQ(G.SetInCast({"incast0"}), true);
    EXPECT_EQ(G.SetOutCast({"outcast"}), true);

    Function* function = G.GetFunction();
    auto& configs = function->paramConfigs_;
    configs.vecNBufferSetting = {{-2, 1}, {-1, 4}, {1, 2}};
    configs.mgVecParallelLb = kVecParallelNum;
    function->SetTotalSubGraphCount(kSubGraphNum);

    NBufferMerge mergePass;
    EXPECT_EQ(mergePass.RunOnFunction(*function), SUCCESS);
    EXPECT_EQ(function->GetTotalSubGraphCount(), kExpectedSubGraphCount);
}
// A function with a single COPY_IN subgraph and vecNBufferSetting {{-1, 1}}:
// the pass is only required to finish with SUCCESS.
TEST_F(NBufferMergeTest, TestNoMergeMode)
{
    constexpr int kVecParallelNum = 6;

    ComputationalGraphBuilder G;
    std::vector<int64_t> tileShape{16, 16};
    EXPECT_EQ(G.AddTensors(DataType::DT_FP32, tileShape, {"incast0", "incast1", "outcast"}), true);
    EXPECT_EQ(G.AddOps({Opcode::OP_COPY_IN}, {{"incast0"}}, {{"incast1"}}, {"copy_in"}, true), true);
    G.GetOp("copy_in")->UpdateSubgraphID(0);
    EXPECT_EQ(G.SetInCast({"incast0"}), true);
    EXPECT_EQ(G.SetOutCast({"outcast"}), true);

    Function* function = G.GetFunction();
    auto& configs = function->paramConfigs_;
    configs.vecNBufferSetting = {{-1, 1}};
    configs.mgVecParallelLb = kVecParallelNum;
    function->SetTotalSubGraphCount(1);

    NBufferMerge mergePass;
    const auto status = mergePass.RunOnFunction(*function);
    EXPECT_EQ(status, SUCCESS);
}
// The setting {{-2, 4}, {1, 2}} is expected to make the pass return FAILED.
TEST_F(NBufferMergeTest, TestInvalidMode)
{
    const std::map<int64_t, int64_t> setting{{-2, 4}, {1, 2}};
    const Status status = TestNBufferMergeWithDifferentVecBufferSetting(setting);
    EXPECT_EQ(status, FAILED);
}
// A key of 100 next to the {-1, 4} entry is tolerated: the pass is expected to return SUCCESS.
TEST_F(NBufferMergeTest, TestMode2AndVecNBufferSettingKeyMoreThanMaxValue_Tolerated)
{
    const std::map<int64_t, int64_t> setting{{-1, 4}, {100, 2}};
    const Status status = TestNBufferMergeWithDifferentVecBufferSetting(setting);
    EXPECT_EQ(status, SUCCESS);
}
// A value of 0 for key 1 is expected to make the pass return FAILED.
TEST_F(NBufferMergeTest, TestMode2AndVecNBufferSettingValueLessThanMinValue)
{
    const std::map<int64_t, int64_t> setting{{-1, 4}, {1, 0}};
    const Status status = TestNBufferMergeWithDifferentVecBufferSetting(setting);
    EXPECT_EQ(status, FAILED);
}
// A value of INT64_MAX for key 1 is expected to make the pass return FAILED.
TEST_F(NBufferMergeTest, TestMode2AndVecNBufferSettingValueMoreThanMaxValue)
{
    const std::map<int64_t, int64_t> setting{{-1, 4}, {1, INT64_MAX}};
    const Status status = TestNBufferMergeWithDifferentVecBufferSetting(setting);
    EXPECT_EQ(status, FAILED);
}
// A setting that only contains {0, 2} (no negative-key entry) is expected to return SUCCESS.
TEST_F(NBufferMergeTest, TestMode2AndNoDefaultValue)
{
    const std::map<int64_t, int64_t> setting{{0, 2}};
    const Status status = TestNBufferMergeWithDifferentVecBufferSetting(setting);
    EXPECT_EQ(status, SUCCESS);
}
/**
 * \brief Runs NBufferMerge on a fixed 10-subgraph graph with a caller-supplied vecNBufferSetting.
 *
 * The graph has one COPY_IN subgraph (id 0) and nine ABS -> EXP -> ADDS -> ASSEMBLE subgraphs
 * (ids 1..9) that all read "incast1" and assemble into "outcast". mgVecParallelLb is set to 3.
 *
 * \param vecNBufferSetting value assigned to paramConfigs_.vecNBufferSetting before running the pass.
 * \return The status returned by NBufferMerge::RunOnFunction.
 */
Status NBufferMergeTest::TestNBufferMergeWithDifferentVecBufferSetting(std::map<int64_t, int64_t> vecNBufferSetting)
{
    constexpr int kSubGraphNum = 10;
    constexpr int kMgVecParallelLb = 3;
    std::vector<int64_t> tileShape{16, 16};

    ComputationalGraphBuilder G;
    EXPECT_EQ(G.AddTensors(DataType::DT_FP32, tileShape, {"incast0", "incast1", "outcast"}), true);
    EXPECT_EQ(G.AddOps({Opcode::OP_COPY_IN}, {{"incast0"}}, {{"incast1"}}, {"copy_in"}, true), true);
    G.GetOp("copy_in")->UpdateSubgraphID(0);

    const std::vector<Opcode> chainOpcodes{Opcode::OP_ABS, Opcode::OP_EXP, Opcode::OP_ADDS, Opcode::OP_ASSEMBLE};
    for (int id = 1; id < kSubGraphNum; ++id) {
        const std::string suffix = std::to_string(id);
        const std::string absOut = "tensor1" + suffix;
        const std::string expOut = "tensor2" + suffix;
        const std::string addsOut = "tensor3" + suffix;
        EXPECT_EQ(G.AddTensors(DataType::DT_FP32, tileShape, {absOut, expOut, addsOut}), true);

        const std::string absName = "ABS_" + suffix;
        const std::string expName = "EXP_" + suffix;
        const std::string addsName = "ADDS_" + suffix;
        const std::string assembleName = "ASSEMBLE_" + suffix;
        const std::vector<std::vector<std::string>> inputs{{"incast1"}, {absOut}, {expOut}, {addsOut}};
        const std::vector<std::vector<std::string>> outputs{{absOut}, {expOut}, {addsOut}, {"outcast"}};
        const std::vector<std::string> opNames{absName, expName, addsName, assembleName};
        EXPECT_EQ(G.AddOps(chainOpcodes, inputs, outputs, opNames, true), true);

        // Every op of the chain belongs to subgraph `id`.
        G.GetOp(expName)->UpdateSubgraphID(id);
        G.GetOp(addsName)->UpdateSubgraphID(id);
        G.GetOp(assembleName)->UpdateSubgraphID(id);
        G.GetOp(absName)->UpdateSubgraphID(id);
    }
    EXPECT_EQ(G.SetInCast({"incast0"}), true);
    EXPECT_EQ(G.SetOutCast({"outcast"}), true);

    Function* function = G.GetFunction();
    auto& configs = function->paramConfigs_;
    configs.vecNBufferSetting = vecNBufferSetting;
    configs.mgVecParallelLb = kMgVecParallelLb;
    function->SetTotalSubGraphCount(kSubGraphNum);

    NBufferMerge mergePass;
    return mergePass.RunOnFunction(*function);
}
// File-local helper: kept in an unnamed namespace (like BuildMultiInputOutputFunction) so it has
// internal linkage and cannot clash with a same-named helper in another test translation unit.
namespace {
/**
 * \brief Populates G with one COPY_IN subgraph plus subGraphNum identical compute subgraphs.
 *
 * Subgraph 0 holds a single COPY_IN from "incast0" to "incast1". Each subgraph i in
 * [1, subGraphNum] holds the chain ABS_i -> EXP_i -> ADDS_i -> ASSEMBLE_i, reading "incast1"
 * and assembling into the shared "outcast". "incast0" is registered as the incast and
 * "outcast" as the outcast.
 *
 * \param G           builder that receives the tensors and operations.
 * \param tileShape   shape used for every DT_FP32 tensor that is added.
 * \param subGraphNum number of compute subgraphs; the graph ends up with subGraphNum + 1 subgraph ids.
 * \return The function obtained from G.GetFunction().
 */
Function* BuildFunctionWithSubgraphs(
    ComputationalGraphBuilder& G, const std::vector<int64_t>& tileShape, int subGraphNum)
{
    EXPECT_EQ(G.AddTensors(DataType::DT_FP32, tileShape, {"incast0", "incast1", "outcast"}), true);
    EXPECT_EQ(G.AddOps({Opcode::OP_COPY_IN}, {{"incast0"}}, {{"incast1"}}, {"copy_in"}, true), true);
    G.GetOp("copy_in")->UpdateSubgraphID(0);
    for (int i = 1; i <= subGraphNum; i++) {
        std::string strID = std::to_string(i);
        EXPECT_EQ(
            G.AddTensors(DataType::DT_FP32, tileShape, {"tensor1_" + strID, "tensor2_" + strID, "tensor3_" + strID}),
            true);
        std::vector<std::vector<std::string>> iOperands{
            {"incast1"}, {"tensor1_" + strID}, {"tensor2_" + strID}, {"tensor3_" + strID}};
        std::vector<std::vector<std::string>> oOperands{
            {"tensor1_" + strID}, {"tensor2_" + strID}, {"tensor3_" + strID}, {"outcast"}};
        std::vector<std::string> opNames{"ABS_" + strID, "EXP_" + strID, "ADDS_" + strID, "ASSEMBLE_" + strID};
        std::vector<Opcode> opLists{Opcode::OP_ABS, Opcode::OP_EXP, Opcode::OP_ADDS, Opcode::OP_ASSEMBLE};
        EXPECT_EQ(G.AddOps(opLists, iOperands, oOperands, opNames, true), true);
        // All four ops of the chain share subgraph id i.
        G.GetOp("ABS_" + strID)->UpdateSubgraphID(i);
        G.GetOp("EXP_" + strID)->UpdateSubgraphID(i);
        G.GetOp("ADDS_" + strID)->UpdateSubgraphID(i);
        G.GetOp("ASSEMBLE_" + strID)->UpdateSubgraphID(i);
    }
    EXPECT_EQ(G.SetInCast({"incast0"}), true);
    EXPECT_EQ(G.SetOutCast({"outcast"}), true);
    return G.GetFunction();
}
} // namespace
// Attaches the semantic label "VecLabel" to the ABS/EXP ops of subgraphs 1 and 2, configures
// vecNBufferSettingByLabel {{"VecLabel", 1}} next to vecNBufferSetting {{-1, 4}}, and expects
// the pass to finish with SUCCESS.
TEST_F(NBufferMergeTest, TestSemanticLabelSetting)
{
    constexpr int kMgVecParallelLb = 3;
    constexpr int kSubGraphNum = 4;
    constexpr int kLabelledSubGraphs = 2;

    std::vector<int64_t> tileShape{16, 16};
    ComputationalGraphBuilder G;
    Function* function = BuildFunctionWithSubgraphs(G, tileShape, kSubGraphNum);

    auto vecLabel = std::make_shared<SemanticLabel>("VecLabel", __FILE__, __LINE__);
    for (int id = 1; id <= kLabelledSubGraphs; ++id) {
        const std::string suffix = std::to_string(id);
        G.GetOp("ABS_" + suffix)->SetSemanticLabel(vecLabel);
        G.GetOp("EXP_" + suffix)->SetSemanticLabel(vecLabel);
    }

    auto& configs = function->paramConfigs_;
    configs.vecNBufferSetting = {{-1, 4}};
    configs.vecNBufferSettingByLabel = {{"VecLabel", 1}};
    configs.mgVecParallelLb = kMgVecParallelLb;
    // The builder adds one COPY_IN subgraph in addition to the compute subgraphs.
    function->SetTotalSubGraphCount(kSubGraphNum + 1);

    NBufferMerge mergePass;
    const auto status = mergePass.RunOnFunction(*function);
    EXPECT_EQ(status, SUCCESS);
}
// With vecNBufferSettingByFunc {{"DEFAULT", 2}}, a graph of one COPY_IN subgraph plus eight
// identical ABS -> EXP -> ADDS -> ASSEMBLE subgraphs is expected to end up with 5 subgraphs.
TEST_F(NBufferMergeTest, ByFuncMergeDefaultTwo)
{
    constexpr int kMgVecParallelLb = 48;
    constexpr int kSubGraphNum = 8;

    ComputationalGraphBuilder G;
    std::vector<int64_t> tileShape{16, 16};

    // Adds one single-input, single-output op and checks that the builder accepted it.
    auto addSingleOp = [&G](Opcode opcode, const std::string& in, const std::string& out, const std::string& name) {
        EXPECT_EQ(G.AddOps({opcode}, {{in}}, {{out}}, {name}, true), true);
    };

    EXPECT_EQ(G.AddTensors(DataType::DT_FP32, tileShape, {"incast0", "incast1", "outcast"}), true);
    addSingleOp(Opcode::OP_COPY_IN, "incast0", "incast1", "copy_in");
    G.GetOp("copy_in")->UpdateSubgraphID(0);

    for (int id = 1; id <= kSubGraphNum; ++id) {
        const std::string suffix = std::to_string(id);
        const std::string absOut = "t1_" + suffix;
        const std::string expOut = "t2_" + suffix;
        const std::string addsOut = "t3_" + suffix;
        const std::string absName = "ABS_" + suffix;
        const std::string expName = "EXP_" + suffix;
        const std::string addsName = "ADDS_" + suffix;
        const std::string asmName = "ASM_" + suffix;

        EXPECT_EQ(G.AddTensors(DataType::DT_FP32, tileShape, {absOut, expOut, addsOut}), true);
        addSingleOp(Opcode::OP_ABS, "incast1", absOut, absName);
        addSingleOp(Opcode::OP_EXP, absOut, expOut, expName);
        addSingleOp(Opcode::OP_ADDS, expOut, addsOut, addsName);
        addSingleOp(Opcode::OP_ASSEMBLE, addsOut, "outcast", asmName);

        G.GetOp(absName)->UpdateSubgraphID(id);
        G.GetOp(expName)->UpdateSubgraphID(id);
        G.GetOp(addsName)->UpdateSubgraphID(id);
        G.GetOp(asmName)->UpdateSubgraphID(id);
    }
    EXPECT_EQ(G.SetInCast({"incast0"}), true);
    EXPECT_EQ(G.SetOutCast({"outcast"}), true);

    Function* function = G.GetFunction();
    auto& configs = function->paramConfigs_;
    configs.vecNBufferSettingByFunc = {{"DEFAULT", 2}};
    configs.mgVecParallelLb = kMgVecParallelLb;
    // Eight compute subgraphs plus the COPY_IN subgraph.
    function->SetTotalSubGraphCount(kSubGraphNum + 1);

    NBufferMerge mergePass;
    EXPECT_EQ(mergePass.RunOnFunction(*function), SUCCESS);
    EXPECT_EQ(function->GetTotalSubGraphCount(), 5);
}
// Combines a "DEFAULT" entry of 4 with a function-specific entry "func<magic>_1" of 2 in
// vecNBufferSettingByFunc and expects the nine input subgraphs to end up as 5.
TEST_F(NBufferMergeTest, ByFuncMergeFuncSpecificOverride)
{
    constexpr int kMgVecParallelLb = 48;
    constexpr int kSubGraphNum = 8;

    ComputationalGraphBuilder G;
    const std::vector<int64_t> tileShape{16, 16};
    Function* function = BuildFunctionWithSubgraphs(G, tileShape, kSubGraphNum);

    // Function-specific key built from this function's magic number.
    const int funcMagic = function->GetFuncMagic();
    const std::string funcKey = "func" + std::to_string(funcMagic) + "_1";

    auto& configs = function->paramConfigs_;
    configs.vecNBufferSettingByFunc = {
        {"DEFAULT", 4},
        {funcKey, 2}
    };
    configs.mgVecParallelLb = kMgVecParallelLb;
    function->SetTotalSubGraphCount(kSubGraphNum + 1);

    NBufferMerge mergePass;
    const auto status = mergePass.RunOnFunction(*function);
    EXPECT_EQ(status, SUCCESS);
    EXPECT_EQ(function->GetTotalSubGraphCount(), 5);
}
// With vecNBufferSettingByFunc {{"DEFAULT", 1}} the subgraph count is expected to stay
// unchanged at subGraphNum + 1 after the pass.
TEST_F(NBufferMergeTest, ByFuncMergeDefaultOneNoMerge)
{
    constexpr int kMgVecParallelLb = 48;
    constexpr int kSubGraphNum = 8;
    constexpr int kTotalSubGraphs = kSubGraphNum + 1;

    ComputationalGraphBuilder G;
    const std::vector<int64_t> tileShape{16, 16};
    Function* function = BuildFunctionWithSubgraphs(G, tileShape, kSubGraphNum);

    auto& configs = function->paramConfigs_;
    configs.vecNBufferSettingByFunc = {{"DEFAULT", 1}};
    configs.mgVecParallelLb = kMgVecParallelLb;
    function->SetTotalSubGraphCount(kTotalSubGraphs);

    NBufferMerge mergePass;
    const auto status = mergePass.RunOnFunction(*function);
    EXPECT_EQ(status, SUCCESS);
    EXPECT_EQ(function->GetTotalSubGraphCount(), kTotalSubGraphs);
}
}
}