/**
 * Copyright (c) 2025 Huawei Technologies Co., Ltd.
 * This program is free software, you can redistribute it and/or modify it under the terms and conditions of
 * CANN Open Software License Agreement Version 2.0 (the "License").
 * Please refer to the License for details. You may not use this file except in compliance with the License.
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
 * See LICENSE in the root of the software repository for the full text of the License.
 */
/*!
 * \file test_l1_copy_reuse.cpp
 * \brief Unit test for L1CopyInReuseMerge pass.
 */
#include <gtest/gtest.h>
#include "symbolic_scalar_test_utils.h"
#include "interface/function/function.h"
#include "tilefwk/tilefwk.h"
#include "interface/inner/tilefwk.h"
#include "passes/pass_mgr/pass_manager.h"
#include "interface/configs/config_manager.h"
#include <fstream>
#include <set>
#include <vector>
#include <string>
#include <nlohmann/json.hpp>
#include "computational_graph_builder.h"
#include "passes/tile_graph_pass/graph_partition/graph_partition.h"
#include "passes/tile_graph_pass/graph_partition/l1_copy_reuse.h"
#include "interface/tensor/irbuilder.h"
using namespace npu::tile_fwk;
namespace npu {
namespace tile_fwk {
class L1CopyInReuseTest : public testing::Test {
public:
static void SetUpTestCase() {}
static void TearDownTestCase() {}
void SetUp() override
{
Program::GetInstance().Reset();
config::Reset();
config::SetHostOption(COMPILE_STAGE, CS_EXECUTE_GRAPH);
}
void TearDown() override {}
};
// Builds a function by hand with two subgraphs that both read the same input
// (incast1) into an L1 tensor through OP_VIEW and then feed OP_L1_TO_L0A, and
// runs PreCheck / RunOnFunction / PostCheck of L1CopyInReuseMerge on it.
// NOTE(review): the return values of the three pass calls are not asserted, so
// this test only verifies that the pass runs without crashing on this graph.
TEST_F(L1CopyInReuseTest, TwoCopyIn)
{
auto currFunctionPtr =
std::make_shared<Function>(Program::GetInstance(), "TestL1CopyInReuse", "TestL1CopyInReuse", nullptr);
EXPECT_TRUE(currFunctionPtr != nullptr);
constexpr int subGraphID0 = 0;
constexpr int subGraphID1 = 1;
std::vector<int64_t> shape = {8, 16};
// shapeImme is not read anywhere below.
auto shapeImme = OpImmediate::Specified(shape);
// incast1 and incast2 are given the same rawmagic (1).
// NOTE(review): incast1 sets memoryTypeToBe_ while incast2 sets
// memoryTypeOriginal_ -- confirm the difference is intentional.
auto incast1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
incast1->tensor->rawmagic = 1;
incast1->memoryTypeToBe_ = MEM_DEVICE_DDR;
auto incast2 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
incast2->tensor->rawmagic = 1;
incast2->memoryTypeOriginal_ = MEM_DEVICE_DDR;
// tensor1 / tensor3 are the L1 tensors of subgraph 0 / 1; tensor2 / tensor4
// are the outputs of the L1_TO_L0A ops and get no explicit memory type here.
auto tensor1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor1->memoryTypeOriginal_ = MEM_L1;
tensor1->tensor->rawmagic = 2;
auto tensor2 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
auto tensor3 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor3->memoryTypeOriginal_ = MEM_L1;
tensor3->tensor->rawmagic = 3;
auto tensor4 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
// Subgraph 0: incast1 --VIEW(to MEM_L1)--> tensor1 --L1_TO_L0A--> tensor2.
auto& copy_op1 = PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_VIEW, {incast1}, {tensor1});
copy_op1.UpdateSubgraphID(subGraphID0);
copy_op1.SetOpAttribute(std::make_shared<ViewOpAttribute>(
std::vector<int64_t>{0, 0}, MEM_L1, std::vector<SymbolicScalar>(), std::vector<SymbolicScalar>()));
auto& copy_out1 = PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_L1_TO_L0A, {tensor1}, {tensor2});
copy_out1.UpdateSubgraphID(subGraphID0);
// Subgraph 1: incast1 --VIEW--> incast2 --VIEW(to MEM_L1)--> tensor3 --L1_TO_L0A--> tensor4,
// with an additional OP_L1_ALLOC that also produces tensor3.
auto& view_op1 = PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_VIEW, {incast1}, {incast2});
view_op1.SetOpAttribute(std::make_shared<ViewOpAttribute>(std::vector<int64_t>{0, 0}));
view_op1.UpdateSubgraphID(subGraphID1);
auto& alloc_op1 = PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_L1_ALLOC, {}, {tensor3});
alloc_op1.UpdateSubgraphID(subGraphID1);
auto& copy_op2 = PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_VIEW, {incast2}, {tensor3});
copy_op2.UpdateSubgraphID(subGraphID1);
// copy_op2 is also registered explicitly as a consumer of incast2.
incast2->AddConsumer(copy_op2);
copy_op2.SetOpAttribute(std::make_shared<ViewOpAttribute>(
std::vector<int64_t>{0, 0}, MEM_L1, std::vector<SymbolicScalar>(), std::vector<SymbolicScalar>()));
auto& copy_out2 = PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_L1_TO_L0A, {tensor3}, {tensor4});
copy_out2.UpdateSubgraphID(subGraphID1);
// Function boundary: only incast1 is an incast; tensor2 and tensor4 are the outcasts.
currFunctionPtr->inCasts_.push_back(incast1);
currFunctionPtr->outCasts_.push_back(tensor2);
currFunctionPtr->outCasts_.push_back(tensor4);
L1CopyInReuseMerge pass;
pass.PreCheck(*currFunctionPtr);
pass.RunOnFunction(*currFunctionPtr);
pass.PostCheck(*currFunctionPtr);
}
/**
 * Fills G with the fan-out graph used by most tests in this file:
 *
 *   incast0 --"view" (subgraph 0)--> incast1
 *   for every i in [1, subGraphNum):
 *       incast1 --VIEW_i--> tensor<i> --EXP_i--> outcast     (both ops in subgraph i)
 *
 * incast1 gets rawmagic 1 and original memory type MEM_DEVICE_DDR; every
 * tensor<i> gets original memory type MEM_L1 and every VIEW_i carries a
 * ViewOpAttribute targeting MEM_L1. incast0 is the incast, outcast the outcast.
 *
 * @param G            builder that receives the tensors and ops
 * @param tileShape    shape used for every FP32 tensor
 * @param subGraphNum  exclusive upper bound of the subgraph ids created
 */
void InitGraphBuilder(ComputationalGraphBuilder& G, std::vector<int64_t> tileShape, const int subGraphNum)
{
    // The result is not read anywhere in this function.
    auto shapeImme = OpImmediate::Specified(tileShape);

    // Root of the fan-out: incast0 -> incast1 in subgraph 0.
    EXPECT_EQ(G.AddTensors(DataType::DT_FP32, tileShape, {"incast0", "incast1", "outcast"}), true);
    EXPECT_EQ(G.AddOps({Opcode::OP_VIEW}, {{"incast0"}}, {{"incast1"}}, {"view"}, true), true);
    G.GetOp("view")->UpdateSubgraphID(0);
    G.GetTensor("incast1")->tensor->rawmagic = 1;
    G.GetTensor("incast1")->SetMemoryTypeOriginal(MEM_DEVICE_DDR);

    // One VIEW + EXP pair per remaining subgraph, all reading incast1.
    int subGraphId = 1;
    while (subGraphId < subGraphNum) {
        const std::string suffix = std::to_string(subGraphId);
        const std::string l1TensorName = "tensor" + suffix;
        const std::string viewName = "VIEW_" + suffix;
        const std::string expName = "EXP_" + suffix;

        EXPECT_EQ(G.AddTensors(DataType::DT_FP32, tileShape, {l1TensorName}), true);

        std::vector<Opcode> opcodes{Opcode::OP_VIEW, Opcode::OP_EXP};
        std::vector<std::vector<std::string>> inputs{{"incast1"}, {l1TensorName}};
        std::vector<std::vector<std::string>> outputs{{l1TensorName}, {"outcast"}};
        std::vector<std::string> names{viewName, expName};
        EXPECT_EQ(G.AddOps(opcodes, inputs, outputs, names, true), true);

        G.GetOp(viewName)->UpdateSubgraphID(subGraphId);
        G.GetOp(expName)->UpdateSubgraphID(subGraphId);

        auto viewToL1 = std::make_shared<ViewOpAttribute>(
            std::vector<int64_t>{0, 0}, MEM_L1, std::vector<SymbolicScalar>(), std::vector<SymbolicScalar>());
        G.GetOp(viewName)->SetOpAttribute(viewToL1);
        G.GetTensor(l1TensorName)->SetMemoryTypeOriginal(MEM_L1);

        ++subGraphId;
    }

    EXPECT_EQ(G.SetInCast({"incast0"}), true);
    EXPECT_EQ(G.SetOutCast({"outcast"}), true);
}
/**
 * Adds one "source" chain to G and fans its result out into branchNum GM tensors:
 *
 *   <prefix>In (DDR) --View--> ViewUb --Cast0--> Cast0Ub --Adds--> AddsUb --Cast1--> Cast1Ub
 *   Cast1Ub --Assemble<i>--> <prefix>Gm<i> (DDR)      for i in [0, branchNum)
 *
 * All tensors are FP16 with tile shape {16, 16}; the intermediate tensors live in MEM_UB.
 *
 * @param G          builder that receives the tensors and ops
 * @param prefix     name prefix applied to every tensor and op created here
 * @param branchNum  number of assemble ops / GM tensors to create
 * @param gmTensors  receives the names of the created GM tensors, in branch order
 */
void BuildParallelAssembleSource(
    ComputationalGraphBuilder& G, const std::string& prefix, int branchNum, std::vector<std::string>& gmTensors)
{
    std::vector<int64_t> tileShape{16, 16};
    std::string input = prefix + "In";
    std::string viewUb = prefix + "ViewUb";
    std::string cast0Ub = prefix + "Cast0Ub";
    std::string addsUb = prefix + "AddsUb";
    std::string cast1Ub = prefix + "Cast1Ub";

    // DDR input first, then the UB intermediates in chain order.
    EXPECT_EQ(G.AddTensor(DataType::DT_FP16, tileShape, MemoryType::MEM_DEVICE_DDR, input), true);
    std::vector<std::string> ubTensors{viewUb, cast0Ub, addsUb, cast1Ub};
    for (auto& ubTensor : ubTensors) {
        EXPECT_EQ(G.AddTensor(DataType::DT_FP16, tileShape, MemoryType::MEM_UB, ubTensor), true);
    }

    // View from DDR into UB.
    const std::string viewName = prefix + "View";
    EXPECT_EQ(G.AddOp(Opcode::OP_VIEW, {input}, {viewUb}, viewName), true);
    auto viewToUb = std::make_shared<ViewOpAttribute>(
        std::vector<int64_t>{0, 0}, MEM_UB, std::vector<SymbolicScalar>(), std::vector<SymbolicScalar>());
    G.GetOp(viewName)->SetOpAttribute(viewToUb);

    // Convert -> adds -> convert chain on UB.
    EXPECT_EQ(G.AddOp(Opcode::OP_CONVERT, {viewUb}, {cast0Ub}, prefix + "Cast0"), true);
    EXPECT_EQ(G.AddOp(Opcode::OP_ADDS, {cast0Ub}, {addsUb}, prefix + "Adds"), true);
    EXPECT_EQ(G.AddOp(Opcode::OP_CONVERT, {addsUb}, {cast1Ub}, prefix + "Cast1"), true);

    // Every branch assembles the same UB result into its own GM tensor.
    for (int branch = 0; branch < branchNum; ++branch) {
        const std::string suffix = std::to_string(branch);
        std::string gmTensor = prefix + "Gm" + suffix;
        std::string assembleName = prefix + "Assemble" + suffix;

        EXPECT_EQ(G.AddTensor(DataType::DT_FP16, tileShape, MemoryType::MEM_DEVICE_DDR, gmTensor), true);
        EXPECT_EQ(G.AddOp(Opcode::OP_ASSEMBLE, {cast1Ub}, {gmTensor}, assembleName), true);

        auto assembleToDdr =
            std::make_shared<AssembleOpAttribute>(MemoryType::MEM_DEVICE_DDR, std::vector<int64_t>{0, 0});
        G.GetOp(assembleName)->SetOpAttribute(assembleToDdr);

        gmTensors.push_back(gmTensor);
    }
}
/**
 * Adds one matmul branch to G:
 *
 *   gmTensorsA[branchIndex] --viewA--> l1A --toA--> l0A --\
 *                                                          matmul --> l0C --copyOut--> out (DDR)
 *   gmTensorsB[branchIndex] --viewB--> l1B --toB--> l0B --/
 *
 * All names carry branchIndex as suffix. Both views get a ViewOpAttribute
 * targeting MEM_L1, and all six ops are flagged with OpAttributeKey::isCube.
 *
 * @param G            builder that receives the tensors and ops
 * @param gmTensorsA   GM tensor names feeding the A side, indexed by branch
 * @param gmTensorsB   GM tensor names feeding the B side, indexed by branch
 * @param branchIndex  index of the branch to create
 * @param outcasts     receives the name of this branch's output tensor
 */
void BuildParallelMatmulBranch(
    ComputationalGraphBuilder& G, const std::vector<std::string>& gmTensorsA, const std::vector<std::string>& gmTensorsB,
    int branchIndex, std::vector<std::string>& outcasts)
{
    std::vector<int64_t> tileShape{16, 16};
    const std::string suffix = std::to_string(branchIndex);

    // Tensor names.
    std::string l1A = "l1A" + suffix;
    std::string l1B = "l1B" + suffix;
    std::string l0A = "l0A" + suffix;
    std::string l0B = "l0B" + suffix;
    std::string l0C = "l0C" + suffix;
    std::string out = "out" + suffix;

    // Op names.
    std::string viewA = "viewA" + suffix;
    std::string viewB = "viewB" + suffix;
    std::string toA = "toA" + suffix;
    std::string toB = "toB" + suffix;
    std::string matmul = "matmul" + suffix;
    std::string copyOut = "copyOut" + suffix;

    EXPECT_EQ(G.AddTensor(DataType::DT_FP16, tileShape, MemoryType::MEM_L1, l1A), true);
    EXPECT_EQ(G.AddTensor(DataType::DT_FP16, tileShape, MemoryType::MEM_L1, l1B), true);
    EXPECT_EQ(G.AddTensor(DataType::DT_FP16, tileShape, MemoryType::MEM_L0A, l0A), true);
    EXPECT_EQ(G.AddTensor(DataType::DT_FP16, tileShape, MemoryType::MEM_L0B, l0B), true);
    EXPECT_EQ(G.AddTensor(DataType::DT_FP16, tileShape, MemoryType::MEM_L0C, l0C), true);
    EXPECT_EQ(G.AddTensor(DataType::DT_FP16, tileShape, MemoryType::MEM_DEVICE_DDR, out), true);

    EXPECT_EQ(G.AddOp(Opcode::OP_VIEW, {gmTensorsA[branchIndex]}, {l1A}, viewA), true);
    EXPECT_EQ(G.AddOp(Opcode::OP_VIEW, {gmTensorsB[branchIndex]}, {l1B}, viewB), true);
    EXPECT_EQ(G.AddOp(Opcode::OP_L1_TO_L0A, {l1A}, {l0A}, toA), true);
    EXPECT_EQ(G.AddOp(Opcode::OP_L1_TO_L0B, {l1B}, {l0B}, toB), true);
    EXPECT_EQ(G.AddOp(Opcode::OP_A_MUL_B, {l0A, l0B}, {l0C}, matmul), true);
    EXPECT_EQ(G.AddOp(Opcode::OP_COPY_OUT, {l0C}, {out}, copyOut), true);

    // Each view gets its own GM -> L1 view attribute instance.
    auto viewAOp = G.GetOp(viewA);
    auto viewBOp = G.GetOp(viewB);
    viewAOp->SetOpAttribute(std::make_shared<ViewOpAttribute>(
        std::vector<int64_t>{0, 0}, MEM_L1, std::vector<SymbolicScalar>(), std::vector<SymbolicScalar>()));
    viewBOp->SetOpAttribute(std::make_shared<ViewOpAttribute>(
        std::vector<int64_t>{0, 0}, MEM_L1, std::vector<SymbolicScalar>(), std::vector<SymbolicScalar>()));

    // Flag every op of the branch as a cube op, in creation order.
    auto toAOp = G.GetOp(toA);
    auto toBOp = G.GetOp(toB);
    auto matmulOp = G.GetOp(matmul);
    auto copyOutOp = G.GetOp(copyOut);
    for (auto cubeOp : {viewAOp, viewBOp, toAOp, toBOp, matmulOp, copyOutOp}) {
        cubeOp->SetAttr(OpAttributeKey::isCube, true);
    }

    outcasts.push_back(out);
}
/**
 * Builds the complete parallel-matmul graph in G: two assemble sources
 * ("srcA" and "srcB") that each fan out into branchNum GM tensors, followed by
 * branchNum matmul branches that consume one GM tensor from each source.
 * "srcAIn" / "srcBIn" become the incasts and the branch outputs the outcasts.
 *
 * @param G          builder that receives the graph
 * @param branchNum  number of parallel matmul branches
 */
void BuildParallelMatmulGraph(ComputationalGraphBuilder& G, int branchNum)
{
    std::vector<std::string> sourceAGm;
    std::vector<std::string> sourceBGm;
    BuildParallelAssembleSource(G, "srcA", branchNum, sourceAGm);
    BuildParallelAssembleSource(G, "srcB", branchNum, sourceBGm);

    std::vector<std::string> branchOutputs;
    int branch = 0;
    while (branch < branchNum) {
        BuildParallelMatmulBranch(G, sourceAGm, sourceBGm, branch, branchOutputs);
        ++branch;
    }

    EXPECT_EQ(G.SetInCast({"srcAIn", "srcBIn"}), true);
    EXPECT_EQ(G.SetOutCast(branchOutputs), true);
}
/**
 * Collects the raw magic of the tensors "<prefix>Gm0" .. "<prefix>Gm<branchNum-1>".
 *
 * @param G          builder holding the tensors
 * @param prefix     source prefix of the GM tensors
 * @param branchNum  number of GM tensors to inspect
 * @return the set of distinct raw magic values found
 */
std::set<int> GetTensorRawMagics(ComputationalGraphBuilder& G, const std::string& prefix, int branchNum)
{
    std::set<int> distinctMagics;
    for (int branch = 0; branch < branchNum; ++branch) {
        const std::string gmTensorName = prefix + "Gm" + std::to_string(branch);
        distinctMagics.insert(G.GetTensor(gmTensorName)->GetRawMagic());
    }
    return distinctMagics;
}
/**
 * Counts the operations of a function that have the given opcode.
 *
 * @param function  function whose Operations() are scanned
 * @param opcode    opcode to look for
 * @return number of operations whose GetOpcode() equals opcode
 */
int CountOpcode(Function& function, Opcode opcode)
{
    int matches = 0;
    for (auto& operation : function.Operations()) {
        matches += (operation.GetOpcode() == opcode) ? 1 : 0;
    }
    return matches;
}
/**
 * Collects the subgraph ids of the ops "matmul0" .. "matmul<branchNum-1>".
 *
 * @param G          builder holding the ops
 * @param branchNum  number of matmul ops to inspect
 * @return the set of distinct subgraph ids found
 */
std::set<int> GetMatmulSubgraphIds(ComputationalGraphBuilder& G, int branchNum)
{
    std::set<int> distinctIds;
    for (int branch = 0; branch < branchNum; ++branch) {
        const std::string matmulName = "matmul" + std::to_string(branch);
        distinctIds.insert(G.GetOp(matmulName)->GetSubgraphID());
    }
    return distinctIds;
}
/**
 * Adds an OP_GATHER_IN_L1 op (incast1 -> tensorL1, subgraph 1) to the fan-out
 * graph from InitGraphBuilder and expects L1CopyInReuseMerge to still succeed.
 */
TEST_F(L1CopyInReuseTest, TestInvalidOp)
{
    ComputationalGraphBuilder builder;
    std::vector<int64_t> tileShape{16, 16};
    // Not read below.
    auto shapeImme = OpImmediate::Specified(tileShape);
    constexpr int kSubGraphCount = 20;
    InitGraphBuilder(builder, tileShape, kSubGraphCount);

    // Extra L1 tensor produced by a gather-in-L1 op inside subgraph 1.
    EXPECT_EQ(builder.AddTensors(DataType::DT_FP32, tileShape, {"tensorL1"}), true);
    builder.GetTensor("tensorL1")->SetMemoryTypeOriginal(MEM_L1);
    EXPECT_EQ(
        builder.AddOps({Opcode::OP_GATHER_IN_L1}, {{"incast1"}}, {{"tensorL1"}}, {"gather_in_l1"}, true), true);
    builder.GetOp("gather_in_l1")->UpdateSubgraphID(1);

    Function& func = *builder.GetFunction();
    func.paramConfigs_.cubeNBufferSetting = {{1, 2}, {-1, 4}};
    func.paramConfigs_.cubeL1ReuseSetting = {{1, 2}, {-1, 2}};
    func.SetTotalSubGraphCount(kSubGraphCount);

    L1CopyInReuseMerge mergePass;
    EXPECT_EQ(mergePass.RunOnFunction(func), SUCCESS);
}
/**
 * Baseline case: with both cubeNBufferSetting and cubeL1ReuseSetting populated,
 * the 20-subgraph fan-out graph is expected to end up with 5 subgraphs.
 */
TEST_F(L1CopyInReuseTest, TestNormal)
{
    ComputationalGraphBuilder builder;
    std::vector<int64_t> tileShape{16, 16};
    // Not read below.
    auto shapeImme = OpImmediate::Specified(tileShape);
    constexpr int kExpectedSubGraphCount = 5;
    constexpr int kSubGraphCount = 20;
    InitGraphBuilder(builder, tileShape, kSubGraphCount);

    Function& func = *builder.GetFunction();
    func.paramConfigs_.cubeNBufferSetting = {{1, 2}, {-1, 4}};
    func.paramConfigs_.cubeL1ReuseSetting = {{1, 2}, {-1, 2}};
    func.SetTotalSubGraphCount(kSubGraphCount);

    L1CopyInReuseMerge mergePass;
    EXPECT_EQ(mergePass.RunOnFunction(func), SUCCESS);
    EXPECT_EQ(func.GetTotalSubGraphCount(), kExpectedSubGraphCount);
}
/*
 * GraphPartitionCoeThenL1ReuseMerge
 * srcAIn -> view -> cast -> adds -> cast -> assemble0/1/2/3 -> srcAGm0/1/2/3 -> view(L1) -> l0A -> matmul
 * srcBIn -> view -> cast -> adds -> cast -> assemble0/1/2/3 -> srcBGm0/1/2/3 -> view(L1) -> l0B -> matmul
 * The COE step newly added inside GraphPartition removes the redundant assemble ops and keeps only a single
 * shared GM output; L1CopyInReuseMerge then uses the unified GM->L1 view to merge the parallel matmul
 * branches into the same subgraph.
 */
/**
 * End-to-end check of GraphPartition followed by L1CopyInReuseMerge on the
 * parallel-matmul graph with four branches.
 *
 * Expectations, in order:
 *  - before any pass, the GM tensors of each source have four distinct raw magics;
 *  - after GraphPartition ("Iso" algorithm), exactly two OP_ASSEMBLE ops remain
 *    and the four matmul ops sit in four distinct subgraphs;
 *  - after L1CopyInReuseMerge with a per-function L1 reuse setting equal to the
 *    branch count, all matmul ops share one subgraph id.
 */
TEST_F(L1CopyInReuseTest, TestParallelAssembleEnableParallelMatmulL1ReuseMerge)
{
    constexpr int branchNum = 4;
    const size_t expectedDistinct = static_cast<size_t>(branchNum);

    auto ownedFunction = std::make_shared<Function>(
        Program::GetInstance(), "TestGraphPartitionCoeL1Reuse", "TestGraphPartitionCoeL1Reuse", nullptr);
    ComputationalGraphBuilder builder(ownedFunction.get());
    BuildParallelMatmulGraph(builder, branchNum);

    // Every assemble branch starts with its own GM tensor.
    auto magicsA = GetTensorRawMagics(builder, "srcA", branchNum);
    auto magicsB = GetTensorRawMagics(builder, "srcB", branchNum);
    EXPECT_EQ(magicsA.size(), expectedDistinct);
    EXPECT_EQ(magicsB.size(), expectedDistinct);

    // Stage 1: graph partition.
    Function& func = *builder.GetFunction();
    func.paramConfigs_.sgPartitionAlgorithm = "Iso";
    GraphPartition partitionPass;
    EXPECT_EQ(partitionPass.PreCheck(func), SUCCESS);
    EXPECT_EQ(partitionPass.RunOnFunction(func), SUCCESS);
    EXPECT_EQ(partitionPass.PostCheck(func), SUCCESS);
    EXPECT_EQ(CountOpcode(func, Opcode::OP_ASSEMBLE), 2);
    auto idsAfterPartition = GetMatmulSubgraphIds(builder, branchNum);
    EXPECT_EQ(idsAfterPartition.size(), expectedDistinct);

    // Stage 2: L1 copy-in reuse merge, configured through the per-function settings only.
    const int funcMagic = func.GetFuncMagic();
    std::string perFuncKey = "func" + std::to_string(funcMagic) + "_0";
    func.paramConfigs_.cubeL1ReuseSetting.clear();
    func.paramConfigs_.cubeNBufferSetting.clear();
    func.paramConfigs_.cubeL1ReuseSettingByFunc = {{"DEFAULT", 1}, {perFuncKey, branchNum}};
    func.paramConfigs_.cubeNBufferSettingByFunc = {{"DEFAULT", 1}, {perFuncKey, 1}};
    L1CopyInReuseMerge mergePass;
    EXPECT_EQ(mergePass.RunOnFunction(func), SUCCESS);
    auto idsAfterMerge = GetMatmulSubgraphIds(builder, branchNum);
    EXPECT_EQ(idsAfterMerge.size(), 1UL);
}
/**
 * cubeL1ReuseSetting has only the {1, 2} entry and no -1 entry; with an
 * N-buffer setting of {{1, 2}, {-1, 2}} the 20-subgraph fan-out graph is
 * expected to end up with 11 subgraphs.
 */
TEST_F(L1CopyInReuseTest, TestNoL1Num)
{
    ComputationalGraphBuilder builder;
    std::vector<int64_t> tileShape{16, 16};
    constexpr int kCubeNBuffer = 2;
    constexpr int kExpectedSubGraphCount = 11;
    // Not read below.
    auto shapeImme = OpImmediate::Specified(tileShape);
    constexpr int kSubGraphCount = 20;
    InitGraphBuilder(builder, tileShape, kSubGraphCount);

    Function& func = *builder.GetFunction();
    func.paramConfigs_.cubeNBufferSetting = {{1, 2}, {-1, kCubeNBuffer}};
    func.paramConfigs_.cubeL1ReuseSetting = {{1, 2}};
    func.SetTotalSubGraphCount(kSubGraphCount);

    L1CopyInReuseMerge mergePass;
    EXPECT_EQ(mergePass.RunOnFunction(func), SUCCESS);
    EXPECT_EQ(func.GetTotalSubGraphCount(), kExpectedSubGraphCount);
}
/**
 * cubeL1ReuseSetting has only the {-1, 2} entry; the 20-subgraph fan-out graph
 * is expected to end up with 5 subgraphs.
 */
TEST_F(L1CopyInReuseTest, TestNoL1Map)
{
    ComputationalGraphBuilder builder;
    std::vector<int64_t> tileShape{16, 16};
    constexpr int kExpectedSubGraphCount = 5;
    // Not read below.
    auto shapeImme = OpImmediate::Specified(tileShape);
    constexpr int kSubGraphCount = 20;
    InitGraphBuilder(builder, tileShape, kSubGraphCount);

    Function& func = *builder.GetFunction();
    func.paramConfigs_.cubeNBufferSetting = {{1, 2}, {-1, 4}};
    func.paramConfigs_.cubeL1ReuseSetting = {{-1, 2}};
    func.SetTotalSubGraphCount(kSubGraphCount);

    L1CopyInReuseMerge mergePass;
    EXPECT_EQ(mergePass.RunOnFunction(func), SUCCESS);
    EXPECT_EQ(func.GetTotalSubGraphCount(), kExpectedSubGraphCount);
}
/**
 * cubeNBufferSetting has only the {-1, 4} entry; the 20-subgraph fan-out graph
 * is expected to end up with 5 subgraphs.
 */
TEST_F(L1CopyInReuseTest, TestNoBufferMap)
{
    ComputationalGraphBuilder builder;
    std::vector<int64_t> tileShape{16, 16};
    constexpr int kExpectedSubGraphCount = 5;
    constexpr int kSubGraphCount = 20;
    // Not read below.
    auto shapeImme = OpImmediate::Specified(tileShape);
    InitGraphBuilder(builder, tileShape, kSubGraphCount);

    Function& func = *builder.GetFunction();
    func.paramConfigs_.cubeNBufferSetting = {{-1, 4}};
    func.paramConfigs_.cubeL1ReuseSetting = {{1, 2}, {-1, 2}};
    func.SetTotalSubGraphCount(kSubGraphCount);

    L1CopyInReuseMerge mergePass;
    EXPECT_EQ(mergePass.RunOnFunction(func), SUCCESS);
    EXPECT_EQ(func.GetTotalSubGraphCount(), kExpectedSubGraphCount);
}
/**
 * No reuse / N-buffer parameters are set on the function; the subgraph count
 * is expected to stay at 20 after the pass.
 */
TEST_F(L1CopyInReuseTest, TestNoParam)
{
    ComputationalGraphBuilder builder;
    std::vector<int64_t> tileShape{16, 16};
    constexpr int kExpectedSubGraphCount = 20;
    // Not read below.
    auto shapeImme = OpImmediate::Specified(tileShape);
    constexpr int kSubGraphCount = 20;
    InitGraphBuilder(builder, tileShape, kSubGraphCount);

    Function& func = *builder.GetFunction();
    func.SetTotalSubGraphCount(kSubGraphCount);

    L1CopyInReuseMerge mergePass;
    EXPECT_EQ(mergePass.RunOnFunction(func), SUCCESS);
    EXPECT_EQ(func.GetTotalSubGraphCount(), kExpectedSubGraphCount);
}
/**
 * A cubeL1ReuseSetting of {{-1, -1}} is expected to make the pass return FAILED.
 */
TEST_F(L1CopyInReuseTest, TestInvalidL1Num)
{
    ComputationalGraphBuilder builder;
    std::vector<int64_t> tileShape{16, 16};
    // Not read below.
    auto shapeImme = OpImmediate::Specified(tileShape);
    constexpr int kSubGraphCount = 20;
    InitGraphBuilder(builder, tileShape, kSubGraphCount);

    Function& func = *builder.GetFunction();
    func.paramConfigs_.cubeNBufferSetting = {{-1, 4}};
    func.paramConfigs_.cubeL1ReuseSetting = {{-1, -1}};
    func.SetTotalSubGraphCount(kSubGraphCount);

    L1CopyInReuseMerge mergePass;
    EXPECT_EQ(mergePass.RunOnFunction(func), FAILED);
}
/**
 * Runs the pass three times on the same function with different settings:
 *  1. both maps contain a {-2, 2} entry next to the -1 entry  -> SUCCESS
 *  2. cubeL1ReuseSetting = {{-1, -3}}                         -> FAILED
 *  3. cubeL1ReuseSetting = {{-2, 2}}, cubeNBufferSetting = {{-1, -5}} -> FAILED
 */
TEST_F(L1CopyInReuseTest, TestInvalidL1Map)
{
    ComputationalGraphBuilder builder;
    std::vector<int64_t> tileShape{16, 16};
    // Not read below.
    auto shapeImme = OpImmediate::Specified(tileShape);
    constexpr int kSubGraphCount = 20;
    InitGraphBuilder(builder, tileShape, kSubGraphCount);

    Function& func = *builder.GetFunction();
    auto& params = func.paramConfigs_;
    L1CopyInReuseMerge mergePass;

    // Run 1.
    params.cubeNBufferSetting = {{-2, 2}, {-1, 4}};
    params.cubeL1ReuseSetting = {{-2, 2}, {-1, 2}};
    func.SetTotalSubGraphCount(kSubGraphCount);
    EXPECT_EQ(mergePass.RunOnFunction(func), SUCCESS);

    // Run 2.
    params.cubeL1ReuseSetting = {{-1, -3}};
    EXPECT_EQ(mergePass.RunOnFunction(func), FAILED);

    // Run 3.
    params.cubeL1ReuseSetting = {{-2, 2}};
    params.cubeNBufferSetting = {{-1, -5}};
    EXPECT_EQ(mergePass.RunOnFunction(func), FAILED);
}
/**
 * Runs L1CopyInReuseMerge on the 20-subgraph fan-out graph and then checks the
 * values that the health-report helpers compute for the resulting function:
 *  - CalcOperatorInfo: totalOpCount, peakMemory.peakMemoryUsage, copyDataCount;
 *  - GetOpConnectionMap + CalcGraphMetrics: sizes of maxFaninOps / maxFanoutOps.
 * Afterwards the function type is switched to DYNAMIC, the pass is run again,
 * and a fresh report is expected to have null peakMemory / copyDataCount.
 */
TEST_F(L1CopyInReuseTest, TestHealthReport)
{
    ComputationalGraphBuilder G;
    std::vector<int64_t> tileShape{16, 16};
    const int result = 5;
    const int subGraphNum = 20;
    InitGraphBuilder(G, tileShape, subGraphNum);
    Function* function = G.GetFunction();
    function->paramConfigs_.cubeNBufferSetting = {{1, 2}, {-1, 4}};
    function->paramConfigs_.cubeL1ReuseSetting = {{1, 2}, {-1, 2}};
    function->SetTotalSubGraphCount(subGraphNum);
    L1CopyInReuseMerge LCRM;
    EXPECT_EQ(LCRM.RunOnFunction(*function), SUCCESS);
    EXPECT_EQ(function->GetTotalSubGraphCount(), result);

    nlohmann::json report;
    // The two size expectations are unsigned so that they are compared with
    // json::size() without a signed/unsigned mismatch inside EXPECT_EQ.
    const size_t maxFaninOpsResult = 29;
    const size_t maxFanoutOpsResult = 1;
    const int totalOpCount = 30;
    const int peakMemoryUsage = 512;
    const int copyDataCount = 0;

    // Per-operator statistics.
    CalcOperatorInfo(*function, report);
    EXPECT_EQ(report["totalOpCount"], totalOpCount);
    EXPECT_EQ(report["peakMemory"]["peakMemoryUsage"], peakMemoryUsage);
    EXPECT_EQ(report["copyDataCount"], copyDataCount);

    // Connectivity statistics.
    std::vector<std::vector<int>> inMap;
    std::vector<std::vector<int>> outMap;
    std::vector<bool> actualMagic;
    GetOpConnectionMap(*function, inMap, outMap, actualMagic);
    CalcGraphMetrics(inMap, outMap, actualMagic, report);
    EXPECT_EQ(report["maxFaninOps"].size(), maxFaninOpsResult);
    EXPECT_EQ(report["maxFanoutOps"].size(), maxFanoutOpsResult);

    // For a DYNAMIC function the memory / copy entries are expected to be null.
    nlohmann::json reportNull;
    function->SetFunctionType(FunctionType::DYNAMIC);
    EXPECT_EQ(LCRM.RunOnFunction(*function), SUCCESS);
    CalcOperatorInfo(*function, reportNull);
    EXPECT_EQ(reportNull["peakMemory"], nullptr);
    EXPECT_EQ(reportNull["copyDataCount"], nullptr);
}
/**
 * Same fan-out shape as InitGraphBuilder, but the copy into L1 is an
 * OP_CONVERT (DDR -> L1) instead of an OP_VIEW, the consumer is an OP_MUL with
 * two outputs, and the pass is driven through PassManager with a strategy that
 * contains only L1_COPY_IN_REUSE_MERGE. The 20 subgraphs are expected to end
 * up as 5.
 */
TEST_F(L1CopyInReuseTest, TestGeneralizationL1CopyIn)
{
    ComputationalGraphBuilder builder;
    std::vector<int64_t> tileShape{16, 16};
    // Not read below.
    auto shapeImme = OpImmediate::Specified(tileShape);
    constexpr int kExpectedSubGraphCount = 5;

    // Root: incast0 --view--> incast1 in subgraph 0.
    EXPECT_EQ(
        builder.AddTensors(DataType::DT_FP32, tileShape, {"incast0", "incast1", "outcast0", "outcast1"}), true);
    EXPECT_EQ(builder.AddOps({Opcode::OP_VIEW}, {{"incast0"}}, {{"incast1"}}, {"view"}, true), true);
    builder.GetOp("view")->UpdateSubgraphID(0);
    constexpr int kSubGraphCount = 20;
    builder.GetTensor("incast1")->tensor->rawmagic = 1;
    builder.GetTensor("incast1")->SetMemoryTypeOriginal(MEM_DEVICE_DDR);

    // One CONVERT + MUL pair per remaining subgraph, all reading incast1.
    for (int subGraphId = 1; subGraphId < kSubGraphCount; subGraphId++) {
        const std::string suffix = std::to_string(subGraphId);
        const std::string l1TensorName = "tensor" + suffix;
        const std::string convertName = "CONVERT_" + suffix;
        const std::string mulName = "MUL_" + suffix;

        EXPECT_EQ(builder.AddTensors(DataType::DT_FP32, tileShape, {l1TensorName}), true);

        std::vector<Opcode> opcodes{Opcode::OP_CONVERT, Opcode::OP_MUL};
        std::vector<std::vector<std::string>> inputs{{"incast1"}, {l1TensorName}};
        std::vector<std::vector<std::string>> outputs{{l1TensorName}, {"outcast0", "outcast1"}};
        std::vector<std::string> names{convertName, mulName};
        EXPECT_EQ(builder.AddOps(opcodes, inputs, outputs, names, true), true);

        builder.GetOp(convertName)->UpdateSubgraphID(subGraphId);
        builder.GetOp(mulName)->UpdateSubgraphID(subGraphId);
        builder.GetOp(convertName)->SetOpAttribute(std::make_shared<ConvertOpAttribute>(MEM_DEVICE_DDR, MEM_L1));
        builder.GetTensor(l1TensorName)->SetMemoryTypeOriginal(MEM_L1);
    }
    EXPECT_EQ(builder.SetInCast({"incast0"}), true);
    EXPECT_EQ(builder.SetOutCast({"outcast0", "outcast1"}), true);

    Function& func = *builder.GetFunction();
    func.paramConfigs_.cubeNBufferSetting = {{1, 2}, {-1, 4}};
    func.paramConfigs_.cubeL1ReuseSetting = {{1, 2}, {-1, 2}};
    func.SetTotalSubGraphCount(kSubGraphCount);

    // Run the pass through the pass manager instead of calling it directly.
    PassManager& manager = PassManager::Instance();
    manager.RegisterStrategy(
        "myStrategy", {
                          {"L1CopyInReuseMerge", PassName::L1_COPY_IN_REUSE_MERGE},
                      });
    auto status = manager.RunPass(Program::GetInstance(), func, "myStrategy");
    EXPECT_EQ(status, SUCCESS);
    EXPECT_EQ(func.GetTotalSubGraphCount(), kExpectedSubGraphCount);
}
/**
 * Gives every tensor<i> of the fan-out graph a second producer
 * (tensor_before<i> --EXP_BEFORE_i--> tensor<i>, same subgraph), switches the
 * data type of tensor_before1 and tensor1 to FP16, and expects the pass to
 * return FAILED.
 */
TEST_F(L1CopyInReuseTest, TestTensorReuseFailed)
{
    ComputationalGraphBuilder builder;
    std::vector<int64_t> tileShape{16, 16};
    // Not read below.
    auto shapeImme = OpImmediate::Specified(tileShape);
    constexpr int kSubGraphCount = 20;
    InitGraphBuilder(builder, tileShape, kSubGraphCount);

    for (int subGraphId = 1; subGraphId < kSubGraphCount; subGraphId++) {
        const std::string suffix = std::to_string(subGraphId);
        const std::string beforeName = "tensor_before" + suffix;
        const std::string expBeforeName = "EXP_BEFORE_" + suffix;

        EXPECT_EQ(builder.AddTensors(DataType::DT_FP32, tileShape, {beforeName}), true);

        std::vector<Opcode> opcodes{Opcode::OP_EXP};
        std::vector<std::vector<std::string>> inputs{{beforeName}};
        std::vector<std::vector<std::string>> outputs{{"tensor" + suffix}};
        std::vector<std::string> names{expBeforeName};
        EXPECT_EQ(builder.AddOps(opcodes, inputs, outputs, names, true), true);

        builder.GetOp(expBeforeName)->UpdateSubgraphID(subGraphId);
        builder.GetTensor(beforeName)->SetMemoryTypeOriginal(MEM_L1);
    }

    // Only branch 1 differs in data type from the rest.
    builder.GetTensor("tensor_before1")->tensor->datatype = DataType::DT_FP16;
    builder.GetTensor("tensor1")->tensor->datatype = DataType::DT_FP16;

    Function& func = *builder.GetFunction();
    func.paramConfigs_.cubeL1ReuseSetting = {{1, 2}, {-1, 2}};
    func.SetTotalSubGraphCount(kSubGraphCount);

    L1CopyInReuseMerge mergePass;
    EXPECT_EQ(mergePass.RunOnFunction(func), FAILED);
}
/**
 * Attaches the semantic label "CubeLabel" to EXP_1..EXP_3 and configures the
 * by-label reuse / N-buffer settings for that label in addition to the global
 * -1 entries; the pass is expected to succeed.
 */
TEST_F(L1CopyInReuseTest, TestSemanticLabelSetting)
{
    ComputationalGraphBuilder builder;
    std::vector<int64_t> tileShape{16, 16};
    constexpr int kSubGraphCount = 20;
    InitGraphBuilder(builder, tileShape, kSubGraphCount);

    // The first three EXP ops share one label instance.
    auto cubeLabel = std::make_shared<SemanticLabel>("CubeLabel", __FILE__, __LINE__);
    int labelled = 1;
    while (labelled <= 3) {
        const std::string expName = "EXP_" + std::to_string(labelled);
        builder.GetOp(expName)->SetSemanticLabel(cubeLabel);
        ++labelled;
    }

    Function& func = *builder.GetFunction();
    auto& params = func.paramConfigs_;
    params.cubeL1ReuseSetting = {{-1, 2}};
    params.cubeL1ReuseSettingByLabel = {{"CubeLabel", 1}};
    params.cubeNBufferSetting = {{-1, 4}};
    params.cubeNBufferSettingByLabel = {{"CubeLabel", 2}};
    func.SetTotalSubGraphCount(kSubGraphCount);

    L1CopyInReuseMerge mergePass;
    EXPECT_EQ(mergePass.RunOnFunction(func), SUCCESS);
}
/**
 * Per-function settings: L1 reuse is 2 by default and 4 for the key
 * "func<funcMagic>_0" of the function under test; N-buffer defaults to 1.
 * The 20-subgraph fan-out graph is expected to end up with 6 subgraphs.
 */
TEST_F(L1CopyInReuseTest, ByFuncL1ReuseFuncSpecificMerge)
{
    ComputationalGraphBuilder builder;
    std::vector<int64_t> tileShape{16, 16};
    constexpr int kSubGraphCount = 20;
    InitGraphBuilder(builder, tileShape, kSubGraphCount);

    Function& func = *builder.GetFunction();
    int funcMagic = func.GetFuncMagic();
    std::string perFuncKey = "func" + std::to_string(funcMagic) + "_0";
    func.paramConfigs_.cubeL1ReuseSettingByFunc = {{"DEFAULT", 2}, {perFuncKey, 4}};
    func.paramConfigs_.cubeNBufferSettingByFunc = {{"DEFAULT", 1}};
    func.SetTotalSubGraphCount(kSubGraphCount);

    L1CopyInReuseMerge mergePass;
    EXPECT_EQ(mergePass.RunOnFunction(func), SUCCESS);
    EXPECT_EQ(func.GetTotalSubGraphCount(), 6);
}
/**
 * Per-function settings with a default of 1 for both L1 reuse and N-buffer:
 * the subgraph count is expected to stay at 20 after the pass.
 */
TEST_F(L1CopyInReuseTest, ByFuncL1ReuseDefaultOneNoMerge)
{
    ComputationalGraphBuilder builder;
    std::vector<int64_t> tileShape{16, 16};
    constexpr int kSubGraphCount = 20;
    InitGraphBuilder(builder, tileShape, kSubGraphCount);

    Function& func = *builder.GetFunction();
    func.paramConfigs_.cubeL1ReuseSettingByFunc = {{"DEFAULT", 1}};
    func.paramConfigs_.cubeNBufferSettingByFunc = {{"DEFAULT", 1}};
    func.SetTotalSubGraphCount(kSubGraphCount);

    L1CopyInReuseMerge mergePass;
    EXPECT_EQ(mergePass.RunOnFunction(func), SUCCESS);
    EXPECT_EQ(func.GetTotalSubGraphCount(), kSubGraphCount);
}
}
}