* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
* \file test_subgraph_to_function.cpp
* \brief Unit test for SubgraphToFunction pass.
*/
#include <gtest/gtest.h>
#include "symbolic_scalar_test_utils.h"
#include <memory>
#include <vector>
#include <nlohmann/json.hpp>
#include "interface/configs/config_manager.h"
#include "computational_graph_builder.h"
#include "tilefwk/data_type.h"
#include "interface/operation/attribute.h"
#include "tilefwk/tilefwk.h"
#include "interface/inner/tilefwk.h"
#include "interface/program/program.h"
#include "interface/function/function.h"
#include "interface/operation/operation.h"
#include "passes/tile_graph_pass/subgraph_to_function.h"
#include "passes/tile_graph_pass/static_subgraph_processor.h"
#include "passes/pass_mgr/pass_manager.h"
#include "passes/statistics/execute_graph_statistic.h"
#include "ut_json/ut_json_tool.h"
#include "interface/tensor/irbuilder.h"
namespace npu {
namespace tile_fwk {
class SubgraphToFunctionTest : public testing::Test {
public:
static void SetUpTestCase() {}
static void TearDownTestCase() {}
void SetUp() override
{
Program::GetInstance().Reset();
config::Reset();
config::SetHostOption(COMPILE_STAGE, CS_EXECUTE_GRAPH);
}
void TearDown() override {}
};
bool ArePsgHashesUnique(const Function& function)
{
std::unordered_set<size_t> hashSet;
for (const auto& [psgId, program] : function.programs_) {
(void)psgId;
size_t hashValue = program->ComputeHash().GetHash();
if (hashSet.find(hashValue) != hashSet.end()) {
return false;
}
hashSet.insert(hashValue);
}
return true;
}
bool IsPSgToESgMapOneToOne(const std::multimap<int, int>& PSgToESgMap)
{
std::unordered_map<int, int> esgToPsgMap;
for (const auto& [psgId, esgId] : PSgToESgMap) {
if (esgToPsgMap.find(esgId) != esgToPsgMap.end()) {
return false;
}
esgToPsgMap[esgId] = psgId;
}
return true;
}
std::multimap<int, int> GetPSgToESgMap(Function* rootFunc)
{
std::multimap<int, int> PSgToESgMap;
for (size_t i = 0; i < rootFunc->Operations().size(); i++) {
auto iter = rootFunc->Operations()[i].GetSubFuncInvokeInfo();
int PSgId = iter.GetProgramId();
int ESgId = rootFunc->Operations()[i].GetSubgraphID();
PSgToESgMap.insert({PSgId, ESgId});
}
return PSgToESgMap;
}
static std::shared_ptr<LogicalTensor> BuildDifferentOffsetSubgraph0(const std::shared_ptr<Function>& func)
{
auto shape3Imme = OpImmediate::Specified({16, 8, 8});
auto shape1Imme = OpImmediate::Specified({16, 64});
auto incast = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{32, 8, 8}, CreateTestConstIntVector(std::vector<int64_t>{32, 8, 8}));
incast->SetMemoryTypeBoth(MEM_DEVICE_DDR);
incast->SetMagic(3);
func->inCasts_.push_back(incast);
auto tensor0 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{16, 8, 8}, CreateTestConstIntVector(std::vector<int64_t>{16, 8, 8}));
tensor0->SetMemoryTypeBoth(MEM_UB);
tensor0->SetMagic(15);
auto& copyopin0 = PassOperationUtils::AddOperation(*func, Opcode::OP_COPY_IN, {incast}, {tensor0});
copyopin0.SetOpAttribute(std::make_shared<CopyOpAttribute>(
OpImmediate::Specified({16, 0, 0}), MEM_UB, shape3Imme, shape3Imme, std::vector<npu::tile_fwk::OpImmediate>()));
copyopin0.UpdateSubgraphID(0);
copyopin0.opmagic = 10032;
auto tensor1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{16, 64}, CreateTestConstIntVector(std::vector<int64_t>{16, 64}));
tensor1->SetMemoryTypeBoth(MEM_UB);
tensor1->SetMagic(66);
auto& reshapeop = PassOperationUtils::AddOperation(*func, Opcode::OP_RESHAPE, {tensor0}, {tensor1});
reshapeop.UpdateSubgraphID(0);
reshapeop.opmagic = 10039;
auto input_tensor = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{16, 64}, CreateTestConstIntVector(std::vector<int64_t>{16, 64}));
input_tensor->SetMemoryTypeBoth(MEM_DEVICE_DDR);
input_tensor->SetMagic(79);
auto& copyoutop0 = PassOperationUtils::AddOperation(*func, Opcode::OP_COPY_OUT, {tensor1}, {input_tensor});
copyoutop0.SetOpAttribute(std::make_shared<CopyOpAttribute>(
MEM_UB, OpImmediate::Specified({0, 0}), shape1Imme, shape1Imme, std::vector<npu::tile_fwk::OpImmediate>()));
copyoutop0.UpdateSubgraphID(0);
copyoutop0.opmagic = 10043;
return input_tensor;
}
static void BuildDifferentOffsetSubgraph1(const std::shared_ptr<Function>& func,
const std::shared_ptr<LogicalTensor>& input_tensor, const std::shared_ptr<LogicalTensor>& output_tensor)
{
auto shape2Imme = OpImmediate::Specified({16, 32});
auto inner_tensor1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{16, 32}, CreateTestConstIntVector(std::vector<int64_t>{16, 32}));
inner_tensor1->SetMemoryTypeBoth(MEM_UB);
inner_tensor1->UpdateOffset({0, 0});
inner_tensor1->SetMagic(30);
auto& copyopin1 = PassOperationUtils::AddOperation(*func, Opcode::OP_COPY_IN, {input_tensor}, {inner_tensor1});
copyopin1.SetOpAttribute(std::make_shared<CopyOpAttribute>(
OpImmediate::Specified({0, 0}), MEM_UB, shape2Imme, shape2Imme, std::vector<npu::tile_fwk::OpImmediate>()));
copyopin1.UpdateSubgraphID(1);
copyopin1.opmagic = 10021;
auto result_tensor1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{16, 32}, CreateTestConstIntVector(std::vector<int64_t>{16, 32}));
result_tensor1->SetMemoryTypeBoth(MEM_UB);
result_tensor1->SetMagic(29);
auto& expopin1 = PassOperationUtils::AddOperation(*func, Opcode::OP_EXP, {inner_tensor1}, {result_tensor1});
expopin1.UpdateSubgraphID(1);
expopin1.opmagic = 10023;
auto& copyoutop1 = PassOperationUtils::AddOperation(*func, Opcode::OP_COPY_OUT, {result_tensor1}, {output_tensor});
copyoutop1.SetOpAttribute(std::make_shared<CopyOpAttribute>(
MEM_UB, OpImmediate::Specified({0, 0}), shape2Imme, shape2Imme, std::vector<npu::tile_fwk::OpImmediate>()));
copyoutop1.UpdateSubgraphID(1);
copyoutop1.opmagic = 10029;
}
static void BuildDifferentOffsetSubgraph2(const std::shared_ptr<Function>& func,
const std::shared_ptr<LogicalTensor>& input_tensor, const std::shared_ptr<LogicalTensor>& output_tensor)
{
auto shape2Imme = OpImmediate::Specified({16, 32});
auto inner_tensor2 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{16, 32}, CreateTestConstIntVector(std::vector<int64_t>{16, 32}));
inner_tensor2->SetMemoryTypeBoth(MEM_UB);
inner_tensor2->UpdateOffset({0, 32});
inner_tensor2->SetMagic(35);
auto& copyopin2 = PassOperationUtils::AddOperation(*func, Opcode::OP_COPY_IN, {input_tensor}, {inner_tensor2});
copyopin2.SetOpAttribute(std::make_shared<CopyOpAttribute>(
OpImmediate::Specified({0, 32}), MEM_UB, shape2Imme, shape2Imme, std::vector<npu::tile_fwk::OpImmediate>()));
copyopin2.UpdateSubgraphID(2);
copyopin2.opmagic = 10024;
auto result_tensor2 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{16, 32}, CreateTestConstIntVector(std::vector<int64_t>{16, 32}));
result_tensor2->SetMemoryTypeBoth(MEM_UB);
result_tensor2->SetMagic(34);
auto& expopin2 = PassOperationUtils::AddOperation(*func, Opcode::OP_EXP, {inner_tensor2}, {result_tensor2});
expopin2.UpdateSubgraphID(2);
expopin2.opmagic = 10026;
auto& copyoutop2 = PassOperationUtils::AddOperation(*func, Opcode::OP_COPY_OUT, {result_tensor2}, {output_tensor});
copyoutop2.SetOpAttribute(std::make_shared<CopyOpAttribute>(
MEM_UB, OpImmediate::Specified({0, 32}), shape2Imme, shape2Imme, std::vector<npu::tile_fwk::OpImmediate>()));
copyoutop2.UpdateSubgraphID(2);
copyoutop2.opmagic = 10030;
}
TEST_F(SubgraphToFunctionTest, DifferentOffset)
{
auto func = std::make_shared<Function>(Program::GetInstance(), "TILE_DifferentOffset", "TILE_DifferentOffset", nullptr);
EXPECT_TRUE(func != nullptr);
config::SetPassConfig("PVC2_OOO", "SubgraphToFunction", "use_max_freq_label", true);
Program::GetInstance().InsertFuncToFunctionMap("TILE_DifferentOffset", func);
auto input_tensor = BuildDifferentOffsetSubgraph0(func);
auto output_tensor = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{16, 64}, CreateTestConstIntVector(std::vector<int64_t>{16, 64}));
output_tensor->SetMemoryTypeBoth(MEM_DEVICE_DDR);
output_tensor->SetMagic(7);
func->outCasts_.push_back(output_tensor);
BuildDifferentOffsetSubgraph1(func, input_tensor, output_tensor);
BuildDifferentOffsetSubgraph2(func, input_tensor, output_tensor);
func->SetTotalSubGraphCount(3);
SubgraphToFunction pass;
pass.PreCheck(*func);
pass.RunOnFunction(*func);
pass.PostCheck(*func);
auto rootFunc = func->rootFunc_;
EXPECT_NE(rootFunc, nullptr);
const auto& PSgToESgMap = GetPSgToESgMap(rootFunc);
std::unordered_set<int> uniquePSgIds;
for (const auto& pair : PSgToESgMap) {
uniquePSgIds.insert(pair.first);
}
EXPECT_EQ(uniquePSgIds.size(), func->GetTotalSubGraphCount());
EXPECT_TRUE(ArePsgHashesUnique(*rootFunc));
EXPECT_TRUE(IsPSgToESgMapOneToOne(PSgToESgMap));
}
static void BuildSameOffsetSubgraph0(const std::shared_ptr<Function>& func,
const std::shared_ptr<LogicalTensor>& input_tensor, const std::shared_ptr<LogicalTensor>& output_tensor)
{
auto shape2Imme = OpImmediate::Specified({16, 32});
auto inner_tensor1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{16, 32}, CreateTestConstIntVector(std::vector<int64_t>{16, 32}));
inner_tensor1->UpdateOffset({0, 0});
inner_tensor1->SetMagic(30);
inner_tensor1->SetMemoryTypeBoth(MEM_UB);
auto& copyopin1 = PassOperationUtils::AddOperation(*func, Opcode::OP_COPY_IN, {input_tensor}, {inner_tensor1});
copyopin1.UpdateSubgraphID(0);
copyopin1.opmagic = 10021;
copyopin1.SetOpAttribute(std::make_shared<CopyOpAttribute>(
OpImmediate::Specified({0, 0}), MEM_UB, shape2Imme, shape2Imme, std::vector<npu::tile_fwk::OpImmediate>()));
auto result_tensor1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{16, 32}, CreateTestConstIntVector(std::vector<int64_t>{16, 32}));
result_tensor1->SetMemoryTypeBoth(MEM_UB);
result_tensor1->SetMagic(29);
auto& expopin1 = PassOperationUtils::AddOperation(*func, Opcode::OP_EXP, {inner_tensor1}, {result_tensor1});
expopin1.UpdateSubgraphID(0);
expopin1.opmagic = 10023;
auto& copyoutop1 = PassOperationUtils::AddOperation(*func, Opcode::OP_COPY_OUT, {result_tensor1}, {output_tensor});
copyoutop1.SetOpAttribute(std::make_shared<CopyOpAttribute>(
MEM_UB, OpImmediate::Specified({0, 0}), shape2Imme, shape2Imme, std::vector<npu::tile_fwk::OpImmediate>()));
copyoutop1.UpdateSubgraphID(0);
copyoutop1.opmagic = 10029;
}
static void BuildSameOffsetSubgraph1(const std::shared_ptr<Function>& func,
const std::shared_ptr<LogicalTensor>& input_tensor, const std::shared_ptr<LogicalTensor>& output_tensor)
{
auto shape2Imme = OpImmediate::Specified({16, 32});
auto inner_tensor2 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{16, 32}, CreateTestConstIntVector(std::vector<int64_t>{16, 32}));
inner_tensor2->SetMemoryTypeBoth(MEM_UB);
inner_tensor2->UpdateOffset({0, 0});
inner_tensor2->SetMagic(35);
auto& copyopin2 = PassOperationUtils::AddOperation(*func, Opcode::OP_COPY_IN, {input_tensor}, {inner_tensor2});
copyopin2.SetOpAttribute(std::make_shared<CopyOpAttribute>(
OpImmediate::Specified({0, 0}), MEM_UB, shape2Imme, shape2Imme, std::vector<npu::tile_fwk::OpImmediate>()));
copyopin2.UpdateSubgraphID(1);
copyopin2.opmagic = 10024;
auto result_tensor2 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{16, 32}, CreateTestConstIntVector(std::vector<int64_t>{16, 32}));
result_tensor2->SetMemoryTypeBoth(MEM_UB);
result_tensor2->SetMagic(34);
auto& expopin2 = PassOperationUtils::AddOperation(*func, Opcode::OP_EXP, {inner_tensor2}, {result_tensor2});
expopin2.UpdateSubgraphID(1);
expopin2.opmagic = 10026;
auto& copyoutop2 = PassOperationUtils::AddOperation(*func, Opcode::OP_COPY_OUT, {result_tensor2}, {output_tensor});
copyoutop2.SetOpAttribute(std::make_shared<CopyOpAttribute>(
MEM_UB, OpImmediate::Specified({16, 0}), shape2Imme, shape2Imme, std::vector<npu::tile_fwk::OpImmediate>()));
copyoutop2.UpdateSubgraphID(1);
copyoutop2.opmagic = 10030;
}
TEST_F(SubgraphToFunctionTest, SameOffset)
{
auto func = std::make_shared<Function>(Program::GetInstance(), "TILE_SameOffset", "TILE_SameOffset", nullptr);
EXPECT_TRUE(func != nullptr);
Program::GetInstance().InsertFuncToFunctionMap("TILE_SameOffset", func);
auto input_tensor = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{16, 64}, CreateTestConstIntVector(std::vector<int64_t>{16, 64}));
input_tensor->SetMemoryTypeBoth(MEM_DEVICE_DDR);
input_tensor->SetMagic(79);
auto output_tensor = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{32, 32}, CreateTestConstIntVector(std::vector<int64_t>{32, 32}));
output_tensor->SetMemoryTypeBoth(MEM_DEVICE_DDR);
output_tensor->SetMagic(7);
BuildSameOffsetSubgraph0(func, input_tensor, output_tensor);
BuildSameOffsetSubgraph1(func, input_tensor, output_tensor);
func->inCasts_.push_back(input_tensor);
func->outCasts_.push_back(output_tensor);
func->SetTotalSubGraphCount(2);
Json progDump;
progDump["version"] = "2.0";
progDump["functions"].push_back(func->DumpJson());
std::ofstream file("Before_subgraphIsomorphismPass.json");
file << progDump.dump() << std::endl;
file.close();
SubgraphToFunction pass;
pass.PreCheck(*func);
pass.RunOnFunction(*func);
pass.PostCheck(*func);
auto rootFunc = func->rootFunc_;
EXPECT_NE(rootFunc, nullptr);
const auto& PSgToESgMap = GetPSgToESgMap(rootFunc);
std::unordered_set<int> uniquePSgIds;
for (const auto& pair : PSgToESgMap) {
uniquePSgIds.insert(pair.first);
}
size_t mergedSubgraphCount = uniquePSgIds.size();
EXPECT_EQ(mergedSubgraphCount, 2);
EXPECT_TRUE(ArePsgHashesUnique(*rootFunc));
EXPECT_TRUE(IsPSgToESgMapOneToOne(PSgToESgMap));
}
TEST_F(SubgraphToFunctionTest, test_json_dump_and_load_1)
{
int32_t shape0 = 4;
int32_t shape1 = 32;
int32_t k = 8;
bool isLargest = true;
PROGRAM("TOPK")
{
std::vector<int64_t> input_shape = {shape0, shape1};
std::vector<int64_t> output_shape = {shape0, k};
TileShape::Current().SetVecTile({shape0, shape1});
Tensor input_a(DT_FP32, input_shape, (uint8_t*)nullptr, "A");
auto output = std::make_tuple(
Tensor(DT_FP32, output_shape, nullptr, "npu_val"), Tensor(DT_FP32, output_shape, nullptr, "resDics"));
config::SetBuildStatic(true);
FUNCTION("TOPK_T", {input_a, std::get<0>(output), std::get<1>(output)})
{
output = TopK(input_a, k, -1, isLargest);
}
}
Json programJson = Program::GetInstance().DumpJson();
Program::GetInstance().LoadJson(programJson);
Json programJsonNew = Program::GetInstance().DumpJson();
Program::GetInstance().LoadJson(programJsonNew);
Json programJsonNewNew = Program::GetInstance().DumpJson();
#ifndef PRIOR_SCHEDULING
EXPECT_EQ(programJsonNew.dump(), programJsonNewNew.dump());
#endif
config::SetHostOption(COMPILE_STAGE, CS_ALL_COMPLETE);
}
TEST_F(SubgraphToFunctionTest, test_json_dump_and_load_1_cov)
{
int32_t shape0 = 4;
int32_t shape1 = 32;
int32_t k = 8;
bool isLargest = true;
PROGRAM("TOPK")
{
std::vector<int64_t> input_shape = {shape0, shape1};
std::vector<int64_t> output_shape = {shape0, k};
TileShape::Current().SetVecTile({shape0, shape1});
Tensor input_a(DT_FP32, input_shape, (uint8_t*)nullptr, "A");
auto output = std::make_tuple(
Tensor(DT_FP32, output_shape, nullptr, "npu_val"), Tensor(DT_FP32, output_shape, nullptr, "resDics"));
config::SetBuildStatic(true);
FUNCTION("TOPK_T", {input_a, std::get<0>(output), std::get<1>(output)})
{
output = TopK(input_a, k, -1, isLargest);
}
}
Json programJson = Program::GetInstance().DumpJson();
Program::GetInstance().LoadJson(programJson);
Json programJsonNew = Program::GetInstance().DumpJson();
Program::GetInstance().LoadJson(programJsonNew);
Json programJsonNewNew = Program::GetInstance().DumpJson();
config::SetHostOption(COMPILE_STAGE, CS_ALL_COMPLETE);
}
TEST_F(SubgraphToFunctionTest, test_json_dump_and_load_2)
{
IfaTileShapeConfig tileConfig{
256,
32,
{256, 128},
{32, 32, 64, 64, 256, 256},
{32, 256},
{32, 32, 64, 64, 256, 256},
{32, 256},
};
const int b = 4;
const int nq = 32;
const int s2 = 256;
const int blockSize = tileConfig.blockSize;
const int sq = 1;
const int dn = 512;
const int dr = 64;
const int nkv = 1;
std::vector<int> actSeqs(b, s2);
const float softmaxScale = 0.8f;
int blockNum = 0;
for (auto s : actSeqs) {
blockNum += CeilDiv(s, blockSize);
}
config::SetHostOption(COMPILE_STAGE, CS_EXECUTE_GRAPH);
PROGRAM("PageAttentionStatic")
{
Tensor qNope(DT_BF16, {b * sq * nq, dn}, (uint8_t*)nullptr, "qNope");
Tensor qRope(DT_BF16, {b * sq * nq, dr}, (uint8_t*)nullptr, "qRope");
Tensor kNopeCache(DT_BF16, {blockNum * blockSize * nkv, dn}, (uint8_t*)nullptr, "kNopeCache");
Tensor kRopeCache(DT_BF16, {blockNum * blockSize * nkv, dr}, (uint8_t*)nullptr, "kRope");
Tensor vNopeCache(DT_BF16, {blockNum * blockSize * nkv, dn}, (uint8_t*)nullptr, "vNopeCache");
int maxSeqAllBatch = *(std::max_element(actSeqs.begin(), actSeqs.end()));
int maxBlockNumPerBatch = CeilDiv(maxSeqAllBatch, blockSize);
std::vector<std::vector<int>> blockTable(b, std::vector<int>(maxBlockNumPerBatch, 0));
Tensor attentionOut(DT_FP32, {b * sq * nq, dn}, nullptr, "attentionOut");
config::SetBuildStatic(true);
FUNCTION("IfaStatic", {qNope, kNopeCache, vNopeCache, qRope, kRopeCache, attentionOut})
{
IncreFlashAttention(
qNope, kNopeCache, vNopeCache, qRope, kRopeCache, blockTable, actSeqs, softmaxScale, attentionOut,
tileConfig);
}
}
Json programJson = Program::GetInstance().DumpJson();
Program::GetInstance().LoadJson(programJson);
Json programJsonNew = Program::GetInstance().DumpJson();
Program::GetInstance().LoadJson(programJsonNew);
Json programJsonNewNew = Program::GetInstance().DumpJson();
#ifndef PRIOR_SCHEDULING
EXPECT_EQ(programJsonNew.dump(), programJsonNewNew.dump());
#endif
config::SetHostOption(COMPILE_STAGE, CS_ALL_COMPLETE);
}
* input -> view1(01) -> view1_out -/
* add(03) -> add_out -> abc(04) -> final_out
* input -> View2(01) -> view2_out -/
*/
void InitGraphBuilder(ComputationalGraphBuilder& G, std::vector<int64_t> tileShape)
{
std::vector<std::string> tensorNames = {"input", "view1_out", "view2_out", "add_out", "final_out"};
std::vector<Opcode> opCodes = {Opcode::OP_VIEW, Opcode::OP_VIEW, Opcode::OP_ADD, Opcode::OP_ABS};
std::vector<std::vector<std::string>> ioperands = {
{"input"},
{"input"},
{"view1_out", "view2_out"},
{"add_out"}
};
std::vector<std::vector<std::string>> ooperands = {{"view1_out"}, {"view2_out"}, {"add_out"}, {"final_out"}};
std::vector<std::string> opNames = {"view1", "view2", "add", "abs_final"};
EXPECT_TRUE(G.AddTensors(DataType::DT_FP32, tileShape, tensorNames));
EXPECT_TRUE(G.AddOps(opCodes, ioperands, ooperands, opNames, true));
auto input_tensor = G.GetTensor("input");
auto final_out_tensor = G.GetTensor("final_out");
input_tensor->SetMemoryTypeBoth(MemoryType::MEM_DEVICE_DDR);
final_out_tensor->SetMemoryTypeBoth(MemoryType::MEM_DEVICE_DDR);
EXPECT_TRUE(G.SetInCast({"input"}));
EXPECT_TRUE(G.SetOutCast({"final_out"}));
for (const auto& opName : opNames) {
G.GetOp(opName)->UpdateSubgraphID(0);
}
}
TEST_F(SubgraphToFunctionTest, TestBasicSubgraphConversion)
{
ComputationalGraphBuilder G;
std::vector<int64_t> tileShape{16, 16};
InitGraphBuilder(G, tileShape);
Function* function = G.GetFunction();
ASSERT_NE(function, nullptr);
function->SetTotalSubGraphCount(1);
SubgraphToFunction pass;
Status status = pass.RunOnFunction(*function);
status = pass.PostCheck(*function);
EXPECT_EQ(status, SUCCESS);
Function* rootFunc = function->rootFunc_;
ASSERT_NE(rootFunc, nullptr);
EXPECT_EQ(rootFunc->GetGraphType(), GraphType::EXECUTE_GRAPH);
const auto& topoInfo = rootFunc->topoInfo_;
EXPECT_EQ(topoInfo.topology_.size(), 1);
auto leafFunc = rootFunc->programs_.begin()->second;
EXPECT_EQ(leafFunc->Operations().size(), 4);
}
static Function* BuildIsomorphicSubgraphsWithDifferentVecHashOrder(ComputationalGraphBuilder& G)
{
EXPECT_TRUE(G.AddTensors(DataType::DT_FP32, {16, 16}, {"input", "out0", "out1"}));
EXPECT_TRUE(G.AddOps(
{Opcode::OP_ABS, Opcode::OP_ABS}, {{"input"}, {"input"}}, {{"out0"}, {"out1"}}, {"abs0", "abs1"}, true));
G.GetTensor("input")->SetMemoryTypeBoth(MemoryType::MEM_DEVICE_DDR);
G.GetTensor("out0")->SetMemoryTypeBoth(MemoryType::MEM_DEVICE_DDR);
G.GetTensor("out1")->SetMemoryTypeBoth(MemoryType::MEM_DEVICE_DDR);
EXPECT_TRUE(G.SetInCast({"input"}));
EXPECT_TRUE(G.SetOutCast({"out0", "out1"}));
G.GetOp("abs0")->UpdateSubgraphID(0);
G.GetOp("abs1")->UpdateSubgraphID(1);
G.GetOp("abs0")->SetHashOrderInfo(
OpAttributeKey::vecMergeHashOrder, OpAttributeKey::vecMergeSubgraphCount, "func8_0", 4);
G.GetOp("abs1")->SetHashOrderInfo(
OpAttributeKey::vecMergeHashOrder, OpAttributeKey::vecMergeSubgraphCount, "func9_0", 1);
auto* function = G.GetFunction();
function->SetTotalSubGraphCount(2);
return function;
}
TEST_F(SubgraphToFunctionTest, PreservesHashOrderInfoOnIsomorphicCallOps)
{
ComputationalGraphBuilder G;
Function* function = BuildIsomorphicSubgraphsWithDifferentVecHashOrder(G);
SubgraphToFunction pass;
EXPECT_EQ(pass.RunOnFunction(*function), SUCCESS);
Function* rootFunc = function->rootFunc_;
ASSERT_NE(rootFunc, nullptr);
EXPECT_EQ(rootFunc->programs_.size(), 1);
const auto& callOps = rootFunc->Operations();
ASSERT_EQ(callOps.size(), 2);
auto firstInfo = callOps[0].GetHashOrderInfo(OpAttributeKey::vecMergeHashOrder,
OpAttributeKey::vecMergeSubgraphCount);
auto secondInfo = callOps[1].GetHashOrderInfo(OpAttributeKey::vecMergeHashOrder,
OpAttributeKey::vecMergeSubgraphCount);
EXPECT_EQ(firstInfo.first, "func8_0");
EXPECT_EQ(firstInfo.second, 4);
EXPECT_EQ(secondInfo.first, "func9_0");
EXPECT_EQ(secondInfo.second, 1);
}
static void BuildMultiSubgraphDependencyGraph(ComputationalGraphBuilder& G)
{
std::vector<std::string> tensorNames = {"input", "aic_out", "aiv_out", "final_out"};
std::vector<Opcode> opCodes = {Opcode::OP_A_MUL_B, Opcode::OP_ADD, Opcode::OP_EXP};
std::vector<std::vector<std::string>> ioperands = {{"input"}, {"aic_out"}, {"aiv_out"}};
std::vector<std::vector<std::string>> ooperands = {{"aic_out"}, {"aiv_out"}, {"final_out"}};
std::vector<std::string> opNames = {"matmul_aic", "add_aiv", "exp_aicpu"};
EXPECT_TRUE(G.AddTensors(DataType::DT_FP32, {16, 16}, tensorNames));
EXPECT_TRUE(G.AddOps(opCodes, ioperands, ooperands, opNames, true));
G.GetOp("matmul_aic")->UpdateSubgraphID(0);
G.GetOp("matmul_aic")->SetCoreType(CoreType::AIC);
G.GetOp("matmul_aic")->SetAttribute(OpAttributeKey::isCube, true);
G.GetOp("add_aiv")->UpdateSubgraphID(1);
G.GetOp("add_aiv")->SetCoreType(CoreType::AIV);
G.GetOp("exp_aicpu")->UpdateSubgraphID(2);
G.GetOp("exp_aicpu")->SetCoreType(CoreType::AICPU);
G.GetTensor("input")->SetMemoryTypeBoth(MemoryType::MEM_DEVICE_DDR);
G.GetTensor("final_out")->SetMemoryTypeBoth(MemoryType::MEM_DEVICE_DDR);
EXPECT_TRUE(G.SetInCast({"input"}));
EXPECT_TRUE(G.SetOutCast({"final_out"}));
}
static void VerifyMultiSubgraphDependency(Function* rootFunc)
{
ASSERT_NE(rootFunc, nullptr);
EXPECT_EQ(rootFunc->programs_.size(), 3);
const auto& topoInfo = rootFunc->topoInfo_;
EXPECT_EQ(topoInfo.topology_.size(), 3);
EXPECT_EQ(topoInfo.topology_[0].outGraph, std::unordered_set<int>{1});
EXPECT_EQ(topoInfo.topology_[1].outGraph, std::unordered_set<int>{2});
EXPECT_TRUE(topoInfo.topology_[2].outGraph.empty());
EXPECT_EQ(topoInfo.topology_[0].readyState, 0);
EXPECT_EQ(topoInfo.topology_[1].readyState, -1);
EXPECT_EQ(topoInfo.topology_[2].readyState, -1);
const auto& callOps = rootFunc->Operations();
ASSERT_EQ(callOps.size(), 3);
auto check_graph_type = [&callOps](size_t idx, CoreType expected) {
auto attr = dynamic_cast<CallOpAttribute*>(callOps[idx].GetOpAttribute().get());
ASSERT_NE(attr, nullptr);
EXPECT_EQ(attr->invokeInfo_->GetGraphType(), expected);
};
check_graph_type(0, CoreType::AIC);
check_graph_type(1, CoreType::AIV);
check_graph_type(2, CoreType::AICPU);
EXPECT_EQ(rootFunc->GetReadySubGraphCount(CoreType::AIC), 1);
EXPECT_EQ(rootFunc->GetReadySubGraphCount(CoreType::AIV), 0);
EXPECT_EQ(rootFunc->GetReadySubGraphCount(CoreType::AICPU), 0);
}
TEST_F(SubgraphToFunctionTest, MultiSubgraphDependencyWithMixedOps)
{
ComputationalGraphBuilder G;
BuildMultiSubgraphDependencyGraph(G);
Function* function = G.GetFunction();
ASSERT_NE(function, nullptr);
function->SetTotalSubGraphCount(3);
SubgraphToFunction pass;
Status status = pass.RunOnFunction(*function);
EXPECT_EQ(status, SUCCESS);
VerifyMultiSubgraphDependency(function->rootFunc_);
}
TEST_F(SubgraphToFunctionTest, EliminateRedundantEdges)
{
ComputationalGraphBuilder G;
std::vector<std::string> tensorNames{"t0", "t1", "t2", "t3", "t4", "t5", "t6"};
std::vector<Opcode> opCodes{
Opcode::OP_ADD,
Opcode::OP_CONV2D,
Opcode::OP_ABS,
Opcode::OP_ADD,
Opcode::OP_MAXIMUM
};
std::vector<std::vector<std::string>> ioperands{
{"t0", "t1"},
{"t2"},
{"t2"},
{"t3", "t4"},
{"t4", "t5"}
};
std::vector<std::vector<std::string>> ooperands{{"t2"}, {"t3"}, {"t4"}, {"t5"}, {"t6"}};
std::vector<std::string> opNames{"ADD1_SG0", "CONV_SG1", "ABS_SG2", "ADD2_SG3", "MAX_SG4"};
EXPECT_TRUE(G.AddTensors(DataType::DT_FP32, {16, 16}, tensorNames));
EXPECT_TRUE(G.AddOps(opCodes, ioperands, ooperands, opNames, true));
G.GetOp("ADD1_SG0")->UpdateSubgraphID(0);
G.GetOp("CONV_SG1")->UpdateSubgraphID(1);
G.GetOp("ABS_SG2")->UpdateSubgraphID(2);
G.GetOp("ADD2_SG3")->UpdateSubgraphID(3);
G.GetOp("MAX_SG4")->UpdateSubgraphID(4);
EXPECT_TRUE(G.SetInCast({"t0", "t1"}));
EXPECT_TRUE(G.SetOutCast({"t6"}));
Function* function = G.GetFunction();
ASSERT_NE(function, nullptr);
function->SetTotalSubGraphCount(5);
SubgraphToFunction pass;
pass.SetupStaticProcessor();
pass.staticProcessor_.BuildGraph(*function);
pass.RecordIncastOutcast(*function);
auto* abs_op = G.GetOp("ABS_SG2");
auto* add2_op = G.GetOp("ADD2_SG3");
auto* max_op = G.GetOp("MAX_SG4");
auto abs_consumers1 = abs_op->ConsumerOps();
EXPECT_TRUE(abs_consumers1.find(add2_op) != abs_consumers1.end());
EXPECT_TRUE(abs_consumers1.find(max_op) != abs_consumers1.end());
pass.staticProcessor_.BuildColorGraph(*function);
pass.staticProcessor_.EraseRedundantColorEdges(*function);
const auto& colorOutGraph = pass.staticProcessor_.colorOutGraph;
const int abs_sgid = G.GetOp("ABS_SG2")->GetSubgraphID();
const int max_sgid = G.GetOp("MAX_SG4")->GetSubgraphID();
const auto& abs_out_edges = colorOutGraph[abs_sgid];
bool found = std::find(abs_out_edges.begin(), abs_out_edges.end(), max_sgid) != abs_out_edges.end();
EXPECT_FALSE(found) << "Redundant edge not removed!";
}
TEST_F(SubgraphToFunctionTest, ReshapeDependencyHandling)
{
ComputationalGraphBuilder G;
std::vector<std::string> tensorNames{"t0", "t1", "t2"};
std::vector<Opcode> opCodes{Opcode::OP_RESHAPE, Opcode::OP_ABS};
std::vector<std::vector<std::string>> ioperands{
{"t0"},
{"t1"}
};
std::vector<std::vector<std::string>> ooperands{{"t1"}, {"t2"}};
std::vector<std::string> opNames{"RESHAPE_SG0", "ABS_SG1"};
EXPECT_TRUE(G.AddTensors(DataType::DT_FP32, {16, 16}, tensorNames));
EXPECT_TRUE(G.AddOps(opCodes, ioperands, ooperands, opNames, true));
auto set_subgraph_id = [&G](const std::string& op_name, int id) {
auto* op = G.GetOp(op_name);
ASSERT_NE(op, nullptr) << "Operation " << op_name << " not found!";
op->UpdateSubgraphID(id);
};
set_subgraph_id("RESHAPE_SG0", 0);
set_subgraph_id("ABS_SG1", 1);
Function* function = G.GetFunction();
ASSERT_NE(function, nullptr);
function->SetTotalSubGraphCount(2);
SubgraphToFunction pass;
pass.SetupStaticProcessor();
pass.staticProcessor_.BuildGraph(*function);
pass.RecordIncastOutcast(*function);
pass.ConstructParamMap(*function);
auto* reshape_op = G.GetOp("RESHAPE_SG0");
ASSERT_NE(reshape_op, nullptr);
const int reshape_sgid = reshape_op->GetSubgraphID();
EXPECT_TRUE(pass.staticProcessor_.isReshape[reshape_sgid])
<< "RESHAPE subgraph should be marked when it has no input subgraph and single reshape op";
EXPECT_TRUE(function->topoInfo_.GetSuccs(reshape_sgid).empty())
<< "RESHAPE subgraph should have empty successors set";
int expected_out_degree = 0;
bool found = false;
for (const auto& entry : function->topoInfo_.GetTopology()) {
if (entry.esgId == 1) {
EXPECT_EQ(entry.readyState, -1 * expected_out_degree)
<< "Consumer subgraph's readyOrNot should exclude RESHAPE inputs";
found = true;
break;
}
}
EXPECT_TRUE(found) << "ABS_SG1 subgraph entry not found in topology";
}
}
}