/**
 * Copyright (c) 2025 Huawei Technologies Co., Ltd.
 * This program is free software, you can redistribute it and/or modify it under the terms and conditions of
 * CANN Open Software License Agreement Version 2.0 (the "License").
 * Please refer to the License for details. You may not use this file except in compliance with the License.
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
 * See LICENSE in the root of the software repository for the full text of the License.
 */

/*!
 * \file test_graph_partition.cpp
 * \brief Unit test for GraphPartition pass.
 */
#include <fstream>
#include <vector>
#include <string>
#include "gtest/gtest.h"
#include "tilefwk/data_type.h"
#include "tilefwk/tilefwk_op.h"
#include "interface/function/function.h"
#include "interface/tensor/irbuilder.h"
#include "symbolic_scalar_test_utils.h"
#include "passes/tile_graph_pass/graph_partition/iso_partitioner.h"
#include "passes/tile_graph_pass/graph_partition/graph_partition.h"
#include "tilefwk/tilefwk.h"
#include "interface/inner/tilefwk.h"
#include "passes/pass_mgr/pass_manager.h"
#include "interface/configs/config_manager.h"
#include "computational_graph_builder.h"
namespace npu {
namespace tile_fwk {
// Fixture shared by the GraphPartition tests in this file.
//  - SetUp resets the Program singleton and the config state, selects the CS_EXECUTE_GRAPH compile stage
//    and the "GraphPartitionTestStrategy" strategy, then asks the Platform singleton to obtain platform info.
//  - TearDown sets the SoC NPU architecture to DAV_UNKNOWN.
class GraphPartitionTest : public testing::Test {
public:
    // Suite-level hooks are intentionally empty.
    static void SetUpTestCase() {}
    static void TearDownTestCase() {}

    void SetUp() override
    {
        Program::GetInstance().Reset();
        config::Reset();
        config::SetHostOption(COMPILE_STAGE, CS_EXECUTE_GRAPH);
        config::SetHostConfig(KEY_STRATEGY, "GraphPartitionTestStrategy");
        Platform::Instance().ObtainPlatformInfo();
    }

    void TearDown() override
    {
        Platform::Instance().GetSoc().SetNPUArch(NPUArch::DAV_UNKNOWN);
    }
};
// Applies the same scope info to every operation listed in opNames.
//   G       - builder that owns the operations; each name is resolved through G.GetOp.
//   opNames - names of the operations to update.
//   info    - scope info passed to SetScopeInfo on each resolved operation.
// A name that does not resolve to an operation raises a fatal test failure and stops the helper,
// instead of dereferencing a null pointer.
void SetScopeInfoForOps(ComputationalGraphBuilder& G,
    const std::vector<std::string>& opNames, const Operation::ScopeInfo& info)
{
    for (const auto& name : opNames) {
        auto* op = G.GetOp(name);
        ASSERT_NE(op, nullptr) << "operation not found in graph builder: " << name;
        op->SetScopeInfo(info);
    }
}
// Fills G with four identical branches COPY_IN<i> -> MULS<i> -> ADDS<i> over tensors t1<i>..t4<i>,
// then chains three PAIRSUM ops (SUM1..SUM3) over t40..t43 with intermediate tensors s1..s3.
// In-casts are t10..t13 and the only out-cast is s3. Every tensor is FP32 with shape {16, 16}.
void GetPairSumGraph(ComputationalGraphBuilder& G)
{
    constexpr int kBranchCount = 4;
    const std::vector<int64_t> shape{16, 16};

    for (int branch = 0; branch < kBranchCount; ++branch) {
        const std::string suffix = std::to_string(branch);
        const std::string t1 = "t1" + suffix;
        const std::string t2 = "t2" + suffix;
        const std::string t3 = "t3" + suffix;
        const std::string t4 = "t4" + suffix;

        std::vector<std::string> tensors{t1, t2, t3, t4};
        std::vector<Opcode> opcodes{Opcode::OP_COPY_IN, Opcode::OP_MULS, Opcode::OP_ADDS};
        std::vector<std::vector<std::string>> inputs{{t1}, {t2}, {t3}};
        std::vector<std::vector<std::string>> outputs{{t2}, {t3}, {t4}};
        std::vector<std::string> names{"COPY_IN" + suffix, "MULS" + suffix, "ADDS" + suffix};

        EXPECT_TRUE(G.AddTensors(DataType::DT_FP32, shape, tensors));
        EXPECT_TRUE(G.AddOps(opcodes, inputs, outputs, names, true));
    }

    // Reduction chain: SUM1(t40, t41) -> s1, SUM2(t42, s1) -> s2, SUM3(t43, s2) -> s3.
    std::vector<std::string> sumTensors{"s1", "s2", "s3"};
    std::vector<Opcode> sumOpcodes(3, Opcode::OP_PAIRSUM);
    std::vector<std::vector<std::string>> sumInputs{{"t40", "t41"}, {"t42", "s1"}, {"t43", "s2"}};
    std::vector<std::vector<std::string>> sumOutputs{{"s1"}, {"s2"}, {"s3"}};
    std::vector<std::string> sumNames{"SUM1", "SUM2", "SUM3"};

    EXPECT_TRUE(G.AddTensors(DataType::DT_FP32, shape, sumTensors));
    EXPECT_TRUE(G.AddOps(sumOpcodes, sumInputs, sumOutputs, sumNames, true));
    EXPECT_TRUE(G.SetInCast({"t10", "t11", "t12", "t13"}));
    EXPECT_TRUE(G.SetOutCast({"s3"}));
}
// Partitions the pair-sum graph with IsoPartitioner (node hashing off) and inspects the partitioner's
// operation-level bookkeeping:
//  - every per-operation container has one entry per operation of the function;
//  - selected ops have the expected number of in-links and out-links;
//  - COPY_IN0/COPY_IN1 share an op hash, SUM1/SUM2 share an op hash, COPY_IN0 and SUM2 do not;
//  - the super nodes holding the four COPY_IN ops carry three distinct node hashes.
// Pointer lookups and the partitioning itself use fatal assertions because the following statements
// dereference their results.
TEST_F(GraphPartitionTest, TestBuildOpGraph)
{
    ComputationalGraphBuilder G;
    GetPairSumGraph(G);
    Function* function = G.GetFunction();
    ASSERT_NE(function, nullptr);
    const int parallelTH = 10;
    const int cycleLB = 10;
    const bool useNodeHash = false;
    IsoPartitioner partitioner;
    ASSERT_EQ(partitioner.SetParameter(parallelTH, cycleLB, useNodeHash), SUCCESS);
    // Everything below reads partitioner state, so stop if partitioning did not succeed.
    ASSERT_EQ(partitioner.PartitionGraph(*function), SUCCESS);

    auto& opInfo = *partitioner.operationInfo_;
    const auto opCount = function->Operations().size();
    EXPECT_EQ(opInfo.opList_.size(), opCount);
    EXPECT_EQ(opInfo.magic2Idx_.size(), opCount);
    EXPECT_EQ(opInfo.inGraph_.size(), opCount);
    EXPECT_EQ(opInfo.opHashList_.size(), opCount);
    EXPECT_EQ(opInfo.opCoreType_.size(), opCount);

    // Expected number of incoming links per operation.
    const std::vector<std::pair<std::string, int>> inLinkNum{{"COPY_IN0", 0}, {"MULS0", 1}, {"ADDS0", 1},
                                                             {"SUM1", 2},     {"SUM2", 2},  {"SUM3", 2}};
    for (const auto& pr : inLinkNum) {
        auto* op = G.GetOp(pr.first);
        ASSERT_NE(op, nullptr) << pr.first;
        int opMagic = op->GetOpMagic();
        int opIdx = opInfo.magic2Idx_[opMagic];
        EXPECT_EQ(opInfo.inGraph_[opIdx].size(), pr.second) << pr.first;
    }

    // Expected number of outgoing links per operation.
    const std::vector<std::pair<std::string, int>> outLinkNum{{"COPY_IN0", 1}, {"MULS0", 1}, {"ADDS0", 1},
                                                              {"SUM1", 1},     {"SUM2", 1},  {"SUM3", 0}};
    for (const auto& pr : outLinkNum) {
        auto* op = G.GetOp(pr.first);
        ASSERT_NE(op, nullptr) << pr.first;
        int opMagic = op->GetOpMagic();
        int opIdx = opInfo.magic2Idx_[opMagic];
        EXPECT_EQ(opInfo.outGraph_[opIdx].size(), pr.second) << pr.first;
    }

    // Op-hash comparisons between selected operations.
    auto* copyIn0 = G.GetOp("COPY_IN0");
    auto* copyIn1 = G.GetOp("COPY_IN1");
    auto* sum1 = G.GetOp("SUM1");
    auto* sum2 = G.GetOp("SUM2");
    ASSERT_NE(copyIn0, nullptr);
    ASSERT_NE(copyIn1, nullptr);
    ASSERT_NE(sum1, nullptr);
    ASSERT_NE(sum2, nullptr);
    int copyIdx0 = opInfo.magic2Idx_[copyIn0->GetOpMagic()];
    int copyIdx1 = opInfo.magic2Idx_[copyIn1->GetOpMagic()];
    int sumIdx1 = opInfo.magic2Idx_[sum1->GetOpMagic()];
    int sumIdx2 = opInfo.magic2Idx_[sum2->GetOpMagic()];
    EXPECT_EQ(opInfo.opHashList_[copyIdx0], opInfo.opHashList_[copyIdx1]);
    EXPECT_EQ(opInfo.opHashList_[sumIdx1], opInfo.opHashList_[sumIdx2]);
    EXPECT_NE(opInfo.opHashList_[copyIdx0], opInfo.opHashList_[sumIdx2]);

    // Collect the node hash of the super node that contains each COPY_IN op.
    std::unordered_set<uint64_t> copyInHash;
    const int brNum = 4;
    for (int i = 0; i < brNum; i++) {
        const std::string opName = "COPY_IN" + std::to_string(i);
        auto* op = G.GetOp(opName);
        ASSERT_NE(op, nullptr) << opName;
        int opMagic = op->GetOpMagic();
        int opIdx = opInfo.magic2Idx_[opMagic];
        int nodeIdx = partitioner.superNodeInfo_->op2Node_[opIdx];
        copyInHash.insert(partitioner.superNodeInfo_->nodeHashList_[nodeIdx]);
    }
    const int copyInHashNum = 3;
    EXPECT_EQ(copyInHash.size(), copyInHashNum);
}
// Fills G with a fan-in / fan-out graph around a shared reshape:
//  - four front branches COPY_IN<i> -> RESHAPE_IN<i> -> ASSEMBLE<i>, all assembling into tensor "rin";
//  - one MULTI_RESHAPE op from "rin" to "rout";
//  - four back branches VIEW<i> (reading "rout") -> RESHAPE_OUT<i> -> COPY_OUT<i>.
// In-casts are t10..t13, out-casts are b30..b33. Every tensor is FP32 with shape {16, 16}.
void GetReshapeGraph(ComputationalGraphBuilder& G)
{
    constexpr int kBranchCount = 4;
    const std::vector<int64_t> shape{16, 16};
    EXPECT_TRUE(G.AddTensors(DataType::DT_FP32, shape, {"rin", "rout"}));

    // Front half: each branch ends by writing into the shared tensor "rin".
    for (int branch = 0; branch < kBranchCount; ++branch) {
        const std::string suffix = std::to_string(branch);
        const std::string t1 = "t1" + suffix;
        const std::string t2 = "t2" + suffix;
        const std::string t3 = "t3" + suffix;

        std::vector<std::string> tensors{t1, t2, t3};
        std::vector<Opcode> opcodes{Opcode::OP_COPY_IN, Opcode::OP_RESHAPE, Opcode::OP_ASSEMBLE};
        std::vector<std::vector<std::string>> inputs{{t1}, {t2}, {t3}};
        std::vector<std::vector<std::string>> outputs{{t2}, {t3}, {"rin"}};
        std::vector<std::string> names{"COPY_IN" + suffix, "RESHAPE_IN" + suffix, "ASSEMBLE" + suffix};

        EXPECT_TRUE(G.AddTensors(DataType::DT_FP32, shape, tensors));
        EXPECT_TRUE(G.AddOps(opcodes, inputs, outputs, names, true));
    }

    EXPECT_TRUE(G.AddOp(Opcode::OP_RESHAPE, {"rin"}, {"rout"}, "MULTI_RESHAPE", true));

    // Back half: each branch starts by viewing the shared tensor "rout".
    for (int branch = 0; branch < kBranchCount; ++branch) {
        const std::string suffix = std::to_string(branch);
        const std::string b1 = "b1" + suffix;
        const std::string b2 = "b2" + suffix;
        const std::string b3 = "b3" + suffix;

        std::vector<std::string> tensors{b1, b2, b3};
        std::vector<Opcode> opcodes{Opcode::OP_VIEW, Opcode::OP_RESHAPE, Opcode::OP_COPY_OUT};
        std::vector<std::vector<std::string>> inputs{{"rout"}, {b1}, {b2}};
        std::vector<std::vector<std::string>> outputs{{b1}, {b2}, {b3}};
        std::vector<std::string> names{"VIEW" + suffix, "RESHAPE_OUT" + suffix, "COPY_OUT" + suffix};

        EXPECT_TRUE(G.AddTensors(DataType::DT_FP32, shape, tensors));
        EXPECT_TRUE(G.AddOps(opcodes, inputs, outputs, names, true));
    }

    EXPECT_TRUE(G.SetInCast({"t10", "t11", "t12", "t13"}));
    EXPECT_TRUE(G.SetOutCast({"b30", "b31", "b32", "b33"}));
}
// Partitions the reshape graph with IsoPartitioner (node hashing off) and checks that
//  - the four RESHAPE_IN ops map to four distinct super nodes,
//  - the four RESHAPE_OUT ops map to four distinct super nodes,
//  - the function ends up with nine sub-graphs.
// Operation lookups use fatal assertions because their results are dereferenced immediately.
TEST_F(GraphPartitionTest, TestSuperNode)
{
    ComputationalGraphBuilder G;
    GetReshapeGraph(G);
    Function* function = G.GetFunction();
    ASSERT_NE(function, nullptr);
    const int parallelTH = 10;
    const int cycleLB = 10;
    const bool useNodeHash = false;
    IsoPartitioner partitioner;
    ASSERT_EQ(partitioner.SetParameter(parallelTH, cycleLB, useNodeHash), SUCCESS);
    // The checks below read partitioner state, so stop if partitioning did not succeed.
    ASSERT_EQ(partitioner.PartitionGraph(*function), SUCCESS);

    const int brNum = 4;
    // Front reshapes first, then back reshapes; each group must occupy brNum different super nodes.
    for (const char* prefix : {"RESHAPE_IN", "RESHAPE_OUT"}) {
        std::unordered_set<int> reshapeNodes;
        for (int i = 0; i < brNum; i++) {
            const std::string opName = prefix + std::to_string(i);
            auto* op = G.GetOp(opName);
            ASSERT_NE(op, nullptr) << opName;
            int opMagic = op->GetOpMagic();
            int opIdx = partitioner.operationInfo_->magic2Idx_[opMagic];
            reshapeNodes.insert(partitioner.superNodeInfo_->op2Node_[opIdx]);
        }
        EXPECT_EQ(reshapeNodes.size(), brNum) << prefix;
    }

    const int subGraphNum = 9;
    EXPECT_EQ(function->GetTotalSubGraphCount(), subGraphNum);
}
// Partitions the pair-sum graph with node hashing enabled and checks that the super nodes holding the
// four COPY_IN ops all carry the same node hash.
// Operation lookups use fatal assertions because their results are dereferenced immediately.
TEST_F(GraphPartitionTest, TestReduceNodeHash)
{
    ComputationalGraphBuilder G;
    GetPairSumGraph(G);
    Function* function = G.GetFunction();
    ASSERT_NE(function, nullptr);
    const int parallelTH = 10;
    const int cycleLB = 10;
    const bool useNodeHash = true;
    IsoPartitioner partitioner;
    ASSERT_EQ(partitioner.SetParameter(parallelTH, cycleLB, useNodeHash), SUCCESS);
    // The checks below read partitioner state, so stop if partitioning did not succeed.
    ASSERT_EQ(partitioner.PartitionGraph(*function), SUCCESS);

    std::unordered_set<uint64_t> copyInHash;
    const int brNum = 4;
    for (int i = 0; i < brNum; i++) {
        const std::string opName = "COPY_IN" + std::to_string(i);
        auto* op = G.GetOp(opName);
        ASSERT_NE(op, nullptr) << opName;
        int opMagic = op->GetOpMagic();
        int opIdx = partitioner.operationInfo_->magic2Idx_[opMagic];
        int nodeIdx = partitioner.superNodeInfo_->op2Node_[opIdx];
        copyInHash.insert(partitioner.superNodeInfo_->nodeHashList_[nodeIdx]);
    }
    EXPECT_EQ(copyInHash.size(), 1);
}
// Fills G with four front branches COPY_IN<i> -> RESHAPE_IN<i> -> ABS<i> (tensors t1<i>..t4<i>) and four
// back branches MUL<i> -> RESHAPE_OUT<i> -> COPY_OUT<i> (tensors b1<i>..b3<i>). MUL<i> reads t4<i> and
// t4<(i + 1) % 4>, so each back branch is linked to two neighbouring front branches.
// In-casts are t10..t13, out-casts are b30..b33. Every tensor is FP32 with shape {16, 16}.
void GetCrossGraph(ComputationalGraphBuilder& G)
{
    constexpr int kBranchCount = 4;
    const std::vector<int64_t> shape{16, 16};

    for (int branch = 0; branch < kBranchCount; ++branch) {
        const std::string suffix = std::to_string(branch);
        const std::string t1 = "t1" + suffix;
        const std::string t2 = "t2" + suffix;
        const std::string t3 = "t3" + suffix;
        const std::string t4 = "t4" + suffix;

        std::vector<std::string> tensors{t1, t2, t3, t4};
        std::vector<Opcode> opcodes{Opcode::OP_COPY_IN, Opcode::OP_RESHAPE, Opcode::OP_ABS};
        std::vector<std::vector<std::string>> inputs{{t1}, {t2}, {t3}};
        std::vector<std::vector<std::string>> outputs{{t2}, {t3}, {t4}};
        std::vector<std::string> names{"COPY_IN" + suffix, "RESHAPE_IN" + suffix, "ABS" + suffix};

        EXPECT_TRUE(G.AddTensors(DataType::DT_FP32, shape, tensors));
        EXPECT_TRUE(G.AddOps(opcodes, inputs, outputs, names, true));
    }

    for (int branch = 0; branch < kBranchCount; ++branch) {
        const std::string suffix = std::to_string(branch);
        // Index of the neighbouring front branch, wrapping around after the last one.
        const std::string neighbour = std::to_string((branch + 1) % kBranchCount);
        const std::string b1 = "b1" + suffix;
        const std::string b2 = "b2" + suffix;
        const std::string b3 = "b3" + suffix;

        std::vector<std::string> tensors{b1, b2, b3};
        std::vector<Opcode> opcodes{Opcode::OP_MUL, Opcode::OP_RESHAPE, Opcode::OP_COPY_OUT};
        std::vector<std::vector<std::string>> inputs{{"t4" + suffix, "t4" + neighbour}, {b1}, {b2}};
        std::vector<std::vector<std::string>> outputs{{b1}, {b2}, {b3}};
        std::vector<std::string> names{"MUL" + suffix, "RESHAPE_OUT" + suffix, "COPY_OUT" + suffix};

        EXPECT_TRUE(G.AddTensors(DataType::DT_FP32, shape, tensors));
        EXPECT_TRUE(G.AddOps(opcodes, inputs, outputs, names, true));
    }

    EXPECT_TRUE(G.SetInCast({"t10", "t11", "t12", "t13"}));
    EXPECT_TRUE(G.SetOutCast({"b30", "b31", "b32", "b33"}));
}
// Partitions the cross-linked graph with IsoPartitioner::SetParameter(10, 100000, false) and expects
// the function to end up with eight sub-graphs.
TEST_F(GraphPartitionTest, TestBuildIsomorphismGraph)
{
    constexpr int kParallelTH = 10;
    constexpr int kCycleLB = 100000;
    constexpr int kUseNodeHash = false;
    constexpr int kExpectedSubGraphs = 8;

    ComputationalGraphBuilder builder;
    GetCrossGraph(builder);
    Function* func = builder.GetFunction();

    IsoPartitioner isoPartitioner;
    EXPECT_EQ(isoPartitioner.SetParameter(kParallelTH, kCycleLB, kUseNodeHash), SUCCESS);
    EXPECT_EQ(isoPartitioner.PartitionGraph(*func), SUCCESS);
    EXPECT_EQ(func->GetTotalSubGraphCount(), kExpectedSubGraphs);
}
// Checks the failure paths of IsoPartitioner on a function without operations:
//  - SetParameter returns FAILED for parallelTH == -2 and for cycleLB == -1;
//  - PartitionGraph returns FAILED afterwards.
TEST_F(GraphPartitionTest, TestIsoParameterFailure)
{
    constexpr int kValidParallelTH = 10;
    constexpr int kInvalidParallelTH = -2;
    constexpr int kValidCycleLB = 100000;
    constexpr int kInvalidCycleLB = -1;
    constexpr int kUseNodeHash = false;

    ComputationalGraphBuilder builder;
    Function *func = builder.GetFunction();
    EXPECT_EQ(func->Operations().size(), 0);

    IsoPartitioner isoPartitioner;
    EXPECT_EQ(isoPartitioner.SetParameter(kInvalidParallelTH, kValidCycleLB, kUseNodeHash), FAILED);
    EXPECT_EQ(isoPartitioner.SetParameter(kValidParallelTH, kInvalidCycleLB, kUseNodeHash), FAILED);
    EXPECT_EQ(isoPartitioner.PartitionGraph(*func), FAILED);
}
// Checks OspPartitioner::SetParameter (SARKAR mode) against the function's sgPgLowerBound setting:
// -11 must be rejected with FAILED, 10000 must be accepted with SUCCESS.
TEST_F(GraphPartitionTest, TestOspParameter)
{
    constexpr int kInvalidLowerBound = -11;
    constexpr int kValidLowerBound = 10000;

    ComputationalGraphBuilder builder;
    Function *func = builder.GetFunction();
    EXPECT_EQ(func->Operations().size(), 0);

    OspPartitioner ospPartitioner(OspMode::SARKAR);

    func->paramConfigs_.sgPgLowerBound = kInvalidLowerBound;
    EXPECT_EQ(ospPartitioner.SetParameter(*func), FAILED);

    func->paramConfigs_.sgPgLowerBound = kValidLowerBound;
    EXPECT_EQ(ospPartitioner.SetParameter(*func), SUCCESS);
}
// Runs the GraphPartition pass on a function without operations: the pass must succeed and the
// function must report zero sub-graphs.
TEST_F(GraphPartitionTest, TestEmptyGraph)
{
    ComputationalGraphBuilder builder;
    Function* func = builder.GetFunction();
    EXPECT_EQ(func->Operations().size(), 0);

    GraphPartition pass;
    EXPECT_EQ(pass.RunOnFunction(*func), SUCCESS);
    EXPECT_EQ(func->GetTotalSubGraphCount(), 0);
}
// Runs the GraphPartition pass on a function without operations while the function's
// sgPartitionAlgorithm is set to "OspSarkar"; the pass must succeed.
TEST_F(GraphPartitionTest, TestSarkarEmptyGraph)
{
    const std::string algorithm = "OspSarkar";

    ComputationalGraphBuilder builder;
    Function *func = builder.GetFunction();
    func->paramConfigs_.sgPartitionAlgorithm = algorithm;
    EXPECT_EQ(func->paramConfigs_.sgPartitionAlgorithm, algorithm);
    EXPECT_EQ(func->Operations().size(), 0);

    GraphPartition pass;
    EXPECT_EQ(pass.RunOnFunction(*func), SUCCESS);
}
// Runs the GraphPartition pass on a function without operations while the function's
// sgPartitionAlgorithm is set to "OspBsp"; the pass must succeed.
TEST_F(GraphPartitionTest, TestOrbitBspEmptyGraph)
{
    const std::string algorithm = "OspBsp";

    ComputationalGraphBuilder builder;
    Function *func = builder.GetFunction();
    func->paramConfigs_.sgPartitionAlgorithm = algorithm;
    EXPECT_EQ(func->paramConfigs_.sgPartitionAlgorithm, algorithm);
    EXPECT_EQ(func->Operations().size(), 0);

    GraphPartition pass;
    EXPECT_EQ(pass.RunOnFunction(*func), SUCCESS);
}
// Runs the GraphPartition pass with sgPartitionAlgorithm set to "NotExistent" on a function without
// operations; the pass must return FAILED.
TEST_F(GraphPartitionTest, TestPartitionerParameterFailure)
{
    const std::string algorithm = "NotExistent";

    ComputationalGraphBuilder builder;
    Function *func = builder.GetFunction();
    func->paramConfigs_.sgPartitionAlgorithm = algorithm;
    EXPECT_EQ(func->paramConfigs_.sgPartitionAlgorithm, algorithm);
    EXPECT_EQ(func->Operations().size(), 0);

    GraphPartition pass;
    EXPECT_EQ(pass.RunOnFunction(*func), FAILED);
}
// Fills G with brNum "A" branches, brNum "B" branches and a multiply-accumulate chain that joins them.
//  - Branch A<i>: IN_A -> CAST_A -> OUT_A -> IN_L1_A -> L1_TO_L0A over tensors ta1<i>..ta6<i>
//    (memory types DDR, UB, UB, DDR, L1, L0A).
//  - Branch B<i>: the same pipeline with "B"/"tb" names, ending in L1_TO_L0B / MEM_L0B.
//  - MUL1 (OP_A_MUL_B) reads ta60/tb60 into tc0; MC<i> (OP_A_MULACC_B, i >= 1) reads ta6<i>, tb6<i> and
//    tc<i-1> into tc<i>; COPY_OUT_C writes tc<brNum-1> to "tout".
// In-casts are all ta1<i> followed by all tb1<i>; the only out-cast is "tout".
// All A branches are added before any B branch.
void GetCubeVectorGraph(ComputationalGraphBuilder &G, int brNum)
{
    std::vector<int64_t> tileShape{16, 16};
    std::vector<std::string> inTensorNames;

    // Adds the brNum branches of one operand side. `lower` / `upper` select the "a"/"A" or "b"/"B" naming,
    // `l0Mem` is the memory type of the branch's last tensor and `l1ToL0Op` the opcode of its last op.
    const auto addOperandBranches = [&](const std::string& lower, const std::string& upper, MemoryType l0Mem,
                                        Opcode l1ToL0Op) {
        constexpr int kStageCount = 6;
        for (int branch = 0; branch < brNum; ++branch) {
            const std::string suffix = std::to_string(branch);

            // Tensor names t<lower>1<suffix> .. t<lower>6<suffix>.
            std::vector<std::string> tensorNames;
            for (int stage = 1; stage <= kStageCount; ++stage) {
                tensorNames.push_back("t" + lower + std::to_string(stage) + suffix);
            }
            inTensorNames.push_back(tensorNames.front());

            std::vector<MemoryType> tensorMemType{MemoryType::MEM_DEVICE_DDR, MemoryType::MEM_UB,
                                                  MemoryType::MEM_UB,         MemoryType::MEM_DEVICE_DDR,
                                                  MemoryType::MEM_L1,         l0Mem};
            std::vector<Opcode> opCodes{
                Opcode::OP_COPY_IN, Opcode::OP_CAST, Opcode::OP_COPY_OUT, Opcode::OP_COPY_IN, l1ToL0Op};

            // Op k consumes tensor k and produces tensor k + 1.
            std::vector<std::vector<std::string>> ioperands;
            std::vector<std::vector<std::string>> ooperands;
            for (int stage = 0; stage + 1 < kStageCount; ++stage) {
                ioperands.push_back({tensorNames[stage]});
                ooperands.push_back({tensorNames[stage + 1]});
            }

            std::vector<std::string> opNames{"IN_" + upper + suffix, "CAST_" + upper + suffix,
                                             "OUT_" + upper + suffix, "IN_L1_" + upper + suffix,
                                             "L1_TO_L0" + upper + suffix};

            EXPECT_TRUE(G.AddTensors(DataType::DT_FP32, tileShape, tensorMemType, tensorNames));
            EXPECT_TRUE(G.AddOps(opCodes, ioperands, ooperands, opNames, true));
        }
    };

    addOperandBranches("a", "A", MemoryType::MEM_L0A, Opcode::OP_L1_TO_L0A);
    addOperandBranches("b", "B", MemoryType::MEM_L0B, Opcode::OP_L1_TO_L0B);

    // Accumulator tensors tc0..tc<brNum-1> and the final output tensor.
    for (int branch = 0; branch < brNum; ++branch) {
        EXPECT_TRUE(G.AddTensor(DataType::DT_FP32, tileShape, MemoryType::MEM_L0C, "tc" + std::to_string(branch)));
    }
    EXPECT_TRUE(G.AddTensor(DataType::DT_FP32, tileShape, MemoryType::MEM_DEVICE_DDR, "tout"));

    EXPECT_TRUE(G.AddOp(Opcode::OP_A_MUL_B, {"ta60", "tb60"}, {"tc0"}, "MUL1", true));
    for (int branch = 1; branch < brNum; ++branch) {
        const std::string suffix = std::to_string(branch);
        const std::string previous = std::to_string(branch - 1);
        EXPECT_TRUE(G.AddOp(Opcode::OP_A_MULACC_B, {"ta6" + suffix, "tb6" + suffix, "tc" + previous},
                            {"tc" + suffix}, "MC" + suffix, true));
    }
    EXPECT_TRUE(G.AddOp(Opcode::OP_COPY_OUT, {"tc" + std::to_string(brNum - 1)}, {"tout"}, "COPY_OUT_C", true));

    EXPECT_TRUE(G.SetInCast(inTensorNames));
    EXPECT_TRUE(G.SetOutCast({"tout"}));
}
// Partitions the cube/vector graph (four branches) with IsoPartitioner and checks that
//  - the function ends up with nine sub-graphs,
//  - every op listed in cubeOp has the isCube attribute set to true and every other op has it set to false,
//  - every op's sub-graph id lies in [0, 9) and all nine ids are used.
// Each operation pointer is checked with a fatal assertion because it is dereferenced right after.
TEST_F(GraphPartitionTest, TestCVGraph)
{
    ComputationalGraphBuilder G;
    const int brNum = 4;
    GetCubeVectorGraph(G, brNum);
    Function* function = G.GetFunction();
    ASSERT_NE(function, nullptr);
    const int parallelTH = 1;
    const int cycleLB = 100000;
    const bool useNodeHash = false;
    IsoPartitioner partitioner;
    ASSERT_EQ(partitioner.SetParameter(parallelTH, cycleLB, useNodeHash), SUCCESS);
    // The attribute and sub-graph id checks below are only meaningful after a successful partition.
    ASSERT_EQ(partitioner.PartitionGraph(*function), SUCCESS);

    // Names of the operations expected to be flagged as cube ops.
    std::unordered_set<std::string> cubeOp{"MUL1", "MC1", "MC2", "MC3", "COPY_OUT_C"};
    for (int i = 0; i < brNum; i++) {
        std::string br = std::to_string(i);
        cubeOp.insert("IN_L1_A" + br);
        cubeOp.insert("IN_L1_B" + br);
        cubeOp.insert("L1_TO_L0A" + br);
        cubeOp.insert("L1_TO_L0B" + br);
    }

    const int subGraphNum = 9;
    EXPECT_EQ(function->GetTotalSubGraphCount(), subGraphNum);

    std::unordered_set<int> subgraphIDs;
    for (auto& opPair : G.operations_) {
        Operation* op = opPair.second;
        ASSERT_NE(op, nullptr) << opPair.first;
        if (cubeOp.count(opPair.first) > 0) {
            EXPECT_TRUE(op->HasAttr(OpAttributeKey::isCube) && op->GetBoolAttribute(OpAttributeKey::isCube))
                << opPair.first;
        } else {
            EXPECT_TRUE(op->HasAttr(OpAttributeKey::isCube) && !op->GetBoolAttribute(OpAttributeKey::isCube))
                << opPair.first;
        }
        EXPECT_TRUE(op->GetSubgraphID() >= 0 && op->GetSubgraphID() < subGraphNum) << opPair.first;
        subgraphIDs.insert(op->GetSubgraphID());
    }
    EXPECT_EQ(subgraphIDs.size(), subGraphNum);
}
// Runs the full GraphPartition pass (PreCheck, RunOnFunction, PostCheck) on the cube/vector graph once per
// partition algorithm name ("Iso", "OspSarkar", "OspBsp") and checks for each run that
//  - every op listed in cubeOp has isCube == true and every other op has isCube == false,
//  - every op's sub-graph id lies in [0, total sub-graph count),
//  - all ops sharing a sub-graph id agree on isCube, and every sub-graph id is used.
// The Program singleton is reset at the end of each iteration.
// Operation pointers are checked with fatal assertions, and GetBoolAttribute is only called once
// HasAttr has confirmed the attribute is present.
TEST_F(GraphPartitionTest, TestOspCVGraph)
{
    for (const auto partitionAlg : {"Iso", "OspSarkar", "OspBsp"}) {
        ComputationalGraphBuilder G;
        const int brNum = 4;
        GetCubeVectorGraph(G, brNum);
        Function *function = G.GetFunction();
        ASSERT_NE(function, nullptr) << partitionAlg;
        function->paramConfigs_.sgPartitionAlgorithm = partitionAlg;
        GraphPartition gpp;
        EXPECT_EQ(gpp.PreCheck(*function), SUCCESS) << partitionAlg;
        EXPECT_EQ(gpp.RunOnFunction(*function), SUCCESS) << partitionAlg;

        // Names of the operations expected to be flagged as cube ops.
        std::unordered_set<std::string> cubeOp{"MUL1", "MC1", "MC2", "MC3", "COPY_OUT_C"};
        for (int i = 0; i < brNum; i++) {
            std::string br = std::to_string(i);
            cubeOp.insert("IN_L1_A" + br);
            cubeOp.insert("IN_L1_B" + br);
            cubeOp.insert("L1_TO_L0A" + br);
            cubeOp.insert("L1_TO_L0B" + br);
        }

        const int subGraphNum = function->GetTotalSubGraphCount();
        // First isCube value seen for each sub-graph id.
        std::unordered_map<int, bool> subgraphIDs2IsCube;
        for (auto &opPair : G.operations_) {
            Operation *op = opPair.second;
            ASSERT_NE(op, nullptr) << opPair.first;

            const bool expectCube = cubeOp.count(opPair.first) > 0;
            const bool hasCubeAttr = op->HasAttr(OpAttributeKey::isCube);
            EXPECT_TRUE(hasCubeAttr) << opPair.first;
            const bool isCube = hasCubeAttr && op->GetBoolAttribute(OpAttributeKey::isCube);
            if (hasCubeAttr) {
                EXPECT_EQ(isCube, expectCube) << opPair.first;
            }

            const auto subgraphId = op->GetSubgraphID();
            EXPECT_TRUE(subgraphId >= 0 && subgraphId < subGraphNum) << opPair.first;
            if (subgraphIDs2IsCube.count(subgraphId) > 0) {
                EXPECT_EQ(subgraphIDs2IsCube.at(subgraphId), isCube) << opPair.first;
            } else {
                subgraphIDs2IsCube.emplace(subgraphId, isCube);
            }
        }
        EXPECT_EQ(subgraphIDs2IsCube.size(), subGraphNum);
        EXPECT_EQ(gpp.PostCheck(*function), SUCCESS) << partitionAlg;
        Program::GetInstance().Reset();
    }
}
// For each OSP mode (SARKAR, MERKLEBSP) builds the cube/vector graph once and partitions the same function
// twice, first with cvMix == true and then with cvMix == false. After each partition every op's sub-graph
// id must lie in [0, total sub-graph count).
// Operation pointers are checked with fatal assertions because they are dereferenced right after.
TEST_F(GraphPartitionTest, TestMixCVGraph)
{
    for (const auto mode : {OspMode::SARKAR, OspMode::MERKLEBSP}) {
        ComputationalGraphBuilder G;
        const int brNum = 4;
        GetCubeVectorGraph(G, brNum);
        Function *function = G.GetFunction();
        ASSERT_NE(function, nullptr);
        for (const bool cvMix : {true, false}) {
            OspPartitioner partitioner(mode, cvMix);
            EXPECT_EQ(partitioner.PartitionGraph(*function), SUCCESS) << "cvMix=" << cvMix;
            const int subGraphNum = function->GetTotalSubGraphCount();
            for (auto &opPair : G.operations_) {
                Operation *op = opPair.second;
                ASSERT_NE(op, nullptr) << opPair.first;
                EXPECT_TRUE(op->GetSubgraphID() >= 0 && op->GetSubgraphID() < subGraphNum) << opPair.first;
            }
        }
    }
}
// Fills G with brNum independent units. Unit <i> consists of
//  - a "t" lane  COPY_INt<i> -> RESHAPE_INt<i> -> ABSt<i>  over tensors t1<i>..t4<i>,
//  - a "b" lane  COPY_INb<i> -> RESHAPE_INb<i> -> ABSb<i>  over tensors b1<i>..b4<i>,
//  - a tail      MUL<i>(t4<i>, b4<i>) -> RESHAPE_OUT<i> -> COPY_OUT<i>  over tensors m1<i>..m3<i>.
// All lanes are added first, then all tails. In-casts are t1<i>, b1<i> per unit; out-casts are m3<i>.
// Every tensor is FP32 with shape {16, 16}.
void GetMergeableGraph(ComputationalGraphBuilder &G, int brNum)
{
    const std::vector<int64_t> shape{16, 16};
    std::vector<std::string> inCastNames;
    std::vector<std::string> outCastNames;

    for (int unit = 0; unit < brNum; ++unit) {
        const std::string suffix = std::to_string(unit);
        std::vector<std::string> tensors;
        std::vector<Opcode> opcodes;
        std::vector<std::vector<std::string>> inputs;
        std::vector<std::vector<std::string>> outputs;
        std::vector<std::string> names;

        // The "t" lane is described first, then the "b" lane, matching the order the ops are added in.
        for (const std::string lane : {"t", "b"}) {
            const std::string s1 = lane + "1" + suffix;
            const std::string s2 = lane + "2" + suffix;
            const std::string s3 = lane + "3" + suffix;
            const std::string s4 = lane + "4" + suffix;

            tensors.push_back(s1);
            tensors.push_back(s2);
            tensors.push_back(s3);
            tensors.push_back(s4);

            opcodes.push_back(Opcode::OP_COPY_IN);
            opcodes.push_back(Opcode::OP_RESHAPE);
            opcodes.push_back(Opcode::OP_ABS);

            inputs.push_back({s1});
            inputs.push_back({s2});
            inputs.push_back({s3});

            outputs.push_back({s2});
            outputs.push_back({s3});
            outputs.push_back({s4});

            names.push_back("COPY_IN" + lane + suffix);
            names.push_back("RESHAPE_IN" + lane + suffix);
            names.push_back("ABS" + lane + suffix);

            inCastNames.push_back(s1);
        }

        EXPECT_TRUE(G.AddTensors(DataType::DT_FP32, shape, tensors));
        EXPECT_TRUE(G.AddOps(opcodes, inputs, outputs, names, true));
    }

    for (int unit = 0; unit < brNum; ++unit) {
        const std::string suffix = std::to_string(unit);
        const std::string m1 = "m1" + suffix;
        const std::string m2 = "m2" + suffix;
        const std::string m3 = "m3" + suffix;

        std::vector<std::string> tensors{m1, m2, m3};
        std::vector<Opcode> opcodes{Opcode::OP_MUL, Opcode::OP_RESHAPE, Opcode::OP_COPY_OUT};
        std::vector<std::vector<std::string>> inputs{{"t4" + suffix, "b4" + suffix}, {m1}, {m2}};
        std::vector<std::vector<std::string>> outputs{{m1}, {m2}, {m3}};
        std::vector<std::string> names{"MUL" + suffix, "RESHAPE_OUT" + suffix, "COPY_OUT" + suffix};

        EXPECT_TRUE(G.AddTensors(DataType::DT_FP32, shape, tensors));
        EXPECT_TRUE(G.AddOps(opcodes, inputs, outputs, names, true));
        outCastNames.push_back(m3);
    }

    EXPECT_TRUE(G.SetInCast(inCastNames));
    EXPECT_TRUE(G.SetOutCast(outCastNames));
}
// Partitions the mergeable graph (four units) with IsoPartitioner::SetParameter(1, 100000, false) and
// expects one sub-graph per unit.
TEST_F(GraphPartitionTest, TestDynamicCycleEstimation)
{
    constexpr int kUnitCount = 4;
    constexpr int kParallelTH = 1;
    constexpr int kCycleLB = 100000;
    constexpr int kUseNodeHash = false;
    constexpr int kExpectedSubGraphs = kUnitCount;

    ComputationalGraphBuilder builder;
    GetMergeableGraph(builder, kUnitCount);
    Function* func = builder.GetFunction();

    IsoPartitioner isoPartitioner;
    EXPECT_EQ(isoPartitioner.SetParameter(kParallelTH, kCycleLB, kUseNodeHash), SUCCESS);
    EXPECT_EQ(isoPartitioner.PartitionGraph(*func), SUCCESS);
    EXPECT_EQ(func->GetTotalSubGraphCount(), kExpectedSubGraphs);
}
// Partitions the mergeable graph (four units) with parallelTH == 8 and cycleLB == 0 and expects three
// sub-graphs per unit (twelve in total).
TEST_F(GraphPartitionTest, TestParallelThreshold)
{
    constexpr int kUnitCount = 4;
    constexpr int kParallelTH = kUnitCount * 2;
    constexpr int kCycleLB = 0;
    constexpr int kUseNodeHash = false;
    constexpr int kExpectedSubGraphs = 3 * kUnitCount;

    ComputationalGraphBuilder builder;
    GetMergeableGraph(builder, kUnitCount);
    Function* func = builder.GetFunction();

    IsoPartitioner isoPartitioner;
    EXPECT_EQ(isoPartitioner.SetParameter(kParallelTH, kCycleLB, kUseNodeHash), SUCCESS);
    EXPECT_EQ(isoPartitioner.PartitionGraph(*func), SUCCESS);
    EXPECT_EQ(func->GetTotalSubGraphCount(), kExpectedSubGraphs);
}
// Partitions the mergeable graph (four units) with parallelTH == 8 and cycleLB == 100000 and expects
// one sub-graph per unit.
TEST_F(GraphPartitionTest, TestSmallGraphBound)
{
    constexpr int kUnitCount = 4;
    constexpr int kParallelTH = kUnitCount * 2;
    constexpr int kCycleLB = 100000;
    constexpr int kUseNodeHash = false;
    constexpr int kExpectedSubGraphs = kUnitCount;

    ComputationalGraphBuilder builder;
    GetMergeableGraph(builder, kUnitCount);
    Function* func = builder.GetFunction();

    IsoPartitioner isoPartitioner;
    EXPECT_EQ(isoPartitioner.SetParameter(kParallelTH, kCycleLB, kUseNodeHash), SUCCESS);
    EXPECT_EQ(isoPartitioner.PartitionGraph(*func), SUCCESS);
    EXPECT_EQ(func->GetTotalSubGraphCount(), kExpectedSubGraphs);
}
// Builds the cube/vector graph with 5000 branches and only checks that IsoPartitioner accepts the
// parameters (parallelTH == 10000, cycleLB == 100000) and that partitioning returns SUCCESS.
TEST_F(GraphPartitionTest, TestLargeSuperNode)
{
    constexpr int kBranchCount = 5000;
    constexpr int kParallelTH = kBranchCount * 2;
    constexpr int kCycleLB = 100000;
    constexpr int kUseNodeHash = false;

    ComputationalGraphBuilder builder;
    GetCubeVectorGraph(builder, kBranchCount);
    Function* func = builder.GetFunction();

    IsoPartitioner isoPartitioner;
    EXPECT_EQ(isoPartitioner.SetParameter(kParallelTH, kCycleLB, kUseNodeHash), SUCCESS);
    EXPECT_EQ(isoPartitioner.PartitionGraph(*func), SUCCESS);
}
// Fills G with a shared head COPY_IN(h1 -> h2) -> ABS(h2 -> h3) followed by brNum branches that all read
// h3: EXP<i> -> RESHAPE<i> -> COPY_OUT<i> over tensors t1<i>..t3<i>.
// The only in-cast is h1; the out-casts are t3<i>. Every tensor is FP32 with shape {16, 16}.
void GetWideGraph(ComputationalGraphBuilder& G, int brNum)
{
    const std::vector<int64_t> shape{16, 16};
    std::vector<std::string> outCastNames;

    EXPECT_TRUE(G.AddTensors(DataType::DT_FP32, shape, {"h1", "h2", "h3"}));
    EXPECT_TRUE(G.AddOp(Opcode::OP_COPY_IN, {"h1"}, {"h2"}, "COPY_IN", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_ABS, {"h2"}, {"h3"}, "ABS", true));

    for (int branch = 0; branch < brNum; ++branch) {
        const std::string suffix = std::to_string(branch);
        const std::string t1 = "t1" + suffix;
        const std::string t2 = "t2" + suffix;
        const std::string t3 = "t3" + suffix;

        std::vector<std::string> tensors{t1, t2, t3};
        std::vector<Opcode> opcodes{Opcode::OP_EXP, Opcode::OP_RESHAPE, Opcode::OP_COPY_OUT};
        std::vector<std::vector<std::string>> inputs{{"h3"}, {t1}, {t2}};
        std::vector<std::vector<std::string>> outputs{{t1}, {t2}, {t3}};
        std::vector<std::string> names{"EXP" + suffix, "RESHAPE" + suffix, "COPY_OUT" + suffix};

        EXPECT_TRUE(G.AddTensors(DataType::DT_FP32, shape, tensors));
        EXPECT_TRUE(G.AddOps(opcodes, inputs, outputs, names, true));
        outCastNames.push_back(t3);
    }

    EXPECT_TRUE(G.SetInCast({"h1"}));
    EXPECT_TRUE(G.SetOutCast(outCastNames));
}
// Builds the wide graph with 5000 branches and only checks that IsoPartitioner accepts the parameters
// (parallelTH == 20, cycleLB == 100000) and that partitioning returns SUCCESS.
TEST_F(GraphPartitionTest, TestLargeWideGraph)
{
    constexpr int kBranchCount = 5000;
    constexpr int kParallelTH = 20;
    constexpr int kCycleLB = 100000;
    constexpr int kUseNodeHash = false;

    ComputationalGraphBuilder builder;
    GetWideGraph(builder, kBranchCount);
    Function* func = builder.GetFunction();

    IsoPartitioner isoPartitioner;
    EXPECT_EQ(isoPartitioner.SetParameter(kParallelTH, kCycleLB, kUseNodeHash), SUCCESS);
    EXPECT_EQ(isoPartitioner.PartitionGraph(*func), SUCCESS);
}
// Fills G with two parallel chains "a" and "b". Each chain is
//   COPY_IN<x>(h<x> -> <x>0), then brNum ABS ops ABS<x><i>(<x><i> -> <x><i+1>), then
//   COPY_OUT<x>(<x><brNum> -> t<x>).
// The ABS ops of both chains are added level by level. In-casts are ha, hb; out-casts are ta, tb.
// Every tensor is FP32 with shape {16, 16}.
void GetDeepGraph(ComputationalGraphBuilder& G, int brNum)
{
    std::vector<int64_t> tileShape{16, 16};
    EXPECT_EQ(G.AddTensors(DataType::DT_FP32, tileShape, {"ha", "a0", "hb", "b0"}), true);
    EXPECT_EQ(G.AddOp(Opcode::OP_COPY_IN, {"ha"}, {"a0"}, "COPY_INa", true), true);
    EXPECT_EQ(G.AddOp(Opcode::OP_COPY_IN, {"hb"}, {"b0"}, "COPY_INb", true), true);
    for (int i = 0; i < brNum; i++) {
        // br names the level being consumed, nbr the level being produced.
        std::string br = std::to_string(i);
        std::string nbr = std::to_string(i + 1);
        std::vector<std::string> tensorNames{"a" + nbr, "b" + nbr};
        std::vector<Opcode> opCodes{Opcode::OP_ABS, Opcode::OP_ABS};
        std::vector<std::vector<std::string>> ioperands{{"a" + br}, {"b" + br}};
        std::vector<std::vector<std::string>> ooperands{{"a" + nbr}, {"b" + nbr}};
        std::vector<std::string> opNames{"ABSa" + br, "ABSb" + br};
        EXPECT_EQ(G.AddTensors(DataType::DT_FP32, tileShape, tensorNames), true);
        EXPECT_EQ(G.AddOps(opCodes, ioperands, ooperands, opNames, true), true);
    }
    // The last tensors produced by the chains are a<brNum> and b<brNum>.
    std::string tbr = std::to_string(brNum);
    EXPECT_EQ(G.AddTensors(DataType::DT_FP32, tileShape, {"ta", "tb"}), true);
    EXPECT_EQ(G.AddOp(Opcode::OP_COPY_OUT, {"a" + tbr}, {"ta"}, "COPY_OUTa", true), true);
    EXPECT_EQ(G.AddOp(Opcode::OP_COPY_OUT, {"b" + tbr}, {"tb"}, "COPY_OUTb", true), true);
    EXPECT_EQ(G.SetInCast({"ha", "hb"}), true);
    EXPECT_EQ(G.SetOutCast({"ta", "tb"}), true);
}
// Builds the deep graph with 5000 levels and only checks that IsoPartitioner accepts the parameters
// (parallelTH == 20, cycleLB == 100000) and that partitioning returns SUCCESS.
TEST_F(GraphPartitionTest, TestLargeDeepGraph)
{
    constexpr int kLevelCount = 5000;
    constexpr int kParallelTH = 20;
    constexpr int kCycleLB = 100000;
    constexpr int kUseNodeHash = false;

    ComputationalGraphBuilder builder;
    GetDeepGraph(builder, kLevelCount);
    Function* func = builder.GetFunction();

    IsoPartitioner isoPartitioner;
    EXPECT_EQ(isoPartitioner.SetParameter(kParallelTH, kCycleLB, kUseNodeHash), SUCCESS);
    EXPECT_EQ(isoPartitioner.PartitionGraph(*func), SUCCESS);
}
// Builds a small diamond graph
//   COPY_IN(h1 -> h2) -> ABS(h2 -> h3) -> { ADDS1(h3 -> h41), ADDS2(h3 -> h42) } -> MUL(h41, h42 -> h5)
//   -> COPY_OUT(h5 -> h6)
// and expects IsoPartitioner::SetParameter(20, 100000, false) + PartitionGraph to yield four sub-graphs.
TEST_F(GraphPartitionTest, TestIsomorphismGraph)
{
    constexpr int kParallelTH = 20;
    constexpr int kCycleLB = 100000;
    constexpr int kUseNodeHash = false;
    constexpr int kExpectedSubGraphs = 4;

    ComputationalGraphBuilder builder;
    const std::vector<int64_t> shape{16, 16};
    EXPECT_TRUE(builder.AddTensors(DataType::DT_FP32, shape, {"h1", "h2", "h3", "h41", "h42", "h5", "h6"}));
    EXPECT_TRUE(builder.AddOp(Opcode::OP_COPY_IN, {"h1"}, {"h2"}, "COPY_IN", true));
    EXPECT_TRUE(builder.AddOp(Opcode::OP_ABS, {"h2"}, {"h3"}, "ABS", true));
    EXPECT_TRUE(builder.AddOp(Opcode::OP_ADDS, {"h3"}, {"h41"}, "ADDS1", true));
    EXPECT_TRUE(builder.AddOp(Opcode::OP_ADDS, {"h3"}, {"h42"}, "ADDS2", true));
    EXPECT_TRUE(builder.AddOp(Opcode::OP_MUL, {"h41", "h42"}, {"h5"}, "MUL", true));
    EXPECT_TRUE(builder.AddOp(Opcode::OP_COPY_OUT, {"h5"}, {"h6"}, "COPY_OUT", true));
    EXPECT_TRUE(builder.SetInCast({"h1"}));
    EXPECT_TRUE(builder.SetOutCast({"h6"}));

    Function* func = builder.GetFunction();
    IsoPartitioner isoPartitioner;
    EXPECT_EQ(isoPartitioner.SetParameter(kParallelTH, kCycleLB, kUseNodeHash), SUCCESS);
    EXPECT_EQ(isoPartitioner.PartitionGraph(*func), SUCCESS);
    EXPECT_EQ(func->GetTotalSubGraphCount(), kExpectedSubGraphs);
}
void RunScopeTest(bool allowCrossScopeMerge, int expectedSubGraphNum)
{
ComputationalGraphBuilder G;
std::vector<int64_t> tileShape{32, 32};
EXPECT_EQ(G.AddTensors(DataType::DT_FP32, tileShape, {"h1", "h2", "h3", "h41", "h42", "h5", "h6"}), true);
EXPECT_EQ(G.AddOp(Opcode::OP_COPY_IN, {"h1"}, {"h2"}, "COPY_IN", true), true);
EXPECT_EQ(G.AddOp(Opcode::OP_ABS, {"h2"}, {"h3"}, "ABS", true), true);
EXPECT_EQ(G.AddOp(Opcode::OP_ADDS, {"h3"}, {"h41"}, "A1", true), true);
EXPECT_EQ(G.AddOp(Opcode::OP_ADDS, {"h3"}, {"h42"}, "A2", true), true);
EXPECT_EQ(G.AddOp(Opcode::OP_MUL, {"h41", "h42"}, {"h5"}, "M", true), true);
EXPECT_EQ(G.AddOp(Opcode::OP_COPY_OUT, {"h5"}, {"h6"}, "COPY_OUT", true), true);
EXPECT_EQ(G.SetInCast({"h1"}), true);
EXPECT_EQ(G.SetOutCast({"h6"}), true);
Operation::ScopeInfo info;
info.scopeId = 1;
info.allowCrossScopeMerge = allowCrossScopeMerge;
for (auto* op : {G.GetOp("A1"), G.GetOp("A2"), G.GetOp("M"), G.GetOp("COPY_OUT")}) {
op->SetScopeInfo(info);
}
Function* function = G.GetFunction();
const int parallelTH = 20;
const int cycleLB = 100000;
const int useNodeHash = false;
IsoPartitioner partitioner;
EXPECT_EQ(partitioner.SetParameter(parallelTH, cycleLB, useNodeHash), SUCCESS);
EXPECT_EQ(partitioner.PartitionGraph(*function), SUCCESS);
EXPECT_EQ(function->GetTotalSubGraphCount(), expectedSubGraphNum);
}
// Scoped diamond graph with allowCrossScopeMerge == false: two sub-graphs are expected.
TEST_F(GraphPartitionTest, TestScopeCase1)
{
    constexpr bool kAllowCrossScopeMerge = false;
    constexpr int kExpectedSubGraphs = 2;
    RunScopeTest(kAllowCrossScopeMerge, kExpectedSubGraphs);
}
// Scoped diamond graph with allowCrossScopeMerge == true: a single sub-graph is expected.
TEST_F(GraphPartitionTest, TestScopeCase2)
{
    constexpr bool kAllowCrossScopeMerge = true;
    constexpr int kExpectedSubGraphs = 1;
    RunScopeTest(kAllowCrossScopeMerge, kExpectedSubGraphs);
}
void RunScopeTest2(bool paraller, bool crossScopeMerge, int expectedSubGraphNum)
{
ComputationalGraphBuilder G;
const int brNum = 1;
GetMergeableGraph(G, brNum);
Function* function = G.GetFunction();
Operation::ScopeInfo info(1);
info.allowParallelMerge = paraller;
info.allowCrossScopeMerge = crossScopeMerge,
SetScopeInfoForOps(G, {"COPY_INt0", "RESHAPE_INt0", "ABSt0", "COPY_INb0", "RESHAPE_INb0", "ABSb0"}, info);
IsoPartitioner partitioner;
EXPECT_EQ(partitioner.SetParameter(100000, 20, 0), SUCCESS);
EXPECT_EQ(partitioner.PartitionGraph(*function), SUCCESS);
EXPECT_EQ(function->GetTotalSubGraphCount(), expectedSubGraphNum);
}
// Single mergeable unit with allowParallelMerge == true and allowCrossScopeMerge == false:
// two sub-graphs are expected.
TEST_F(GraphPartitionTest, TestScopeCase3)
{
    constexpr bool kAllowParallelMerge = true;
    constexpr bool kAllowCrossScopeMerge = false;
    constexpr int kExpectedSubGraphs = 2;
    RunScopeTest2(kAllowParallelMerge, kAllowCrossScopeMerge, kExpectedSubGraphs);
}
// Single mergeable unit with allowParallelMerge == true and allowCrossScopeMerge == true:
// a single sub-graph is expected.
TEST_F(GraphPartitionTest, TestScopeCase4)
{
    constexpr bool kAllowParallelMerge = true;
    constexpr bool kAllowCrossScopeMerge = true;
    constexpr int kExpectedSubGraphs = 1;
    RunScopeTest2(kAllowParallelMerge, kAllowCrossScopeMerge, kExpectedSubGraphs);
}
// Registers every tensor of the GLM attention test graph on G and expects each
// registration call to report success.
void AddGLMTensors(ComputationalGraphBuilder& G)
{
    std::vector<int64_t> scoreShape{12, 512};
    std::vector<int64_t> rowShape{12, 1};
    std::vector<int64_t> accShape{12, 128};
    std::vector<int64_t> valueBlockShape{128, 128};

    // FP32 tensors registered without an explicit memory type.
    EXPECT_TRUE(G.AddTensors(DataType::DT_FP32, scoreShape, {"sij", "sij_ub", "sij_scale", "tsub", "tilda_pij"}));
    EXPECT_TRUE(G.AddTensors(
        DataType::DT_FP32, rowShape,
        {"max_update", "sum_update", "max_update_ub", "tilda_mij", "max_new", "sum_local", "max_new_ddr",
         "max_update_ub2", "tsub2", "update_mul", "sum_update_ub", "tmp_mul", "sum_update_out",
         "sum_update_out_ddr"}));
    EXPECT_TRUE(G.AddTensors(
        DataType::DT_FP32, accShape,
        {"oi_update", "oi_tmp_ddr", "oi_update_ub", "oi_tmp_ub", "tmp_oi", "oi_update_out", "oi_update_out_ddr"}));

    // BF16 tensors; value_cache_2d is registered with -1 as its leading dimension.
    EXPECT_TRUE(G.AddTensors(DataType::DT_BF16, scoreShape, {"tilda_pij_fp16", "pij_assembled_ddr"}));
    EXPECT_TRUE(G.AddTensors(DataType::DT_BF16, std::vector<int64_t>{-1, 128}, {"value_cache_2d"}));

    // Groups of four tensors, each group registered with one explicit memory type.
    std::vector<MemoryType> inL1(4, MemoryType::MEM_L1);
    std::vector<MemoryType> inL0B(4, MemoryType::MEM_L0B);
    std::vector<MemoryType> inL0A(4, MemoryType::MEM_L0A);
    std::vector<MemoryType> inL0C(4, MemoryType::MEM_L0C);
    EXPECT_TRUE(G.AddTensors(DataType::DT_BF16, valueBlockShape, inL1, {"v_l1_0", "v_l1_1", "v_l1_2", "v_l1_3"}));
    EXPECT_TRUE(
        G.AddTensors(DataType::DT_BF16, valueBlockShape, inL0B, {"v_l0b_0", "v_l0b_1", "v_l0b_2", "v_l0b_3"}));
    EXPECT_TRUE(G.AddTensors(DataType::DT_BF16, accShape, inL1, {"a_l1_0", "a_l1_1", "a_l1_2", "a_l1_3"}));
    EXPECT_TRUE(G.AddTensors(DataType::DT_BF16, accShape, inL0A, {"a_l0a_0", "a_l0a_1", "a_l0a_2", "a_l0a_3"}));
    EXPECT_TRUE(G.AddTensors(DataType::DT_FP32, accShape, inL0C, {"mmul_a", "mmul_b", "mmul_c", "mmul_d"}));

    // Tensors of the final DIV -> RESHAPE -> CAST -> ASSEMBLE tail.
    EXPECT_TRUE(G.AddTensor(DataType::DT_FP32, {12, 128}, "div_out"));
    EXPECT_TRUE(G.AddTensor(DataType::DT_FP32, {1, 12, 128}, "reshape_out"));
    EXPECT_TRUE(G.AddTensor(DataType::DT_FP32, {1, 12, 128}, "cast_out"));
    EXPECT_TRUE(G.AddTensor(DataType::DT_FP32, {1, 12, 128}, "atten_out"));
}
// Adds the first group of ops of the GLM attention test graph (sij -> tilda_pij and the
// max_new / sum_local side outputs) and expects every AddOp call to succeed.
void AddGLMSoftmaxOps(ComputationalGraphBuilder& G)
{
    // sij -> sij_ub -> sij_scale -> tilda_mij
    EXPECT_TRUE(G.AddOp(Opcode::OP_VIEW, {"sij"}, {"sij_ub"}, "VIEW_sij", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_MULS, {"sij_ub"}, {"sij_scale"}, "MULS_scale", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_ROWMAX_SINGLE, {"sij_scale"}, {"tilda_mij"}, "ROWMAX_mij", true));

    // max_new combines the viewed max_update with tilda_mij.
    EXPECT_TRUE(G.AddOp(Opcode::OP_VIEW, {"max_update"}, {"max_update_ub"}, "VIEW_max_update", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_MAXIMUM, {"max_update_ub", "tilda_mij"}, {"max_new"}, "MAX_new", true));

    // sij_scale - max_new -> EXP -> tilda_pij, then the CAST/ASSEMBLE branch.
    EXPECT_TRUE(G.AddOp(Opcode::OP_SUB, {"sij_scale", "max_new"}, {"tsub"}, "SUB_tsub", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_EXP, {"tsub"}, {"tilda_pij"}, "EXP_pij", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_CAST, {"tilda_pij"}, {"tilda_pij_fp16"}, "CAST_fp16", true));
    EXPECT_TRUE(
        G.AddOp(Opcode::OP_ASSEMBLE, {"tilda_pij_fp16"}, {"pij_assembled_ddr"}, "ASSEMBLE_pij_ddr", true));

    // Side outputs: sum_local from tilda_pij and max_new_ddr from max_new.
    EXPECT_TRUE(G.AddOp(Opcode::OP_ROWSUM_SINGLE, {"tilda_pij"}, {"sum_local"}, "ROWSUM_local", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_ASSEMBLE, {"max_new"}, {"max_new_ddr"}, "ASSEMBLE_max_new_ddr", true));
}
// Adds the matmul part of the GLM attention test graph: views feeding the v_l1_* / a_l1_*
// tensors, a second level of views carrying an explicit ViewOpAttribute, the
// A_MUL_B / A_MULACC_B chain and the final ASSEMBLE into oi_tmp_ddr.
void AddGLMCubeOps(ComputationalGraphBuilder& G)
{
    // First-level views; the op name is "VIEW_" followed by the output tensor name.
    const std::vector<std::string> valueL1 = {"v_l1_0", "v_l1_1", "v_l1_2", "v_l1_3"};
    for (const auto& dst : valueL1) {
        EXPECT_TRUE(G.AddOp(Opcode::OP_VIEW, {"value_cache_2d"}, {dst}, "VIEW_" + dst, true));
    }
    const std::vector<std::string> scoreL1 = {"a_l1_0", "a_l1_1", "a_l1_2", "a_l1_3"};
    for (const auto& dst : scoreL1) {
        EXPECT_TRUE(G.AddOp(Opcode::OP_VIEW, {"pij_assembled_ddr"}, {dst}, "VIEW_" + dst, true));
    }

    // Second-level views: one VIEW per (source, destination) pair, named opPrefix + index,
    // each given a ViewOpAttribute built from an all-zero offset and the target memory type.
    // NOTE(review): the offset has three entries while the tensors are registered as 2-D - confirm.
    const auto addAttributedViews = [&G](const std::vector<std::pair<std::string, std::string>>& links,
                                         const std::string& opPrefix, MemoryType target) {
        int index = 0;
        for (const auto& link : links) {
            std::string opName = opPrefix + std::to_string(index);
            ++index;
            EXPECT_TRUE(G.AddOp(Opcode::OP_VIEW, {link.first}, {link.second}, opName, true));
            std::vector<int64_t> offset = {0, 0, 0};
            G.GetOp(opName)->SetOpAttribute(std::make_shared<ViewOpAttribute>(offset, target));
        }
    };
    addAttributedViews(
        {{"v_l1_0", "v_l0b_0"}, {"v_l1_1", "v_l0b_1"}, {"v_l1_2", "v_l0b_2"}, {"v_l1_3", "v_l0b_3"}}, "VIEW_L0B_v",
        MemoryType::MEM_L0B);
    addAttributedViews(
        {{"a_l1_0", "a_l0a_0"}, {"a_l1_1", "a_l0a_1"}, {"a_l1_2", "a_l0a_2"}, {"a_l1_3", "a_l0a_3"}}, "VIEW_L0A_a",
        MemoryType::MEM_L0A);

    // Matmul chain: each A_MULACC_B takes the previous result as its third input.
    EXPECT_TRUE(G.AddOp(Opcode::OP_A_MUL_B, {"a_l0a_0", "v_l0b_0"}, {"mmul_a"}, "MMUL_a", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_A_MULACC_B, {"a_l0a_1", "v_l0b_1", "mmul_a"}, {"mmul_b"}, "MMACC_b", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_A_MULACC_B, {"a_l0a_2", "v_l0b_2", "mmul_b"}, {"mmul_c"}, "MMACC_c", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_A_MULACC_B, {"a_l0a_3", "v_l0b_3", "mmul_c"}, {"mmul_d"}, "MMACC_d", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_ASSEMBLE, {"mmul_d"}, {"oi_tmp_ddr"}, "ASSEMBLE_oi_tmp_ddr", true));
}
// Adds the last group of ops of the GLM attention test graph (update_mul, the sum and oi
// updates, and the DIV -> RESHAPE -> CAST -> ASSEMBLE tail) and expects every AddOp call to succeed.
void AddGLMUpdateOps(ComputationalGraphBuilder& G)
{
    // update_mul = EXP(max_update_ub2 - max_new)
    EXPECT_TRUE(G.AddOp(Opcode::OP_VIEW, {"max_update"}, {"max_update_ub2"}, "VIEW_max_update2", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_SUB, {"max_update_ub2", "max_new"}, {"tsub2"}, "SUB_tsub2", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_EXP, {"tsub2"}, {"update_mul"}, "EXP_update_mul", true));

    // sum_update_out = sum_update_ub * update_mul + sum_local
    EXPECT_TRUE(G.AddOp(Opcode::OP_VIEW, {"sum_update"}, {"sum_update_ub"}, "VIEW_sum_update", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_MUL, {"sum_update_ub", "update_mul"}, {"tmp_mul"}, "MUL_tmp", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_ADD, {"tmp_mul", "sum_local"}, {"sum_update_out"}, "ADD_sum_update", true));
    EXPECT_TRUE(
        G.AddOp(Opcode::OP_ASSEMBLE, {"sum_update_out"}, {"sum_update_out_ddr"}, "ASSEMBLE_sum_ddr", true));

    // oi_update_out = oi_update_ub * update_mul + oi_tmp_ub
    EXPECT_TRUE(G.AddOp(Opcode::OP_VIEW, {"oi_update"}, {"oi_update_ub"}, "VIEW_oi_update", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_VIEW, {"oi_tmp_ddr"}, {"oi_tmp_ub"}, "VIEW_oi_tmp", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_MUL, {"oi_update_ub", "update_mul"}, {"tmp_oi"}, "MUL_oi", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_ADD, {"tmp_oi", "oi_tmp_ub"}, {"oi_update_out"}, "ADD_oi_update", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_ASSEMBLE, {"oi_update_out"}, {"oi_update_out_ddr"}, "ASSEMBLE_oi_ddr", true));

    // atten_out tail
    EXPECT_TRUE(G.AddOp(Opcode::OP_DIV, {"oi_update_out", "sum_update_out"}, {"div_out"}, "DIV_1", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_RESHAPE, {"div_out"}, {"reshape_out"}, "RESHAPE_1", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_CAST, {"reshape_out"}, {"cast_out"}, "CAST_1", true));
    EXPECT_TRUE(G.AddOp(Opcode::OP_ASSEMBLE, {"cast_out"}, {"atten_out"}, "ASSEMBLE_1", true));
}
// Checks that every op named in opNames reports the same subgraph ID as the first one.
//   G       - builder that owns the ops (looked up by name through GetOp)
//   opNames - names of the ops that are expected to share one subgraph
// Returns the subgraph ID of the first op. An empty list is recorded as a test failure and
// yields -1, so the helper never indexes into an empty vector.
int VerifyOpsInSameSubgraph(ComputationalGraphBuilder& G, const std::vector<std::string>& opNames)
{
    if (opNames.empty()) {
        ADD_FAILURE() << "VerifyOpsInSameSubgraph called with an empty op list";
        return -1;
    }
    const int subgraphId = G.GetOp(opNames.front())->GetSubgraphID();
    for (const auto& name : opNames) {
        // The op name is streamed so a mismatch identifies the offending op.
        EXPECT_EQ(G.GetOp(name)->GetSubgraphID(), subgraphId) << "op: " << name;
    }
    return subgraphId;
}
// Assembles the complete GLM attention test graph on G: tensors first, then the three op
// groups, and finally the graph's input and output tensor lists.
void ConstructGLMAttentionCase(ComputationalGraphBuilder& G)
{
    AddGLMTensors(G);
    AddGLMSoftmaxOps(G);
    AddGLMCubeOps(G);
    AddGLMUpdateOps(G);

    EXPECT_TRUE(G.SetInCast({"sij", "max_update", "sum_update", "oi_update", "value_cache_2d"}));
    EXPECT_TRUE(G.SetOutCast({"max_new_ddr", "sum_update_out_ddr", "oi_update_out_ddr", "atten_out"}));
}
// Partitions the GLM attention graph with two scopes that both forbid cross-scope merging and
// checks that the scope-1 ops, the scope-2 ops, the cube ops and the oi-update ops each share
// one subgraph and that these four subgraphs are pairwise distinct.
TEST_F(GraphPartitionTest, TestScopeCase5)
{
    ComputationalGraphBuilder builder;
    ConstructGLMAttentionCase(builder);

    // Scope 1.
    std::vector<std::string> firstScopeOps = {"MULS_scale", "ROWMAX_mij", "MAX_new", "SUB_tsub",
                                              "EXP_pij",    "CAST_fp16",  "ROWSUM_local"};
    Operation::ScopeInfo firstScope(1);
    firstScope.allowParallelMerge = true;
    firstScope.allowCrossScopeMerge = false;
    SetScopeInfoForOps(builder, firstScopeOps, firstScope);

    // Scope 2.
    std::vector<std::string> secondScopeOps = {"SUB_tsub2", "EXP_update_mul", "MUL_tmp", "ADD_sum_update"};
    Operation::ScopeInfo secondScope(2);
    secondScope.allowParallelMerge = true;
    secondScope.allowCrossScopeMerge = false;
    SetScopeInfoForOps(builder, secondScopeOps, secondScope);

    Function* func = builder.GetFunction();
    IsoPartitioner isoPartitioner;
    EXPECT_EQ(isoPartitioner.SetParameter(100000, 20, 0), SUCCESS);
    EXPECT_EQ(isoPartitioner.PartitionGraph(*func), SUCCESS);

    const int firstScopeGraph = VerifyOpsInSameSubgraph(builder, firstScopeOps);
    const int secondScopeGraph = VerifyOpsInSameSubgraph(builder, secondScopeOps);
    EXPECT_NE(firstScopeGraph, secondScopeGraph);

    // Cube ops: one shared subgraph, isCube attribute set to true, CV fuse id equal to -1.
    std::vector<std::string> cubeOpNames = {
        "VIEW_v_l1_0", "VIEW_v_l1_1", "VIEW_v_l1_2", "VIEW_v_l1_3", "VIEW_a_l1_0", "VIEW_a_l1_1",
        "VIEW_a_l1_2", "VIEW_a_l1_3", "VIEW_L0B_v0", "VIEW_L0B_v1", "VIEW_L0B_v2", "VIEW_L0B_v3",
        "VIEW_L0A_a0", "VIEW_L0A_a1", "VIEW_L0A_a2", "VIEW_L0A_a3", "MMUL_a",      "MMACC_b",
        "MMACC_c",     "MMACC_d",     "ASSEMBLE_oi_tmp_ddr"};
    const int cubeGraph = VerifyOpsInSameSubgraph(builder, cubeOpNames);
    for (const auto& opName : cubeOpNames) {
        auto cubeOp = builder.GetOp(opName);
        const bool markedCube =
            cubeOp->HasAttr(OpAttributeKey::isCube) && cubeOp->GetBoolAttribute(OpAttributeKey::isCube);
        EXPECT_TRUE(markedCube);
        EXPECT_EQ(cubeOp->GetCvFuseId(), -1);
    }
    EXPECT_NE(cubeGraph, firstScopeGraph);
    EXPECT_NE(cubeGraph, secondScopeGraph);

    // oi-update ops: a fourth subgraph, different from the three above.
    std::vector<std::string> oiUpdateOps = {
        "VIEW_oi_update", "VIEW_oi_tmp", "MUL_oi", "ADD_oi_update", "ASSEMBLE_oi_ddr"};
    const int oiUpdateGraph = VerifyOpsInSameSubgraph(builder, oiUpdateOps);
    EXPECT_NE(oiUpdateGraph, firstScopeGraph);
    EXPECT_NE(oiUpdateGraph, secondScopeGraph);
    EXPECT_NE(oiUpdateGraph, cubeGraph);
}
// Same GLM attention graph, NPU arch set to DAV_3510, with the listed vector ops and cube ops
// all placed in scope 1. After partitioning, cube ops must carry isCube == true, every other
// op isCube == false, and both the cube ops and the listed vector ops a CV fuse id of 0.
TEST_F(GraphPartitionTest, TestScopeCase6)
{
    Platform::Instance().GetSoc().SetNPUArch(NPUArch::DAV_3510);
    ComputationalGraphBuilder builder;
    ConstructGLMAttentionCase(builder);

    Operation::ScopeInfo sharedScope;
    sharedScope.scopeId = 1;

    std::vector<std::string> vectorOpNames = {"MULS_scale",     "ROWMAX_mij", "MAX_new",      "SUB_tsub",
                                              "EXP_pij",        "CAST_fp16",  "ROWSUM_local", "SUB_tsub2",
                                              "EXP_update_mul", "MUL_tmp",    "ADD_sum_update"};
    SetScopeInfoForOps(builder, vectorOpNames, sharedScope);

    std::vector<std::string> cubeOpNames = {
        "VIEW_v_l1_0", "VIEW_v_l1_1", "VIEW_v_l1_2", "VIEW_v_l1_3", "VIEW_a_l1_0", "VIEW_a_l1_1",
        "VIEW_a_l1_2", "VIEW_a_l1_3", "VIEW_L0B_v0", "VIEW_L0B_v1", "VIEW_L0B_v2", "VIEW_L0B_v3",
        "VIEW_L0A_a0", "VIEW_L0A_a1", "VIEW_L0A_a2", "VIEW_L0A_a3", "MMUL_a",      "MMACC_b",
        "MMACC_c",     "MMACC_d",     "ASSEMBLE_oi_tmp_ddr"};
    SetScopeInfoForOps(builder, cubeOpNames, sharedScope);

    Function* func = builder.GetFunction();
    IsoPartitioner isoPartitioner;
    EXPECT_EQ(isoPartitioner.SetParameter(100000, 20, 0), SUCCESS);
    EXPECT_EQ(isoPartitioner.PartitionGraph(*func), SUCCESS);

    // Cube ops: attribute present and true, CV fuse id 0.
    for (const auto& opName : cubeOpNames) {
        const auto& cubeOp = builder.GetOp(opName);
        EXPECT_EQ(cubeOp->HasAttr(OpAttributeKey::isCube), true);
        EXPECT_EQ(cubeOp->GetBoolAttribute(OpAttributeKey::isCube), true);
        EXPECT_EQ(cubeOp->GetCvFuseId(), 0);
    }

    // Every remaining op: attribute present and false; the scoped vector ops also get fuse id 0.
    std::unordered_set<std::string> cubeNameSet(cubeOpNames.begin(), cubeOpNames.end());
    std::unordered_set<std::string> vectorNameSet(vectorOpNames.begin(), vectorOpNames.end());
    for (const auto& entry : builder.operations_) {
        if (cubeNameSet.count(entry.first) != 0) {
            continue;
        }
        const auto& otherOp = entry.second;
        EXPECT_EQ(otherOp->HasAttr(OpAttributeKey::isCube), true) << otherOp->GetOpcodeStr() << otherOp->GetOpMagic();
        EXPECT_EQ(otherOp->GetBoolAttribute(OpAttributeKey::isCube), false)
            << otherOp->GetOpcodeStr() << otherOp->GetOpMagic();
        if (vectorNameSet.find(entry.first) != vectorNameSet.end()) {
            EXPECT_EQ(otherOp->GetCvFuseId(), 0);
        }
    }
}
// Builds a 13-op graph on G made of VIEW / ASSEMBLE / RESHAPE ops plus one MUL and one DIV,
// with all tensors FP32 of shape (16, 16), and assigns every op to scope 1 with
// allowCrossScopeMerge enabled.
// Inputs: t_in, t_view_in2. Outputs: t_final, t_mul_out.
// Every builder call, including the tensor registration, is expected to succeed.
void GetViewAssembleOnlySuperNodeGraph(ComputationalGraphBuilder& G)
{
    std::vector<int64_t> tileShape{16, 16};
    EXPECT_EQ(
        G.AddTensors(
            DataType::DT_FP32, tileShape,
            {"t_in", "v_out1", "v_out2", "t_mid", "t_reshape_out", "t_view_in2", "t_view_out2", "t_mul_out",
             "t_div_out", "a_out1", "v_out3", "v_out4", "a_out2", "a_out3", "t_final"}),
        true);
    // The four vectors below are parallel: entry i of each describes op i.
    std::vector<Opcode> opCodes{Opcode::OP_VIEW,     Opcode::OP_VIEW, Opcode::OP_ASSEMBLE, Opcode::OP_ASSEMBLE,
                                Opcode::OP_RESHAPE,  Opcode::OP_VIEW, Opcode::OP_MUL,      Opcode::OP_DIV,
                                Opcode::OP_ASSEMBLE, Opcode::OP_VIEW, Opcode::OP_VIEW,     Opcode::OP_ASSEMBLE,
                                Opcode::OP_ASSEMBLE};
    std::vector<std::vector<std::string>> ioperands{{"t_in"},
                                                    {"t_in"},
                                                    {"v_out1"},
                                                    {"v_out2"},
                                                    {"t_mid"},
                                                    {"t_view_in2"},
                                                    {"t_view_out2", "t_reshape_out"},
                                                    {"t_mul_out"},
                                                    {"t_div_out"},
                                                    {"t_div_out"},
                                                    {"a_out1"},
                                                    {"v_out3"},
                                                    {"v_out4"}};
    std::vector<std::vector<std::string>> ooperands{
        {"v_out1"},    {"v_out2"}, {"t_mid"},  {"t_mid"},  {"t_reshape_out"}, {"t_view_out2"}, {"t_mul_out"},
        {"t_div_out"}, {"a_out1"}, {"v_out3"}, {"v_out4"}, {"t_final"},       {"t_final"}};
    std::vector<std::string> opNames{"VIEW1", "VIEW2",     "ASSEMBLE1", "ASSEMBLE2", "RESHAPE",   "VIEW3",    "MUL",
                                     "DIV",   "ASSEMBLE3", "VIEW4",     "VIEW5",     "ASSEMBLE4", "ASSEMBLE5"};
    EXPECT_EQ(G.AddOps(opCodes, ioperands, ooperands, opNames, true), true);
    EXPECT_EQ(G.SetInCast({"t_in", "t_view_in2"}), true);
    EXPECT_EQ(G.SetOutCast({"t_final", "t_mul_out"}), true);
    Operation::ScopeInfo scope1(1);
    scope1.allowCrossScopeMerge = true;
    for (const auto& name : opNames) {
        G.GetOp(name)->SetScopeInfo(scope1);
    }
}
// Partitions the graph built by GetViewAssembleOnlySuperNodeGraph and expects all of it to
// end up in a single subgraph.
TEST_F(GraphPartitionTest, TestViewAssembleOnlySuperNodeScopeId)
{
    ComputationalGraphBuilder builder;
    GetViewAssembleOnlySuperNodeGraph(builder);
    Function* func = builder.GetFunction();

    IsoPartitioner isoPartitioner;
    EXPECT_EQ(isoPartitioner.SetParameter(100000, 20, 0), SUCCESS);
    EXPECT_EQ(isoPartitioner.PartitionGraph(*func), SUCCESS);

    const int subGraphsExpected = 1;
    EXPECT_EQ(func->GetTotalSubGraphCount(), subGraphsExpected);
}
// Builds COPY_IN -> ABS -> {ADDS, MULS} -> MUL -> COPY_OUT, where the two branches use different
// opcodes, partitions it with IsoPartitioner and expects a single subgraph.
TEST_F(GraphPartitionTest, TestNonIsomorphismGraph)
{
    ComputationalGraphBuilder G;
    std::vector<int64_t> tileShape{16, 16};
    EXPECT_EQ(G.AddTensors(DataType::DT_FP32, tileShape, {"hin", "h2", "h3", "h41", "h42", "h5", "hout"}), true);
    EXPECT_EQ(G.AddOp(Opcode::OP_COPY_IN, {"hin"}, {"h2"}, "COPY_IN", true), true);
    EXPECT_EQ(G.AddOp(Opcode::OP_ABS, {"h2"}, {"h3"}, "ABS", true), true);
    // Both branches read h3 but apply different opcodes (ADDS vs MULS).
    EXPECT_EQ(G.AddOp(Opcode::OP_ADDS, {"h3"}, {"h41"}, "ADDS1", true), true);
    EXPECT_EQ(G.AddOp(Opcode::OP_MULS, {"h3"}, {"h42"}, "MULS2", true), true);
    EXPECT_EQ(G.AddOp(Opcode::OP_MUL, {"h41", "h42"}, {"h5"}, "MUL", true), true);
    EXPECT_EQ(G.AddOp(Opcode::OP_COPY_OUT, {"h5"}, {"hout"}, "COPY_OUT", true), true);
    EXPECT_EQ(G.SetInCast({"hin"}), true);
    EXPECT_EQ(G.SetOutCast({"hout"}), true);
    Function* function = G.GetFunction();
    const int parallelTH = 20;
    const int cycleLB = 100000;
    // A boolean switch, so it is typed as bool rather than int.
    const bool useNodeHash = false;
    IsoPartitioner partitioner;
    EXPECT_EQ(partitioner.SetParameter(parallelTH, cycleLB, useNodeHash), SUCCESS);
    EXPECT_EQ(partitioner.PartitionGraph(*function), SUCCESS);
    const int subGraphNum = 1;
    EXPECT_EQ(function->GetTotalSubGraphCount(), subGraphNum);
}
// Builds three A_MUL_B ops feeding one A_MULACC_B and expects both the GraphPartition pass
// and its PostCheck to succeed on the resulting function.
TEST_F(GraphPartitionTest, TestAvoidSuperNodeLoop)
{
    ComputationalGraphBuilder builder;
    std::vector<int64_t> shape{16, 16};
    EXPECT_TRUE(builder.AddTensors(DataType::DT_FP32, shape, {"t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8"}));

    // Parallel vectors: entry i of each describes op i.
    std::vector<std::string> names{"MUL1", "MUL2", "MUL3", "MULACC"};
    std::vector<Opcode> codes{Opcode::OP_A_MUL_B, Opcode::OP_A_MUL_B, Opcode::OP_A_MUL_B, Opcode::OP_A_MULACC_B};
    std::vector<std::vector<std::string>> inputs{{"t1", "t2"}, {"t2", "t3"}, {"t4", "t5"}, {"t3", "t6", "t7"}};
    std::vector<std::vector<std::string>> outputs{{"t3"}, {"t4"}, {"t6"}, {"t8"}};
    EXPECT_TRUE(builder.AddOps(codes, inputs, outputs, names, true));
    EXPECT_TRUE(builder.SetInCast({"t1", "t2", "t5", "t7"}));
    EXPECT_TRUE(builder.SetOutCast({"t8"}));

    Function* func = builder.GetFunction();
    GraphPartition pass;
    EXPECT_EQ(pass.RunOnFunction(*func), SUCCESS);
    EXPECT_EQ(pass.PostCheck(*func), SUCCESS);
}
// Chain MULS (UB) -> CONVERT (UB to L1) -> L1_TO_L0A. After the GraphPartition pass the CONVERT
// op must share a subgraph with MULS and sit in a different subgraph than L1_TO_L0A.
TEST_F(GraphPartitionTest, TestBoundaryConvert)
{
    ComputationalGraphBuilder builder;

    std::vector<int64_t> shape{16, 16};
    std::vector<std::string> tensors{"t1", "t2", "t3", "t4"};
    std::vector<MemoryType> memTypes{
        MemoryType::MEM_UB, MemoryType::MEM_UB, MemoryType::MEM_L1, MemoryType::MEM_L0A};
    EXPECT_TRUE(builder.AddTensors(DataType::DT_FP32, shape, memTypes, tensors, 0));

    std::vector<std::string> names{"muls", "convert", "L1ToL0A"};
    std::vector<Opcode> codes{Opcode::OP_MULS, Opcode::OP_CONVERT, Opcode::OP_L1_TO_L0A};
    std::vector<std::vector<std::string>> inputs{{"t1"}, {"t2"}, {"t3"}};
    std::vector<std::vector<std::string>> outputs{{"t2"}, {"t3"}, {"t4"}};
    EXPECT_TRUE(builder.AddOps(codes, inputs, outputs, names, true));
    EXPECT_TRUE(builder.SetOutCast({"t4"}));

    builder.GetOp("convert")->SetOpAttribute(
        std::make_shared<ConvertOpAttribute>(MemoryType::MEM_UB, MemoryType::MEM_L1));

    Function* func = builder.GetFunction();
    GraphPartition pass;
    EXPECT_EQ(pass.RunOnFunction(*func), SUCCESS);

    EXPECT_EQ(builder.GetOp("muls")->GetSubgraphID(), builder.GetOp("convert")->GetSubgraphID());
    EXPECT_NE(builder.GetOp("L1ToL0A")->GetSubgraphID(), builder.GetOp("convert")->GetSubgraphID());
}
// Builds a matmul graph on G whose L0C result (matC1L0C) is read by one VIEW that stays in
// L0C (viewC1L0C) and is then assembled into the DDR output outcast1.
void ConstructGraphForMatMulViewFormSuperNode(ComputationalGraphBuilder& G)
{
    DataType elemType = DataType::DT_FP16;

    // Full-size (16, 16) tensors from the DDR inputs through L1/L0A/L0B to the L0C result.
    Shape fullShape = {16, 16};
    std::vector<std::string> fullNames{"matA1DDR", "matB1DDR", "matA1L1", "matB1L1",
                                       "matA1L0A", "matB1L0B", "matC1L0C"};
    std::vector<MemoryType> fullMemTypes{
        MemoryType::MEM_DEVICE_DDR, MemoryType::MEM_DEVICE_DDR, MemoryType::MEM_L1, MemoryType::MEM_L1,
        MemoryType::MEM_L0A,        MemoryType::MEM_L0B,        MemoryType::MEM_L0C};
    EXPECT_TRUE(G.AddTensors(elemType, fullShape, fullMemTypes, fullNames, 0));

    // (8, 16) tensors produced after the view of the matmul result.
    Shape slicedShape{8, 16};
    std::vector<std::string> slicedNames{"viewC1L0C", "outcast1"};
    std::vector<MemoryType> slicedMemTypes{MemoryType::MEM_L0C, MemoryType::MEM_DEVICE_DDR};
    EXPECT_TRUE(G.AddTensors(elemType, slicedShape, slicedMemTypes, slicedNames, 0));

    // Parallel vectors: entry i of each describes op i.
    std::vector<std::string> names{"View1", "View2", "L1ToL0A1", "L1ToL0B1", "Mul1", "View3", "Assemble1"};
    std::vector<Opcode> codes{Opcode::OP_VIEW,    Opcode::OP_VIEW, Opcode::OP_L1_TO_L0A, Opcode::OP_L1_TO_L0B,
                              Opcode::OP_A_MUL_B, Opcode::OP_VIEW, Opcode::OP_ASSEMBLE};
    std::vector<std::vector<std::string>> inputs{
        {"matA1DDR"}, {"matB1DDR"}, {"matA1L1"}, {"matB1L1"}, {"matA1L0A", "matB1L0B"}, {"matC1L0C"}, {"viewC1L0C"}};
    std::vector<std::vector<std::string>> outputs{{"matA1L1"},  {"matB1L1"},   {"matA1L0A"}, {"matB1L0B"},
                                                  {"matC1L0C"}, {"viewC1L0C"}, {"outcast1"}};
    EXPECT_TRUE(G.AddOps(codes, inputs, outputs, names, true));

    EXPECT_TRUE(G.SetInCast({"matA1DDR", "matB1DDR"}));
    EXPECT_TRUE(G.SetOutCast({"outcast1"}));
}
// Runs the GraphPartition pass on the graph from ConstructGraphForMatMulViewFormSuperNode and
// expects the matmul op and the view of its result to share one subgraph.
TEST_F(GraphPartitionTest, TestMatMulViewFormSuperNode)
{
    ComputationalGraphBuilder G;
    ConstructGraphForMatMulViewFormSuperNode(G);
    Function* function = G.GetFunction();
    // Fatal assertion: the pointer is dereferenced on the next statements.
    ASSERT_NE(function, nullptr);
    GraphPartition gpp;
    EXPECT_EQ(gpp.RunOnFunction(*function), SUCCESS);
    auto mulOp = G.GetOp("Mul1");
    auto viewOp = G.GetOp("View3");
    ASSERT_NE(mulOp, nullptr);
    ASSERT_NE(viewOp, nullptr);
    EXPECT_EQ(mulOp->GetSubgraphID(), viewOp->GetSubgraphID());
}
// Builds a matmul graph on G whose L0C result (matC3L0C) is read by two VIEW ops that both
// stay in L0C. viewC3L0C_1 is assembled into outcast3; viewC3L0C_2 is itself a graph output.
void ConstructGraphForMatMulMultipleViewSuccessors(ComputationalGraphBuilder& G)
{
    DataType elemType = DataType::DT_FP16;

    // Full-size (16, 16) tensors from the DDR inputs through L1/L0A/L0B to the L0C result.
    Shape fullShape = {16, 16};
    std::vector<std::string> fullNames{"matA3DDR", "matB3DDR", "matA3L1", "matB3L1",
                                       "matA3L0A", "matB3L0B", "matC3L0C"};
    std::vector<MemoryType> fullMemTypes{
        MemoryType::MEM_DEVICE_DDR, MemoryType::MEM_DEVICE_DDR, MemoryType::MEM_L1, MemoryType::MEM_L1,
        MemoryType::MEM_L0A,        MemoryType::MEM_L0B,        MemoryType::MEM_L0C};
    EXPECT_TRUE(G.AddTensors(elemType, fullShape, fullMemTypes, fullNames, 0));

    // (8, 16) tensors produced after the views of the matmul result.
    Shape slicedShape{8, 16};
    std::vector<std::string> slicedNames{"viewC3L0C_1", "viewC3L0C_2", "outcast3"};
    std::vector<MemoryType> slicedMemTypes{MemoryType::MEM_L0C, MemoryType::MEM_L0C, MemoryType::MEM_DEVICE_DDR};
    EXPECT_TRUE(G.AddTensors(elemType, slicedShape, slicedMemTypes, slicedNames, 0));

    // Parallel vectors: entry i of each describes op i.
    std::vector<std::string> names{"View1", "View2",     "L1ToL0A3",  "L1ToL0B3",
                                   "Mul3",  "ViewL0C_1", "ViewL0C_2", "Assemble3"};
    std::vector<Opcode> codes{Opcode::OP_VIEW,    Opcode::OP_VIEW, Opcode::OP_L1_TO_L0A, Opcode::OP_L1_TO_L0B,
                              Opcode::OP_A_MUL_B, Opcode::OP_VIEW, Opcode::OP_VIEW,      Opcode::OP_ASSEMBLE};
    std::vector<std::vector<std::string>> inputs{
        {"matA3DDR"}, {"matB3DDR"}, {"matA3L1"}, {"matB3L1"}, {"matA3L0A", "matB3L0B"},
        {"matC3L0C"}, {"matC3L0C"}, {"viewC3L0C_1"}};
    std::vector<std::vector<std::string>> outputs{{"matA3L1"},  {"matB3L1"},     {"matA3L0A"},    {"matB3L0B"},
                                                  {"matC3L0C"}, {"viewC3L0C_1"}, {"viewC3L0C_2"}, {"outcast3"}};
    EXPECT_TRUE(G.AddOps(codes, inputs, outputs, names, true));

    EXPECT_TRUE(G.SetInCast({"matA3DDR", "matB3DDR"}));
    EXPECT_TRUE(G.SetOutCast({"outcast3", "viewC3L0C_2"}));
}
// Runs the GraphPartition pass on the graph from ConstructGraphForMatMulMultipleViewSuccessors
// and expects the matmul op to share a subgraph with both L0C views of its result.
TEST_F(GraphPartitionTest, TestMatMulMultipleViewSuccessors)
{
    ComputationalGraphBuilder G;
    ConstructGraphForMatMulMultipleViewSuccessors(G);
    Function* function = G.GetFunction();
    // Fatal assertion: the pointer is dereferenced on the next statements.
    ASSERT_NE(function, nullptr);
    GraphPartition gpp;
    EXPECT_EQ(gpp.RunOnFunction(*function), SUCCESS);
    auto mulOp = G.GetOp("Mul3");
    auto viewL0C_1 = G.GetOp("ViewL0C_1");
    auto viewL0C_2 = G.GetOp("ViewL0C_2");
    ASSERT_NE(mulOp, nullptr);
    ASSERT_NE(viewL0C_1, nullptr);
    ASSERT_NE(viewL0C_2, nullptr);
    EXPECT_EQ(mulOp->GetSubgraphID(), viewL0C_1->GetSubgraphID());
    EXPECT_EQ(mulOp->GetSubgraphID(), viewL0C_2->GetSubgraphID());
}
// Builds a matmul graph on G whose L0C result (matC4L0C) is read by two VIEW ops that leave
// L0C: ViewDDR writes the DDR output outcast1, ViewL1 writes the L1 tensor viewC4L1, which is
// then assembled into outcast2.
void ConstructGraphForMatMulViewNonL0C(ComputationalGraphBuilder& G)
{
    DataType elemType = DataType::DT_FP16;

    // Full-size (16, 16) tensors from the DDR inputs through L1/L0A/L0B to the L0C result.
    Shape fullShape = {16, 16};
    std::vector<std::string> fullNames{"matA4DDR", "matB4DDR", "matA4L1", "matB4L1",
                                       "matA4L0A", "matB4L0B", "matC4L0C"};
    std::vector<MemoryType> fullMemTypes{
        MemoryType::MEM_DEVICE_DDR, MemoryType::MEM_DEVICE_DDR, MemoryType::MEM_L1, MemoryType::MEM_L1,
        MemoryType::MEM_L0A,        MemoryType::MEM_L0B,        MemoryType::MEM_L0C};
    EXPECT_TRUE(G.AddTensors(elemType, fullShape, fullMemTypes, fullNames, 0));

    // (8, 16) tensors produced after the views of the matmul result.
    Shape slicedShape{8, 16};
    std::vector<std::string> slicedNames{"outcast1", "viewC4L1", "outcast2"};
    std::vector<MemoryType> slicedMemTypes{
        MemoryType::MEM_DEVICE_DDR, MemoryType::MEM_L1, MemoryType::MEM_DEVICE_DDR};
    EXPECT_TRUE(G.AddTensors(elemType, slicedShape, slicedMemTypes, slicedNames, 0));

    // Parallel vectors: entry i of each describes op i.
    std::vector<std::string> names{"View1", "View2",   "L1ToL0A4", "L1ToL0B4",
                                   "Mul4",  "ViewDDR", "ViewL1",   "Assemble4"};
    std::vector<Opcode> codes{Opcode::OP_VIEW,    Opcode::OP_VIEW, Opcode::OP_L1_TO_L0A, Opcode::OP_L1_TO_L0B,
                              Opcode::OP_A_MUL_B, Opcode::OP_VIEW, Opcode::OP_VIEW,      Opcode::OP_ASSEMBLE};
    std::vector<std::vector<std::string>> inputs{
        {"matA4DDR"}, {"matB4DDR"}, {"matA4L1"}, {"matB4L1"}, {"matA4L0A", "matB4L0B"},
        {"matC4L0C"}, {"matC4L0C"}, {"viewC4L1"}};
    std::vector<std::vector<std::string>> outputs{{"matA4L1"},  {"matB4L1"},  {"matA4L0A"}, {"matB4L0B"},
                                                  {"matC4L0C"}, {"outcast1"}, {"viewC4L1"}, {"outcast2"}};
    EXPECT_TRUE(G.AddOps(codes, inputs, outputs, names, true));

    EXPECT_TRUE(G.SetInCast({"matA4DDR", "matB4DDR"}));
    EXPECT_TRUE(G.SetOutCast({"outcast1", "outcast2"}));
}
// Runs the GraphPartition pass on the graph from ConstructGraphForMatMulViewNonL0C and expects
// the view into DDR to be separated from the matmul op while the view into L1 stays with it.
TEST_F(GraphPartitionTest, TestMatMulViewNonL0C)
{
    ComputationalGraphBuilder G;
    ConstructGraphForMatMulViewNonL0C(G);
    Function* function = G.GetFunction();
    // Fatal assertion: the pointer is dereferenced on the next statements.
    ASSERT_NE(function, nullptr);
    GraphPartition gpp;
    EXPECT_EQ(gpp.RunOnFunction(*function), SUCCESS);
    auto mulOp = G.GetOp("Mul4");
    auto viewDDROp = G.GetOp("ViewDDR");
    auto viewL1Op = G.GetOp("ViewL1");
    ASSERT_NE(mulOp, nullptr);
    ASSERT_NE(viewDDROp, nullptr);
    ASSERT_NE(viewL1Op, nullptr);
    EXPECT_NE(mulOp->GetSubgraphID(), viewDDROp->GetSubgraphID());
    EXPECT_EQ(mulOp->GetSubgraphID(), viewL1Op->GetSubgraphID());
}
// Chain m1 -> a1 -> a2 -> v1 -> v2 -> m2 on UB tensors, with m1 in scope 0 and m2 in scope 1.
// After the GraphPartition pass the two ASSEMBLE ops must report scope 0 and the two VIEW ops scope 1.
TEST_F(GraphPartitionTest, TestViewAssembleScopeId)
{
    ComputationalGraphBuilder builder;

    std::vector<int64_t> shape{16, 16};
    std::vector<std::string> tensors{"t1", "t2", "t3", "t4", "t5", "t6", "t7"};
    std::vector<MemoryType> memTypes{MemoryType::MEM_UB, MemoryType::MEM_UB, MemoryType::MEM_UB, MemoryType::MEM_UB,
                                     MemoryType::MEM_UB, MemoryType::MEM_UB, MemoryType::MEM_UB};
    EXPECT_TRUE(builder.AddTensors(DataType::DT_FP32, shape, memTypes, tensors, 0));

    // Parallel vectors: entry i of each describes op i.
    std::vector<std::string> names{"m1", "a1", "a2", "v1", "v2", "m2"};
    std::vector<Opcode> codes{Opcode::OP_MULS, Opcode::OP_ASSEMBLE, Opcode::OP_ASSEMBLE,
                              Opcode::OP_VIEW, Opcode::OP_VIEW,     Opcode::OP_MULS};
    std::vector<std::vector<std::string>> inputs{{"t1"}, {"t2"}, {"t3"}, {"t4"}, {"t5"}, {"t6"}};
    std::vector<std::vector<std::string>> outputs{{"t2"}, {"t3"}, {"t4"}, {"t5"}, {"t6"}, {"t7"}};
    EXPECT_TRUE(builder.AddOps(codes, inputs, outputs, names, true));
    EXPECT_TRUE(builder.SetOutCast({"t3", "t4", "t7"}));

    // Only the two MULS ops receive an explicit scope id.
    builder.GetOp("m1")->SetScopeId(0);
    builder.GetOp("m2")->SetScopeId(1);

    Function* func = builder.GetFunction();
    GraphPartition pass;
    EXPECT_EQ(pass.RunOnFunction(*func), SUCCESS);

    EXPECT_EQ(builder.GetOp("a1")->GetScopeId(), 0);
    EXPECT_EQ(builder.GetOp("a2")->GetScopeId(), 0);
    EXPECT_EQ(builder.GetOp("v1")->GetScopeId(), 1);
    EXPECT_EQ(builder.GetOp("v2")->GetScopeId(), 1);
}
/**
 * Test: a UB-to-UB VIEW with a dynamic offset should not be mergeable.
 *
 * Graph structure:
 *   inputUb[MEM_UB, (32, 16)] --> VIEW[fromOffset=(0,0), dynOffset=(dynamicOffsetDim0, 0)]
 *       --> outputUb[MEM_UB, (32, 16)]
 *
 * Expected behavior:
 *   The node containing the UB-to-UB VIEW with a dynamic offset is marked as not mergeable.
 */
TEST_F(GraphPartitionTest, TestUbToUbViewWithDynOffsetNotMergeable)
{
    ComputationalGraphBuilder builder;

    // Two UB tensors of identical shape joined by a single VIEW op.
    std::vector<std::string> ubNames{"inputUb", "outputUb"};
    std::vector<MemoryType> ubMemTypes{MemoryType::MEM_UB, MemoryType::MEM_UB};
    std::vector<int64_t> ubShape = {32, 16};
    EXPECT_TRUE(builder.AddTensors(DataType::DT_FP32, ubShape, ubMemTypes, ubNames, 0));

    std::vector<Opcode> codes{Opcode::OP_VIEW};
    std::vector<std::string> names{"viewOp"};
    std::vector<std::vector<std::string>> inputs{{"inputUb"}};
    std::vector<std::vector<std::string>> outputs{{"outputUb"}};
    EXPECT_TRUE(builder.AddOps(codes, inputs, outputs, names, true));

    auto view = builder.GetOp("viewOp");
    ASSERT_NE(view, nullptr);

    // The first dynamic-offset entry is a symbolic variable, the second a constant zero.
    std::vector<SymbolicScalar> dynOffset = {CreateTestScalarVar("dynamicOffsetDim0"), IRBuilder().CreateConstInt(0)};
    view->SetOpAttribute(std::make_shared<ViewOpAttribute>(
        std::vector<int64_t>{0, 0}, dynOffset,
        std::vector<SymbolicScalar>{IRBuilder().CreateConstInt(32), IRBuilder().CreateConstInt(16)}));

    Function* func = builder.GetFunction();
    IsoPartitioner isoPartitioner;
    EXPECT_EQ(isoPartitioner.SetParameter(10, 10, false), SUCCESS);
    EXPECT_EQ(isoPartitioner.PartitionGraph(*func), SUCCESS);

    // op magic -> operation index -> super node index -> mergeable flag
    int magic = view->GetOpMagic();
    int opIndex = isoPartitioner.operationInfo_->magic2Idx_[magic];
    int nodeIndex = isoPartitioner.superNodeInfo_->op2Node_[opIndex];
    const bool mergeable = isoPartitioner.superNodeInfo_->GetNodeMergeable(isoPartitioner.operationInfo_, nodeIndex);
    EXPECT_FALSE(mergeable);
}
/**
 * Test: a UB-to-UB VIEW without a dynamic offset should be mergeable.
 *
 * Graph structure:
 *   inputUb[MEM_UB, (32, 16)] --> VIEW[fromOffset=(0,0), dynOffset=empty] --> outputUb[MEM_UB, (32, 16)]
 *
 * Expected behavior:
 *   The node containing the UB-to-UB VIEW without a dynamic offset is marked as mergeable.
 */
TEST_F(GraphPartitionTest, TestUbToUbViewWithoutDynOffsetMergeable)
{
    ComputationalGraphBuilder builder;

    // Two UB tensors of identical shape joined by a single VIEW op.
    std::vector<MemoryType> ubMemTypes{MemoryType::MEM_UB, MemoryType::MEM_UB};
    std::vector<std::string> ubNames{"inputUb", "outputUb"};
    std::vector<int64_t> ubShape = {32, 16};
    EXPECT_TRUE(builder.AddTensors(DataType::DT_FP32, ubShape, ubMemTypes, ubNames, 0));

    std::vector<std::string> names{"viewOp"};
    std::vector<Opcode> codes{Opcode::OP_VIEW};
    std::vector<std::vector<std::string>> inputs{{"inputUb"}};
    std::vector<std::vector<std::string>> outputs{{"outputUb"}};
    EXPECT_TRUE(builder.AddOps(codes, inputs, outputs, names, true));

    auto view = builder.GetOp("viewOp");
    ASSERT_NE(view, nullptr);

    // The dynamic-offset list is left empty for this case.
    view->SetOpAttribute(std::make_shared<ViewOpAttribute>(
        std::vector<int64_t>{0, 0}, std::vector<SymbolicScalar>{},
        std::vector<SymbolicScalar>{IRBuilder().CreateConstInt(32), IRBuilder().CreateConstInt(16)}));

    Function* func = builder.GetFunction();
    IsoPartitioner isoPartitioner;
    EXPECT_EQ(isoPartitioner.SetParameter(10, 10, false), SUCCESS);
    EXPECT_EQ(isoPartitioner.PartitionGraph(*func), SUCCESS);

    // op magic -> operation index -> super node index -> mergeable flag
    int magic = view->GetOpMagic();
    int opIndex = isoPartitioner.operationInfo_->magic2Idx_[magic];
    int nodeIndex = isoPartitioner.superNodeInfo_->op2Node_[opIndex];
    const bool mergeable = isoPartitioner.superNodeInfo_->GetNodeMergeable(isoPartitioner.operationInfo_, nodeIndex);
    EXPECT_TRUE(mergeable);
}
/**
 * Test: a UB-to-UB ASSEMBLE with a dynamic offset should not be mergeable.
 *
 * Graph structure:
 *   inputUb[MEM_UB, (32, 16)] --> ASSEMBLE[toOffset=(0,0), toDynOffset=(dynamicOffsetDim0, 0)]
 *       --> outputUb[MEM_UB, (32, 16)]
 *
 * Expected behavior:
 *   The node containing the UB-to-UB ASSEMBLE with a dynamic offset is marked as not mergeable.
 */
TEST_F(GraphPartitionTest, TestUbToUbAssembleWithDynOffsetNotMergeable)
{
    ComputationalGraphBuilder builder;

    // Two UB tensors of identical shape joined by a single ASSEMBLE op.
    std::vector<std::string> ubNames{"inputUb", "outputUb"};
    std::vector<MemoryType> ubMemTypes{MemoryType::MEM_UB, MemoryType::MEM_UB};
    std::vector<int64_t> ubShape = {32, 16};
    EXPECT_TRUE(builder.AddTensors(DataType::DT_FP32, ubShape, ubMemTypes, ubNames, 0));

    std::vector<Opcode> codes{Opcode::OP_ASSEMBLE};
    std::vector<std::string> names{"assembleOp"};
    std::vector<std::vector<std::string>> inputs{{"inputUb"}};
    std::vector<std::vector<std::string>> outputs{{"outputUb"}};
    EXPECT_TRUE(builder.AddOps(codes, inputs, outputs, names, true));

    auto assemble = builder.GetOp("assembleOp");
    ASSERT_NE(assemble, nullptr);

    // The first dynamic-offset entry is a symbolic variable, the second a constant zero.
    std::vector<SymbolicScalar> toDynOffset = {
        CreateTestScalarVar("dynamicOffsetDim0"), IRBuilder().CreateConstInt(0)};
    assemble->SetOpAttribute(std::make_shared<AssembleOpAttribute>(std::vector<int64_t>{0, 0}, toDynOffset));

    Function* func = builder.GetFunction();
    IsoPartitioner isoPartitioner;
    EXPECT_EQ(isoPartitioner.SetParameter(10, 10, false), SUCCESS);
    EXPECT_EQ(isoPartitioner.PartitionGraph(*func), SUCCESS);

    // op magic -> operation index -> super node index -> mergeable flag
    int magic = assemble->GetOpMagic();
    int opIndex = isoPartitioner.operationInfo_->magic2Idx_[magic];
    int nodeIndex = isoPartitioner.superNodeInfo_->op2Node_[opIndex];
    const bool mergeable = isoPartitioner.superNodeInfo_->GetNodeMergeable(isoPartitioner.operationInfo_, nodeIndex);
    EXPECT_FALSE(mergeable);
}
/**
 * Test: a UB-to-UB ASSEMBLE without a dynamic offset should be mergeable.
 *
 * Graph structure:
 *   inputUb[MEM_UB, (32, 16)] --> ASSEMBLE[toOffset=(0,0), toDynOffset=empty] --> outputUb[MEM_UB, (32, 16)]
 *
 * Expected behavior:
 *   The node containing the UB-to-UB ASSEMBLE without a dynamic offset is marked as mergeable.
 */
TEST_F(GraphPartitionTest, TestUbToUbAssembleWithoutDynOffsetMergeable)
{
    ComputationalGraphBuilder builder;

    // Two UB tensors of identical shape joined by a single ASSEMBLE op.
    std::vector<MemoryType> ubMemTypes{MemoryType::MEM_UB, MemoryType::MEM_UB};
    std::vector<std::string> ubNames{"inputUb", "outputUb"};
    std::vector<int64_t> ubShape = {32, 16};
    EXPECT_TRUE(builder.AddTensors(DataType::DT_FP32, ubShape, ubMemTypes, ubNames, 0));

    std::vector<std::string> names{"assembleOp"};
    std::vector<Opcode> codes{Opcode::OP_ASSEMBLE};
    std::vector<std::vector<std::string>> inputs{{"inputUb"}};
    std::vector<std::vector<std::string>> outputs{{"outputUb"}};
    EXPECT_TRUE(builder.AddOps(codes, inputs, outputs, names, true));

    auto assemble = builder.GetOp("assembleOp");
    ASSERT_NE(assemble, nullptr);

    // The dynamic-offset list is left empty for this case.
    assemble->SetOpAttribute(
        std::make_shared<AssembleOpAttribute>(std::vector<int64_t>{0, 0}, std::vector<SymbolicScalar>{}));

    Function* func = builder.GetFunction();
    IsoPartitioner isoPartitioner;
    EXPECT_EQ(isoPartitioner.SetParameter(10, 10, false), SUCCESS);
    EXPECT_EQ(isoPartitioner.PartitionGraph(*func), SUCCESS);

    // op magic -> operation index -> super node index -> mergeable flag
    int magic = assemble->GetOpMagic();
    int opIndex = isoPartitioner.operationInfo_->magic2Idx_[magic];
    int nodeIndex = isoPartitioner.superNodeInfo_->op2Node_[opIndex];
    const bool mergeable = isoPartitioner.superNodeInfo_->GetNodeMergeable(isoPartitioner.operationInfo_, nodeIndex);
    EXPECT_TRUE(mergeable);
}
}
}