* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
* \file test_insert_sync.cpp
* \brief Unit test for InsertSync.
*/
#include <gtest/gtest.h>
#include "symbolic_scalar_test_utils.h"
#include "tilefwk/platform.h"
#include "interface/tensor/irbuilder.h"
#define private public
#include "passes/block_graph_pass/insert_sync.h"
#include "ut_json/ut_json_tool.h"
namespace npu {
namespace tile_fwk {
// Named integer literals shared by the InsertSync unit tests in this file.
// Each IS_NUM<k> simply equals <k>; the tests use them as tensor dimensions,
// memory-range bounds, operation indices and expected counts instead of
// writing the numbers inline.
constexpr int IS_NUM1 = 1;
constexpr int IS_NUM2 = 2;
constexpr int IS_NUM3 = 3;
constexpr int IS_NUM4 = 4;
constexpr int IS_NUM5 = 5;
constexpr int IS_NUM8 = 8;
constexpr int IS_NUM9 = 9;
constexpr int IS_NUM10 = 10;
constexpr int IS_NUM16 = 16;
constexpr int IS_NUM18 = 18;
constexpr int IS_NUM19 = 19;
constexpr int IS_NUM20 = 20;
constexpr int IS_NUM29 = 29;
constexpr int IS_NUM30 = 30;
constexpr int IS_NUM32 = 32;
constexpr int IS_NUM39 = 39;
constexpr int IS_NUM40 = 40;
constexpr int IS_NUM49 = 49;
constexpr int IS_NUM50 = 50;
constexpr int IS_NUM59 = 59;
constexpr int IS_NUM60 = 60;
constexpr int IS_NUM69 = 69;
constexpr int IS_NUM70 = 70;
constexpr int IS_NUM79 = 79;
constexpr int IS_NUM80 = 80;
constexpr int IS_NUM89 = 89;
constexpr int IS_NUM90 = 90;
constexpr int IS_NUM99 = 99;
constexpr int IS_NUM100 = 100;
constexpr int IS_NUM101 = 101;
constexpr int IS_NUM109 = 109;
constexpr int IS_NUM110 = 110;
constexpr int IS_NUM119 = 119;
constexpr int IS_NUM120 = 120;
constexpr int IS_NUM129 = 129;
constexpr int IS_NUM130 = 130;
constexpr int IS_NUM139 = 139;
constexpr int IS_NUM140 = 140;
constexpr int IS_NUM149 = 149;
constexpr int IS_NUM150 = 150;
constexpr int IS_NUM159 = 159;
constexpr int IS_NUM160 = 160;
constexpr int IS_NUM169 = 169;
constexpr int IS_NUM170 = 170;
constexpr int IS_NUM179 = 179;
constexpr int IS_NUM180 = 180;
constexpr int IS_NUM189 = 189;
constexpr int IS_NUM190 = 190;
constexpr int IS_NUM199 = 199;
constexpr int IS_NUM200 = 200;
constexpr int IS_NUM209 = 209;
constexpr int IS_NUM210 = 210;
constexpr int IS_NUM219 = 219;
constexpr int IS_NUM220 = 220;
constexpr int IS_NUM229 = 229;
constexpr int IS_NUM230 = 230;
constexpr int IS_NUM239 = 239;
constexpr int IS_NUM240 = 240;
constexpr int IS_NUM249 = 249;
constexpr int IS_NUM250 = 250;
constexpr int IS_NUM259 = 259;
constexpr int IS_NUM260 = 260;
constexpr int IS_NUM269 = 269;
constexpr int IS_NUM270 = 270;
constexpr int IS_NUM279 = 279;
constexpr int IS_NUM280 = 280;
constexpr int IS_NUM289 = 289;
constexpr int IS_NUM290 = 290;
constexpr int IS_NUM299 = 299;
constexpr int IS_NUM300 = 300;
constexpr int IS_NUM499 = 499;
constexpr int IS_NUM500 = 500;
constexpr int IS_NUM600 = 600;
constexpr int IS_NUM699 = 699;
constexpr int IS_NUM700 = 700;
constexpr int IS_NUM800 = 800;
constexpr int IS_NUM900 = 900;
constexpr int IS_NUM1000 = 1000;
constexpr int IS_NUM1100 = 1100;
/**
 * Fixture shared by the InsertSync tests.
 *
 * SetUp resets the global Program and configuration, selects the
 * CS_EXECUTE_GRAPH compile stage and switches the cost model off. The two
 * helper methods are used by the test bodies to prepare PipeSync state.
 */
class InsertSyncTest : public ::testing::Test {
public:
    static void SetUpTestCase() {}
    static void TearDownTestCase() {}

    void SetUp() override
    {
        Program::GetInstance().Reset();
        config::Reset();
        config::SetHostOption(COMPILE_STAGE, CS_EXECUTE_GRAPH);
        config::SetPlatformConfig(KEY_ENABLE_COST_MODEL, false);
    }

    void TearDown() override {}

    /**
     * Overrides parts of a tile-op configuration for the tests.
     *
     * The AIV core is always forced to AIV0. For OP_COPY_IN the start/end pipe
     * becomes PIPE_MTE2, for OP_COPY_OUT it becomes PIPE_MTE3, and in both
     * cases the core type is set to AIV. Other opcodes keep their pipe and
     * core-type settings.
     *
     * @param op    operation whose opcode selects the override
     * @param opcfg configuration to modify in place
     */
    void AdjustCopyOpTileCfg(Operation& op, TileOpCfg& opcfg)
    {
        opcfg.aivCore_ = AIVCore::AIV0;

        const bool isCopyIn = (op.GetOpcode() == Opcode::OP_COPY_IN);
        const bool isCopyOut = !isCopyIn && (op.GetOpcode() == Opcode::OP_COPY_OUT);
        if (!isCopyIn && !isCopyOut) {
            return;
        }

        const auto copyPipe = isCopyIn ? PipeType::PIPE_MTE2 : PipeType::PIPE_MTE3;
        opcfg.pipeIdStart_ = copyPipe;
        opcfg.pipeIdEnd_ = copyPipe;
        opcfg.coreType_ = CoreType::AIV;
    }

    /**
     * Walks the operation list in order and, for every operation:
     *  1. appends a DepOp built from its (adjusted) tile-op configuration,
     *  2. visits the candidate predecessors returned by the searcher from the
     *     highest index to the lowest and calls UpdateDep for each one that
     *     HasDataDependency confirms,
     *  3. registers the operation with the searcher and enqueues it.
     *
     * @param ps                     PipeSync instance whose state is filled
     * @param dataDependencySearcher searcher queried and updated per operation
     * @param opLogPtr               operations in program order
     * @param synced                 forwarded unchanged to PipeSync::EnqueueOp
     */
    void BuildDeps(
        PipeSync& ps, DataDependencySearcher& dataDependencySearcher, std::vector<Operation*>& opLogPtr,
        std::vector<IndexOp>& synced)
    {
        for (size_t cur = 0; cur != opLogPtr.size(); ++cur) {
            Operation* curOperation = opLogPtr[cur];

            auto tileCfg = OpcodeManager::Inst().GetTileOpCfg(curOperation->GetOpcode());
            AdjustCopyOpTileCfg(*curOperation, tileCfg);

            PipeSync::DepOp newDep(cur, {tileCfg.pipeIdStart_, tileCfg.pipeIdEnd_, tileCfg.coreType_, tileCfg.aivCore_});
            PipeSync::DepOp& currOp = ps.depOps_.emplace_back(newDep);

            auto candidates = dataDependencySearcher.Find(curOperation);
            for (auto rit = candidates.rbegin(); rit != candidates.rend(); ++rit) {
                const size_t prev = *rit;
                if (!ps.HasDataDependency(*opLogPtr[prev], *curOperation)) {
                    continue;
                }
                ps.UpdateDep(currOp, ps.depOps_[prev]);
            }

            dataDependencySearcher.Insert(curOperation, cur);
            ps.EnqueueOp(currOp, opLogPtr, synced);
        }
    }
};
// Smoke test: builds a root function with a single leaf function holding the
// sequence COPY_IN, COPY_IN, ADD, COPY_OUT and runs InsertSync on the root
// with debug output enabled. The test has no assertion on the pass result.
TEST_F(InsertSyncTest, TestEnableDebug)
{
    auto root = std::make_shared<Function>(Program::GetInstance(), "TestParams", "TestParams", nullptr);
    root->rootFunc_ = root.get();
    auto currFunctionPtr =
        std::make_shared<Function>(Program::GetInstance(), "TestAddParams", "TestAddParams", root.get());
    EXPECT_TRUE(currFunctionPtr != nullptr);
    root->rootFunc_->programs_.emplace(currFunctionPtr->GetFuncMagic(), currFunctionPtr.get());

    std::vector<int64_t> shape = {IS_NUM8, IS_NUM16};
    auto shapeImme = OpImmediate::Specified(shape); // not referenced again in this test

    // All six tensors share the same dtype and shape.
    const auto newTensor = [&shape]() {
        return IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    };
    auto incast1 = newTensor();
    auto incast2 = newTensor();
    auto ubTensor1 = newTensor();
    auto ubTensor2 = newTensor();
    auto ubTensor3 = newTensor();
    auto outCast = newTensor();

    // The created operations are owned by the leaf function; the returned
    // references are not needed here.
    Function& leaf = *currFunctionPtr;
    static_cast<void>(IRBuilder().CreateTensorOpStmt(leaf, Opcode::OP_COPY_IN, {incast1}, {ubTensor1}));
    static_cast<void>(IRBuilder().CreateTensorOpStmt(leaf, Opcode::OP_COPY_IN, {incast2}, {ubTensor2}));
    static_cast<void>(IRBuilder().CreateTensorOpStmt(leaf, Opcode::OP_ADD, {ubTensor1, ubTensor2}, {ubTensor3}));
    static_cast<void>(IRBuilder().CreateTensorOpStmt(leaf, Opcode::OP_COPY_OUT, {ubTensor3}, {outCast}));

    leaf.inCasts_.push_back(incast1);
    leaf.inCasts_.push_back(incast2);
    leaf.outCasts_.push_back(outCast);

    InsertSync syncPass;
    syncPass.SetEnableDebug(true);
    syncPass.RunOnFunction(*root);
    EXPECT_TRUE(true);
}
/**
 * Creates the tensors and operations used by TestFindDep.
 *
 * Six UB tensors are created with hand-picked memory ranges, then five
 * operations are added to the function and appended to opLogPtr in this order:
 *   0: EXPAND  tensor1 -> tensor2
 *   1: COPY_IN tensor2 -> tensor3
 *   2: COPY_IN tensor2 -> tensor4
 *   3: COPY_IN tensor2 -> tensor5
 *   4: EXP     tensor3 -> tensor6
 *
 * @param opLogPtr        receives pointers to the created operations
 * @param currFunctionPtr function the operations are created in
 * @return tensor1..tensor6, in that order
 */
std::vector<std::shared_ptr<LogicalTensor>> AddOpForTestFindDep(
    std::vector<Operation*>& opLogPtr, std::shared_ptr<Function> currFunctionPtr)
{
    std::vector<int64_t> shape1 = {IS_NUM16, IS_NUM16};
    std::vector<int64_t> shape2 = {IS_NUM8, IS_NUM16};

    // tensor1: range [0, 100]
    auto tensor1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape2, CreateTestConstIntVector(shape2));
    tensor1->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor1->memoryrange.start = 0;
    tensor1->memoryrange.end = IS_NUM100;
    // tensor2: range [0, 200], the only tensor with the 16x16 shape
    auto tensor2 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape1, CreateTestConstIntVector(shape1));
    tensor2->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor2->memoryrange.start = 0;
    tensor2->memoryrange.end = IS_NUM200;
    // tensor3: range [300, 499]
    auto tensor3 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape2, CreateTestConstIntVector(shape2));
    tensor3->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor3->memoryrange.start = IS_NUM300;
    tensor3->memoryrange.end = IS_NUM499;
    // tensor4: range [500, 699]
    auto tensor4 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape2, CreateTestConstIntVector(shape2));
    tensor4->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor4->memoryrange.start = IS_NUM500;
    tensor4->memoryrange.end = IS_NUM699;
    // tensor5: range [700, 900]
    auto tensor5 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape2, CreateTestConstIntVector(shape2));
    tensor5->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor5->memoryrange.start = IS_NUM700;
    tensor5->memoryrange.end = IS_NUM900;
    // tensor6: range [150, 200], which lies inside tensor2's range
    auto tensor6 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape2, CreateTestConstIntVector(shape2));
    tensor6->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor6->memoryrange.start = IS_NUM150;
    tensor6->memoryrange.end = IS_NUM200;

    auto& expand = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_EXPAND, {tensor1}, {tensor2});
    opLogPtr.emplace_back(&expand);
    auto& copyin1 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_IN, {tensor2}, {tensor3});
    opLogPtr.emplace_back(&copyin1);
    auto& copyin2 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_IN, {tensor2}, {tensor4});
    opLogPtr.emplace_back(&copyin2);
    auto& copyin3 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_IN, {tensor2}, {tensor5});
    opLogPtr.emplace_back(&copyin3);
    auto& exp = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_EXP, {tensor3}, {tensor6});
    opLogPtr.emplace_back(&exp);

    return {tensor1, tensor2, tensor3, tensor4, tensor5, tensor6};
}
/**
 * Verifies the RAW / WAR / WAW classification for selected operation pairs of
 * TestFindDep. Only three (previous, current) pairs are checked:
 *   (0, 1): RAW only
 *   (0, 4): WAW only
 *   (1, 4): RAW and WAR, no WAW
 * All other pairs are ignored.
 *
 * @param ps                PipeSync instance providing the Check*Dependency calls
 * @param dataDependencySet indices of candidate predecessors of operation i
 * @param opLogPtr          operations in program order
 * @param i                 index of the current operation
 */
void CheckDependencyForTestFindDep(
    PipeSync& ps, std::set<int> dataDependencySet, std::vector<Operation*>& opLogPtr, size_t i)
{
    const bool currIsOp1 = (i == IS_NUM1);
    const bool currIsOp4 = (i == IS_NUM4);
    if (!currIsOp1 && !currIsOp4) {
        return; // no expectations are defined for the remaining operations
    }

    // Candidates are visited from the highest index down to the lowest.
    for (auto rit = dataDependencySet.rbegin(); rit != dataDependencySet.rend(); ++rit) {
        const size_t k = *rit;
        if (currIsOp1 && k == 0) {
            EXPECT_EQ(ps.CheckRawDependency(*opLogPtr[k], *opLogPtr[i]), true);
            EXPECT_EQ(ps.CheckWarDependency(*opLogPtr[k], *opLogPtr[i]), false);
            EXPECT_EQ(ps.CheckWawDependency(*opLogPtr[k], *opLogPtr[i]), false);
        } else if (currIsOp4 && k == 0) {
            EXPECT_EQ(ps.CheckRawDependency(*opLogPtr[k], *opLogPtr[i]), false);
            EXPECT_EQ(ps.CheckWarDependency(*opLogPtr[k], *opLogPtr[i]), false);
            EXPECT_EQ(ps.CheckWawDependency(*opLogPtr[k], *opLogPtr[i]), true);
        } else if (currIsOp4 && k == IS_NUM1) {
            EXPECT_EQ(ps.CheckRawDependency(*opLogPtr[k], *opLogPtr[i]), true);
            EXPECT_EQ(ps.CheckWarDependency(*opLogPtr[k], *opLogPtr[i]), true);
            EXPECT_EQ(ps.CheckWawDependency(*opLogPtr[k], *opLogPtr[i]), false);
        }
    }
}
/**
 * Prepares PipeSync and the dependency searcher for a list of operations.
 *
 * Stores the list as ps.oriOpList_, pins every operation that is not marked
 * with a true isCube attribute to AIV0, builds the tensor range map entry for
 * each operation and finally copies the UB tensor range map into the searcher.
 *
 * @param ps                     PipeSync instance to initialise
 * @param dataDependencySearcher searcher that receives the UB tensor range map
 * @param opLogPtr               operations in program order
 */
void ProcessOpList(PipeSync& ps, DataDependencySearcher& dataDependencySearcher, std::vector<Operation*>& opLogPtr)
{
    ps.oriOpList_ = opLogPtr;

    for (auto& currOperation : opLogPtr) {
        // An operation counts as a cube component only when the attribute
        // exists and is true; everything else is assigned to AIV0.
        if (!currOperation->HasAttr(OpAttributeKey::isCube) ||
            !currOperation->GetAttr<bool>(OpAttributeKey::isCube)) {
            currOperation->SetAIVCore(AIVCore::AIV0);
        }
        ps.BuildTensorRangeMap(currOperation);
    }

    dataDependencySearcher.ubTensorRangeMap = ps.ubTensorRangeMap;
}
// Checks the candidate sets returned by DataDependencySearcher::Find for the
// operations built by AddOpForTestFindDep, the RAW/WAR/WAW classification of
// selected pairs, IgnorableIntraPipeDep for the pair (0, 4) and the FAILED
// result of AdjustOpCfg for two operations.
TEST_F(InsertSyncTest, TestFindDep)
{
    auto rootFuncPtr = std::make_shared<Function>(Program::GetInstance(), "TestFindDep", "TestFindDep", nullptr);
    rootFuncPtr->rootFunc_ = rootFuncPtr.get();
    auto currFunctionPtr =
        std::make_shared<Function>(Program::GetInstance(), "TestFindDepLeaf", "TestFindDepLeaf", rootFuncPtr.get());
    EXPECT_TRUE(currFunctionPtr != nullptr);
    rootFuncPtr->rootFunc_->programs_.emplace(currFunctionPtr->GetFuncMagic(), currFunctionPtr.get());

    std::vector<Operation*> opLogPtr;
    auto tensors = AddOpForTestFindDep(opLogPtr, currFunctionPtr);

    PipeSync ps;
    DataDependencySearcher dataDependencySearcher;
    ProcessOpList(ps, dataDependencySearcher, opLogPtr);

    for (size_t i = 0; i < opLogPtr.size(); i++) {
        auto opcfg = OpcodeManager::Inst().GetTileOpCfg(opLogPtr[i]->GetOpcode());
        AdjustCopyOpTileCfg(*opLogPtr[i], opcfg);
        PipeSync::DepOp op(i, {opcfg.pipeIdStart_, opcfg.pipeIdEnd_, opcfg.coreType_, opcfg.aivCore_});
        ps.depOps_.emplace_back(op);

        auto dataDependencySet = dataDependencySearcher.Find(opLogPtr[i]);
        // The three COPY_IN operations are expected to find only the EXPAND (index 0).
        if (i == IS_NUM1 || i == IS_NUM2 || i == IS_NUM3) {
            std::set<int> res = {0};
            EXPECT_EQ(dataDependencySet, res);
        }
        // The EXP operation is expected to find every earlier operation.
        if (i == IS_NUM4) {
            std::set<int> res = {0, IS_NUM1, IS_NUM2, IS_NUM3};
            EXPECT_EQ(dataDependencySet, res);
        }
        CheckDependencyForTestFindDep(ps, dataDependencySet, opLogPtr, i);
        // Register the operation only after it has been queried, so Find never returns i itself.
        dataDependencySearcher.Insert(opLogPtr[i], i);
    }

    EXPECT_EQ(ps.IgnorableIntraPipeDep(0, IS_NUM4, opLogPtr), false);
    // Same query again after giving tensor2 a three-dimensional shape.
    tensors[1]->shape = {IS_NUM16, IS_NUM32, IS_NUM32};
    EXPECT_EQ(ps.IgnorableIntraPipeDep(0, IS_NUM4, opLogPtr), false);

    auto opcfg1 = OpcodeManager::Inst().GetTileOpCfg(opLogPtr[1]->GetOpcode());
    EXPECT_EQ(ps.AdjustOpCfg(opcfg1, *opLogPtr[1]), FAILED);

    // Append a sixth operation: COPY_OUT tensor5 -> tensor7 (range [1000, 1100]).
    std::vector<int64_t> shape = {IS_NUM8, IS_NUM16};
    auto tensor7 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    tensor7->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor7->memoryrange.start = IS_NUM1000;
    tensor7->memoryrange.end = IS_NUM1100;
    auto& copyout = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_OUT, {tensors[4]}, {tensor7});
    opLogPtr.emplace_back(&copyout);

    // NOTE(review): the configuration is fetched for opLogPtr[IS_NUM1] (a COPY_IN) but applied to
    // opLogPtr[IS_NUM5] (the COPY_OUT above) - confirm whether this mismatch is intentional.
    auto opcfg2 = OpcodeManager::Inst().GetTileOpCfg(opLogPtr[IS_NUM1]->GetOpcode());
    EXPECT_EQ(ps.AdjustOpCfg(opcfg2, *opLogPtr[IS_NUM5]), FAILED);
}
// Runs PipeSync::PhaseKernelProcess on the sequence COPY_IN, COPY_IN, ADD,
// COPY_OUT and checks that the resulting list has OP_PHASE1 at index 0 and
// OP_PHASE2 at index 3.
TEST_F(InsertSyncTest, TestPhaseKernelProcess)
{
    auto rootFuncPtr =
        std::make_shared<Function>(Program::GetInstance(), "TestPhaseKernelProcess", "TestPhaseKernelProcess", nullptr);
    rootFuncPtr->rootFunc_ = rootFuncPtr.get();
    auto currFunctionPtr = std::make_shared<Function>(
        Program::GetInstance(), "TestPhaseKernelProcessLeaf", "TestPhaseKernelProcessLeaf", rootFuncPtr.get());
    EXPECT_TRUE(currFunctionPtr != nullptr);
    rootFuncPtr->rootFunc_->programs_.emplace(currFunctionPtr->GetFuncMagic(), currFunctionPtr.get());

    std::vector<int64_t> shape = {IS_NUM16, IS_NUM16};
    auto tensor1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    auto tensor2 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    auto tensor3 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    auto tensor4 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    auto tensor5 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    auto tensor6 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));

    std::vector<Operation*> opLogPtr;
    auto& copyin1 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_IN, {tensor1}, {tensor3});
    opLogPtr.emplace_back(&copyin1);
    auto& copyin2 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_IN, {tensor2}, {tensor4});
    opLogPtr.emplace_back(&copyin2);
    auto& add = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_ADD, {tensor3, tensor4}, {tensor5});
    opLogPtr.emplace_back(&add);
    auto& copyout = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_OUT, {tensor5}, {tensor6});
    opLogPtr.emplace_back(&copyout);

    PipeSync ps;
    std::vector<Operation*> resLogPtr;
    ps.PhaseKernelProcess(*currFunctionPtr, opLogPtr, resLogPtr);

    // Stop here if the result is too short; indexing it below would otherwise be out of bounds.
    ASSERT_GE(resLogPtr.size(), static_cast<size_t>(IS_NUM4));
    EXPECT_EQ(resLogPtr[0]->GetOpcode(), Opcode::OP_PHASE1);
    EXPECT_EQ(resLogPtr[IS_NUM3]->GetOpcode(), Opcode::OP_PHASE2);
}
/**
 * Creates the tensors and operations used by TestUpdateDep.
 *
 * Eight 16x16 UB tensors are created with hand-picked memory ranges; several
 * of them deliberately share the same range (tensor2/3/4, tensor5/6 and
 * tensor1/7). Five operations are appended to opLogPtr in this order:
 *   0: COPY_IN  tensor1 -> tensor8
 *   1: COPY_IN  tensor2 -> tensor3
 *   2: CAST     tensor3 -> tensor4
 *   3: CAST     tensor5 -> tensor6
 *   4: COPY_OUT tensor6 -> tensor7
 *
 * @param opLogPtr        receives pointers to the created operations
 * @param currFunctionPtr function the operations are created in
 */
void AddOpForTestUpdateDep(std::vector<Operation*>& opLogPtr, std::shared_ptr<Function> currFunctionPtr)
{
    std::vector<int64_t> shape = {IS_NUM16, IS_NUM16};

    // tensor1: range [0, 100]
    auto tensor1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    tensor1->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor1->memoryrange.start = 0;
    tensor1->memoryrange.end = IS_NUM100;
    // tensor2, tensor3, tensor4: all range [200, 300]
    auto tensor2 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    tensor2->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor2->memoryrange.start = IS_NUM200;
    tensor2->memoryrange.end = IS_NUM300;
    auto tensor3 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    tensor3->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor3->memoryrange.start = IS_NUM200;
    tensor3->memoryrange.end = IS_NUM300;
    auto tensor4 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    tensor4->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor4->memoryrange.start = IS_NUM200;
    tensor4->memoryrange.end = IS_NUM300;
    // tensor5, tensor6: both range [500, 600]
    auto tensor5 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    tensor5->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor5->memoryrange.start = IS_NUM500;
    tensor5->memoryrange.end = IS_NUM600;
    auto tensor6 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    tensor6->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor6->memoryrange.start = IS_NUM500;
    tensor6->memoryrange.end = IS_NUM600;
    // tensor7: range [0, 100], identical to tensor1
    auto tensor7 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    tensor7->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor7->memoryrange.start = 0;
    tensor7->memoryrange.end = IS_NUM100;
    // tensor8: range [101, 199]
    auto tensor8 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    tensor8->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor8->memoryrange.start = IS_NUM101;
    tensor8->memoryrange.end = IS_NUM199;

    auto& copyin1 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_IN, {tensor1}, {tensor8});
    opLogPtr.emplace_back(&copyin1);
    auto& copyin2 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_IN, {tensor2}, {tensor3});
    opLogPtr.emplace_back(&copyin2);
    auto& cast1 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor3}, {tensor4});
    opLogPtr.emplace_back(&cast1);
    auto& cast2 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor5}, {tensor6});
    opLogPtr.emplace_back(&cast2);
    auto& copyout = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_OUT, {tensor6}, {tensor7});
    opLogPtr.emplace_back(&copyout);
}
// Feeds the operations from AddOpForTestUpdateDep through PipeSync::UpdateDep
// and, while processing the COPY_OUT (index 4) against predecessors 3 and 0,
// checks the set-pipe indices recorded for PIPE_MTE3 towards PIPE_V and
// PIPE_MTE2 on AIV0.
TEST_F(InsertSyncTest, TestUpdateDep)
{
    auto root = std::make_shared<Function>(Program::GetInstance(), "TestUpdateDep", "TestUpdateDep", nullptr);
    root->rootFunc_ = root.get();
    auto currFunctionPtr =
        std::make_shared<Function>(Program::GetInstance(), "TestUpdateDepLeaf", "TestUpdateDepLeaf", root.get());
    EXPECT_TRUE(currFunctionPtr != nullptr);
    root->rootFunc_->programs_.emplace(currFunctionPtr->GetFuncMagic(), currFunctionPtr.get());

    std::vector<Operation*> ops;
    AddOpForTestUpdateDep(ops, currFunctionPtr);

    PipeSync ps;
    DataDependencySearcher searcher;
    ProcessOpList(ps, searcher, ops);

    // Keys used to look up the latest pipe dependency after every UpdateDep call.
    PipeSync::PipeCoreRealEx pcCurr(PipeType::PIPE_MTE3, CoreType::AIV, AIVCore::AIV0);
    PipeSync::PipeCoreRealEx pcSet1(PipeType::PIPE_V, CoreType::AIV, AIVCore::AIV0);
    PipeSync::PipeCoreRealEx pcSet2(PipeType::PIPE_MTE2, CoreType::AIV, AIVCore::AIV0);

    for (size_t cur = 0; cur < ops.size(); ++cur) {
        auto tileCfg = OpcodeManager::Inst().GetTileOpCfg(ops[cur]->GetOpcode());
        AdjustCopyOpTileCfg(*ops[cur], tileCfg);
        PipeSync::DepOp newDep(cur, {tileCfg.pipeIdStart_, tileCfg.pipeIdEnd_, tileCfg.coreType_, tileCfg.aivCore_});
        auto& currOp = ps.depOps_.emplace_back(newDep);

        auto candidates = searcher.Find(ops[cur]);
        for (auto rit = candidates.rbegin(); rit != candidates.rend(); ++rit) {
            const size_t prev = *rit;
            auto& prevOp = ps.depOps_[prev];
            if (!ps.HasDataDependency(*ops[prev], *ops[cur])) {
                continue;
            }
            ps.UpdateDep(currOp, prevOp);

            auto setPipeIdx1 = ps.latestPipeDep_[pcCurr].setPipes[pcSet1];
            auto setPipeIdx2 = ps.latestPipeDep_[pcCurr].setPipes[pcSet2];
            ps.latestPipeDep_[pcCurr].DumpPipeDepInfo();

            // The same values are expected for both predecessors of the COPY_OUT.
            if (cur == IS_NUM4 && (prev == IS_NUM3 || prev == 0)) {
                EXPECT_EQ(setPipeIdx1, IS_NUM3);
                EXPECT_EQ(setPipeIdx2, IS_NUM1);
            }
        }
        searcher.Insert(ops[cur], cur);
    }
}
/**
 * Creates the tensors and operations used by TestHandleEventID.
 *
 * Five 16x16 UB tensors with pairwise distinct memory ranges are created and
 * three operations are appended to opLogPtr in this order:
 *   0: COPY_IN tensor1 -> tensor2
 *   1: COPY_IN tensor4 -> tensor5
 *   2: CAST    tensor2 -> tensor3
 *
 * @param opLogPtr        receives pointers to the created operations
 * @param currFunctionPtr function the operations are created in
 */
void AddOpForTestHandleEventID(std::vector<Operation*>& opLogPtr, std::shared_ptr<Function> currFunctionPtr)
{
    std::vector<int64_t> shape = {IS_NUM16, IS_NUM16};

    // tensor1: range [0, 99]
    auto tensor1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    tensor1->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor1->memoryrange.start = 0;
    tensor1->memoryrange.end = IS_NUM99;
    // tensor2: range [100, 199]
    auto tensor2 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    tensor2->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor2->memoryrange.start = IS_NUM100;
    tensor2->memoryrange.end = IS_NUM199;
    // tensor3: range [200, 300]
    auto tensor3 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    tensor3->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor3->memoryrange.start = IS_NUM200;
    tensor3->memoryrange.end = IS_NUM300;
    // tensor4: range [500, 600]
    auto tensor4 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    tensor4->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor4->memoryrange.start = IS_NUM500;
    tensor4->memoryrange.end = IS_NUM600;
    // tensor5: range [700, 800]
    auto tensor5 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    tensor5->SetMemoryTypeBoth(MemoryType::MEM_UB);
    tensor5->memoryrange.start = IS_NUM700;
    tensor5->memoryrange.end = IS_NUM800;

    auto& copyin1 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_IN, {tensor1}, {tensor2});
    opLogPtr.emplace_back(&copyin1);
    auto& copyin2 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_IN, {tensor4}, {tensor5});
    opLogPtr.emplace_back(&copyin2);
    auto& cast = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor2}, {tensor3});
    opLogPtr.emplace_back(&cast);
}
// Builds dependencies for COPY_IN, COPY_IN, CAST, then calls
// PipeSync::HandleEventID for operation 0 with the issue counters of its pipe
// pair already at their maximum, and checks how the set/wait bookkeeping of
// the three DepOps changes. Finally checks the free event ids created by
// InitCVEventIdQ for one cross-core pipe pair.
TEST_F(InsertSyncTest, TestHandleEventID)
{
    auto root =
        std::make_shared<Function>(Program::GetInstance(), "TestHandleEventID", "TestHandleEventID", nullptr);
    root->rootFunc_ = root.get();
    auto currFunctionPtr = std::make_shared<Function>(
        Program::GetInstance(), "TestHandleEventIDLeaf", "TestHandleEventIDLeaf", root.get());
    EXPECT_TRUE(currFunctionPtr != nullptr);
    root->rootFunc_->programs_.emplace(currFunctionPtr->GetFuncMagic(), currFunctionPtr.get());

    std::vector<Operation*> ops;
    AddOpForTestHandleEventID(ops, currFunctionPtr);

    PipeSync ps;
    ps.InitIssueQueue();
    DataDependencySearcher searcher;
    ProcessOpList(ps, searcher, ops);
    std::vector<IndexOp> syncedOps;
    BuildDeps(ps, searcher, ops, syncedOps);

    // State right after dependency construction: op 0 sets for op 2, op 2 waits on op 0.
    EXPECT_EQ(ps.depOps_[0].setPipe[0], IS_NUM2);
    EXPECT_EQ(ps.depOps_[IS_NUM2].waitPipe[0], 0);

    bool deadlockFlag = true;
    bool handled = false;
    PipeSync::IssueNum issueCounters;
    PipeSync::IssueQueue& queue = ps.issueState_[IS_NUM4];
    PipeSync::DepOp& setterOp = ps.depOps_[0];
    PipeSync::DepOp& waiterOp = ps.depOps_[IS_NUM2];
    PipeSync::PipeCoreRealEx setterPipe(
        setterOp.selfPipeCore.pipeEnd, setterOp.selfPipeCore.core, setterOp.selfPipeCore.aivCore);
    PipeSync::PipeCoreRealEx waiterPipe(
        waiterOp.selfPipeCore.pipeStart, waiterOp.selfPipeCore.core, waiterOp.selfPipeCore.aivCore);
    PipeSync::PipePairEx setterToWaiter{setterPipe, waiterPipe};
    // Current issue count equals the maximum for this pipe pair.
    issueCounters.maxIssueNum[setterToWaiter] = IS_NUM8;
    issueCounters.currIssueNum[setterToWaiter] = IS_NUM8;

    // Preparing the counters must not have touched the dependency bookkeeping.
    EXPECT_EQ(ps.depOps_[IS_NUM2].waitPipe[0], 0);
    EXPECT_EQ(ps.depOps_[0].setPipe[0], IS_NUM2);

    ps.HandleEventID(setterOp, queue, issueCounters, deadlockFlag, handled, syncedOps);
    queue.DumpIssueQueue(ps.oriOpList_);
    ps.DumpLatestPipeDepMap();

    // Afterwards op 2 waits on op 1, op 1 sets for op 2 and op 0 has no set entries left.
    EXPECT_EQ(ps.depOps_[IS_NUM2].waitPipe[0], IS_NUM1);
    EXPECT_EQ(ps.depOps_[IS_NUM1].setPipe[0], IS_NUM2);
    EXPECT_EQ(ps.depOps_[0].setPipe.size(), 0);

    PipeSync::PipeCoreRealEx crossSetPipe(PIPE_MTE3, CoreType::AIV, AIVCore::AIV1);
    PipeSync::PipeCoreRealEx crossWaitPipe(PIPE_MTE1, CoreType::AIC, AIVCore::UNSPECIFIED);
    PipeSync::PipePairEx pipePairEx = {crossSetPipe, crossWaitPipe};
    PipeSync::PipePairEx pipePairExReverse = {crossWaitPipe, crossSetPipe};
    ps.InitCVEventIdQ(pipePairEx);
    // Only the initialised direction has free event ids.
    EXPECT_EQ(ps.crossCoreFreeEventId_[pipePairEx].size(), IS_NUM2);
    EXPECT_EQ(ps.crossCoreFreeEventId_[pipePairExReverse].size(), 0);
}
TEST_F(InsertSyncTest, TestRelaxFakeDataDep)
{
auto rootFuncPtr =
std::make_shared<Function>(Program::GetInstance(), "TestRelaxFakeDataDep", "TestRelaxFakeDataDep", nullptr);
rootFuncPtr->rootFunc_ = rootFuncPtr.get();
auto currFunctionPtr = std::make_shared<Function>(
Program::GetInstance(), "TestRelaxFakeDataDepLeaf", "TestRelaxFakeDataDepLeaf", rootFuncPtr.get());
EXPECT_TRUE(currFunctionPtr != nullptr);
rootFuncPtr->rootFunc_->programs_.emplace(currFunctionPtr->GetFuncMagic(), currFunctionPtr.get());
std::vector<int64_t> shape = {IS_NUM8, IS_NUM8};
auto tensor1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor1->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor1->memoryrange.start = IS_NUM100;
tensor1->memoryrange.end = IS_NUM109;
auto tensor2 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor2->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor2->memoryrange.start = 0;
tensor2->memoryrange.end = IS_NUM9;
auto tensor3 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor3->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor3->memoryrange.start = IS_NUM10;
tensor3->memoryrange.end = IS_NUM19;
auto tensor4 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor4->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor4->memoryrange.start = IS_NUM20;
tensor4->memoryrange.end = IS_NUM29;
auto tensor5 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor5->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor5->memoryrange.start = IS_NUM30;
tensor5->memoryrange.end = IS_NUM39;
auto tensor6 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor6->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor6->memoryrange.start = IS_NUM40;
tensor6->memoryrange.end = IS_NUM49;
auto tensor7 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor7->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor7->memoryrange.start = IS_NUM50;
tensor7->memoryrange.end = IS_NUM59;
auto tensor8 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor8->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor8->memoryrange.start = IS_NUM60;
tensor8->memoryrange.end = IS_NUM69;
auto tensor9 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor9->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor9->memoryrange.start = IS_NUM70;
tensor9->memoryrange.end = IS_NUM79;
auto tensor10 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor10->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor10->memoryrange.start = IS_NUM80;
tensor10->memoryrange.end = IS_NUM89;
auto tensor11 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor11->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor11->memoryrange.start = IS_NUM90;
tensor11->memoryrange.end = IS_NUM99;
auto tensor12 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor12->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor12->memoryrange.start = IS_NUM200;
tensor12->memoryrange.end = IS_NUM209;
auto tensor13 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor13->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor13->memoryrange.start = IS_NUM210;
tensor13->memoryrange.end = IS_NUM219;
auto tensor14 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor14->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor14->memoryrange.start = IS_NUM220;
tensor14->memoryrange.end = IS_NUM229;
auto tensor15 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor15->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor15->memoryrange.start = IS_NUM230;
tensor15->memoryrange.end = IS_NUM239;
auto tensor16 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor16->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor16->memoryrange.start = IS_NUM240;
tensor16->memoryrange.end = IS_NUM249;
auto tensor17 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor17->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor17->memoryrange.start = IS_NUM250;
tensor17->memoryrange.end = IS_NUM259;
auto tensor18 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor18->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor18->memoryrange.start = IS_NUM260;
tensor18->memoryrange.end = IS_NUM269;
auto tensor19 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor19->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor19->memoryrange.start = IS_NUM270;
tensor19->memoryrange.end = IS_NUM279;
auto tensor20 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor20->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor20->memoryrange.start = IS_NUM280;
tensor20->memoryrange.end = IS_NUM289;
auto tensor21 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor21->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor21->memoryrange.start = IS_NUM290;
tensor21->memoryrange.end = IS_NUM299;
auto tensor22 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor22->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor22->memoryrange.start = IS_NUM110;
tensor22->memoryrange.end = IS_NUM119;
auto tensor23 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor23->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor23->memoryrange.start = IS_NUM120;
tensor23->memoryrange.end = IS_NUM129;
auto tensor24 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor24->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor24->memoryrange.start = IS_NUM130;
tensor24->memoryrange.end = IS_NUM139;
auto tensor25 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor25->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor25->memoryrange.start = IS_NUM140;
tensor25->memoryrange.end = IS_NUM149;
auto tensor26 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor26->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor26->memoryrange.start = IS_NUM150;
tensor26->memoryrange.end = IS_NUM159;
auto tensor27 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor27->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor27->memoryrange.start = IS_NUM160;
tensor27->memoryrange.end = IS_NUM169;
auto tensor28 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor28->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor28->memoryrange.start = IS_NUM170;
tensor28->memoryrange.end = IS_NUM179;
auto tensor29 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
tensor29->SetMemoryTypeBoth(MemoryType::MEM_UB);
tensor29->memoryrange.start = IS_NUM180;
tensor29->memoryrange.end = IS_NUM189;
std::vector<Operation*> opLogPtr;
auto& copyin1 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_IN, {tensor1}, {tensor2});
opLogPtr.emplace_back(©in1);
auto& copyin2 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_IN, {tensor1}, {tensor3});
opLogPtr.emplace_back(©in2);
auto& copyin3 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_IN, {tensor1}, {tensor4});
opLogPtr.emplace_back(©in3);
auto& copyin4 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_IN, {tensor1}, {tensor5});
opLogPtr.emplace_back(©in4);
auto& copyin5 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_IN, {tensor1}, {tensor6});
opLogPtr.emplace_back(©in5);
auto& copyin6 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_IN, {tensor1}, {tensor7});
opLogPtr.emplace_back(©in6);
auto& copyin7 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_IN, {tensor1}, {tensor8});
opLogPtr.emplace_back(©in7);
auto& copyin8 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_IN, {tensor1}, {tensor9});
opLogPtr.emplace_back(©in8);
auto& copyin9 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_IN, {tensor1}, {tensor10});
opLogPtr.emplace_back(©in9);
auto& copyin10 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_COPY_IN, {tensor1}, {tensor11});
opLogPtr.emplace_back(©in10);
auto& cast1 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor1}, {tensor22});
opLogPtr.emplace_back(&cast1);
auto& cast2 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor1}, {tensor23});
opLogPtr.emplace_back(&cast2);
auto& cast3 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor1}, {tensor24});
opLogPtr.emplace_back(&cast3);
auto& cast4 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor1}, {tensor25});
opLogPtr.emplace_back(&cast4);
auto& cast5 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor1}, {tensor26});
opLogPtr.emplace_back(&cast5);
auto& cast6 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor1}, {tensor27});
opLogPtr.emplace_back(&cast6);
auto& cast7 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor1}, {tensor28});
opLogPtr.emplace_back(&cast7);
auto& cast8 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor1}, {tensor29});
opLogPtr.emplace_back(&cast8);
auto& cast9 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor2}, {tensor12});
opLogPtr.emplace_back(&cast9);
auto& cast10 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor3}, {tensor13});
opLogPtr.emplace_back(&cast10);
auto& cast11 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor4}, {tensor14});
opLogPtr.emplace_back(&cast11);
auto& cast12 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor5}, {tensor15});
opLogPtr.emplace_back(&cast12);
auto& cast13 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor6}, {tensor16});
opLogPtr.emplace_back(&cast13);
auto& cast14 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor7}, {tensor17});
opLogPtr.emplace_back(&cast14);
auto& cast15 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor8}, {tensor18});
opLogPtr.emplace_back(&cast15);
auto& cast16 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor9}, {tensor19});
opLogPtr.emplace_back(&cast16);
auto& cast17 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor10}, {tensor20});
opLogPtr.emplace_back(&cast17);
auto& cast18 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_CAST, {tensor11}, {tensor21});
opLogPtr.emplace_back(&cast18);
PipeSync ps;
DataDependencySearcher dataDependencySearcher;
ProcessOpList(ps, dataDependencySearcher, opLogPtr);
std::vector<IndexOp> synced;
size_t index = UINT64_MAX;
EXPECT_EQ(ps.InjectSync(*currFunctionPtr, opLogPtr, index, synced), FAILED);
BuildDeps(ps, dataDependencySearcher, opLogPtr, synced);
size_t totalIssued = 0;
size_t allIssued = 0;
for (int i = 0; i < static_cast<int>(PipeSeq::PIPE_END); i++) {
allIssued += ps.issueState_[i].ops.size();
}
bool eventIdDeadlock = false;
uint64_t eventIdDeadlockEnterTimes = 0;
while (totalIssued < allIssued) {
size_t issued = 0;
size_t issuedTest = IS_NUM100;
for (int i = 0; i < static_cast<int>(PipeSeq::PIPE_END); i++) {
std::vector<size_t> issuedOps;
ps.PopFromQueue(ps.issueState_[i], issuedOps, eventIdDeadlock, synced);
issued += issuedOps.size();
if (i == IS_NUM4) {
issuedTest = issuedOps.size();
}
for (auto idx : issuedOps) {
ps.InjectSync(*currFunctionPtr, opLogPtr, idx, synced);
}
if (issuedTest == static_cast<size_t>(0)) {
break;
}
}
totalIssued += issued;
if (issuedTest == static_cast<size_t>(0)) {
EXPECT_EQ(ps.depOps_[0].setPipe[0], IS_NUM18);
EXPECT_EQ(ps.depOps_[IS_NUM1].setPipe[0], IS_NUM19);
ps.ProcessDeadLock(eventIdDeadlockEnterTimes, eventIdDeadlock, synced);
EXPECT_EQ(ps.depOps_[IS_NUM1].setPipe[0], IS_NUM18);
continue;
}
eventIdDeadlock = false;
eventIdDeadlockEnterTimes = static_cast<size_t>(0);
break;
}
}
// Exercises two PipeSync entry points on a default-constructed instance:
//   1. GetDepInfo is expected to return FAILED when the synced-op log is empty.
//   2. PipeSeqName is expected to return the textual name of each AIV0/AIV1 pipe enumerator.
TEST_F(InsertSyncTest, TestGetDepInfoSizeMismatch)
{
    PipeSync ps;

    // The synced-op log is deliberately left empty for the failure-path check.
    std::vector<IndexOp> noSyncedOps;
    auto firstDepPair = PipeSync::dataDepPair[0];
    PipeSync::DataDepInfo info;
    const auto status = ps.GetDepInfo(noSyncedOps, firstDepPair, info);
    EXPECT_EQ(status, FAILED);

    // Table of (enumerator, expected name) pairs, checked in the listed order.
    struct NameCase {
        PipeSeq seq;
        const char* expected;
    };
    const NameCase nameCases[] = {
        {PipeSeq::AIV0_MTE2, "AIV0_MTE2"},
        {PipeSeq::AIV1_MTE2, "AIV1_MTE2"},
        {PipeSeq::AIV0_V, "AIV0_V"},
        {PipeSeq::AIV1_V, "AIV1_V"},
        {PipeSeq::AIV0_MTE3, "AIV0_MTE3"},
        {PipeSeq::AIV1_MTE3, "AIV1_MTE3"},
        {PipeSeq::AIV0_S, "AIV0_S"},
        {PipeSeq::AIV1_S, "AIV1_S"},
    };
    for (const auto& nameCase : nameCases) {
        EXPECT_EQ(ps.PipeSeqName(nameCase.seq), nameCase.expected);
    }
}
}
}
#undef private