/**
 * Copyright (c) 2025 Huawei Technologies Co., Ltd.
 * This program is free software, you can redistribute it and/or modify it under the terms and conditions of
 * CANN Open Software License Agreement Version 2.0 (the "License").
 * Please refer to the License for details. You may not use this file except in compliance with the License.
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
 * See LICENSE in the root of the software repository for the full text of the License.
 */
/*!
 * \file test_expand_function.cpp
 * \brief Unit test for RemoveRedundantOp pass.
 */
#include <gtest/gtest.h>
#include <vector>
#include <string>
#include "interface/function/function.h"
#include "interface/tensor/irbuilder.h"
#include "passes/pass_utils/pass_operation_utils.h"
#include "symbolic_scalar_test_utils.h"
#include "pass_test_utils.h"
#include "tilefwk/tilefwk.h"
#include "ut_json/ut_json_tool.h"
#include "passes/pass_mgr/pass_manager.h"
#include "interface/configs/config_manager.h"
#include "interface/tensor/irbuilder.h"
#define private public
#include "passes/tile_graph_pass/graph_optimization/remove_redundant_op.h"
namespace npu {
namespace tile_fwk {
// Named literals shared by the tests in this file, so expectations do not contain bare numbers.

// Container sizes / operand indices.
static constexpr size_t kSizeZero = 0UL;
static constexpr size_t kSizeOne = 1UL;
static constexpr size_t kSizeSeven = 7UL;
static constexpr size_t kSizeEight = 8UL;
static constexpr size_t kSizeTen = 10UL;
static constexpr size_t kSizeEleven = 11UL;
static constexpr size_t kSizeThirteen = 13UL;
static constexpr size_t kSizeForteen = 14UL;  // (sic) spelling kept so existing users still compile

// Signed sentinel.
static constexpr int32_t kNumNegOne = -1;

// Small counts, offsets and shape extents.
static constexpr uint16_t kNumZero = 0u;
static constexpr uint16_t kNumOne = 1u;
static constexpr uint16_t kNumTwo = 2u;
static constexpr uint16_t kNumThree = 3u;
static constexpr uint16_t kNumFour = 4u;
static constexpr uint16_t kNumFive = 5u;
static constexpr uint16_t kNumEight = 8u;

// Powers of two: kNumExp<N> == 2^N.
static constexpr uint16_t kNumExpFour = 16u;
static constexpr uint16_t kNumExpFive = 32u;
static constexpr uint16_t kNumExpSix = 64u;
static constexpr uint16_t kNumExpSeven = 128u;
static constexpr uint16_t kNumExpEight = 256u;
// Fixture shared by every RemoveRedundantOp test in this file.
// Each test starts from a freshly reset Program and configuration.
class TestRemoveRedundantOpPass : public ::testing::Test {
public:
    // No suite-level state is needed.
    static void SetUpTestCase() {}

    static void TearDownTestCase() {}

    // Resets global state and applies the host / platform options the tests rely on.
    void SetUp() override
    {
        Program::GetInstance().Reset();
        config::Reset();

        config::SetHostOption(COMPILE_STAGE, CS_EXECUTE_GRAPH);
        // The strategy named here is registered by RemoveRedundantOpSTest3.
        config::SetHostConfig(KEY_STRATEGY, "ExpandFunctionTestStrategy");
        config::SetPlatformConfig(KEY_ENABLE_COST_MODEL, false);

        // Default vector tile; individual tests may override it.
        TileShape::Current().SetVecTile({64, 64});
    }

    // Nothing to release: SetUp() of the next test resets everything.
    void TearDown() override {}
};
/*
TEST RemoveDummyExpand
Before:
inCast{8,16}->expand->ubTensor{8,16}->exp->outCast1{8,16}
                                    ->sqrt->outCast2{8,16}
                                    ->reciprocal->outCast3{8,16}
After:
inCast{8,16}->exp->outCast1
            ->sqrt->outCast2
            ->reciprocal->outCast3
*/
// An OP_EXPAND whose input and output share one shape feeds exp / sqrt / reciprocal.
// After the pass no OP_EXPAND may remain and each consumer must read inCast directly.
TEST_F(TestRemoveRedundantOpPass, RemoveRedundantOpUTest1)
{
    auto funcPtr = std::make_shared<Function>(
        Program::GetInstance(), "TestRemoveRedundantOp", "TestRemoveRedundantOp", nullptr);
    EXPECT_TRUE(funcPtr != nullptr);
    Function& func = *funcPtr;

    // Every tensor in this graph is FP32 with the same {8, 16} shape.
    std::vector<int64_t> shape = {kNumEight, kNumExpFour};
    auto newTensor = [&shape]() {
        return npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    };
    auto inCast = newTensor();
    auto ubTensor = newTensor();
    auto outCast1 = newTensor();
    auto outCast2 = newTensor();
    auto outCast3 = newTensor();

    // inCast -> expand -> ubTensor -> {exp, sqrt, reciprocal}
    PassOperationUtils::AddOperation(func, Opcode::OP_EXPAND, {inCast}, {ubTensor});
    PassOperationUtils::AddOperation(func, Opcode::OP_EXP, {ubTensor}, {outCast1});
    PassOperationUtils::AddOperation(func, Opcode::OP_SQRT, {ubTensor}, {outCast2});
    PassOperationUtils::AddOperation(func, Opcode::OP_RECIPROCAL, {ubTensor}, {outCast3});
    func.inCasts_.push_back(inCast);
    func.outCasts_.push_back(outCast1);
    func.outCasts_.push_back(outCast2);
    func.outCasts_.push_back(outCast3);

    RemoveRedundantOp pass;
    EXPECT_EQ(pass.RunOnFunction(func), SUCCESS);
    EXPECT_EQ(pass.PostCheck(func), SUCCESS);

    uint32_t expandCount = kNumZero;
    for (auto& op : func.Operations()) {
        const auto opcode = op.GetOpcode();
        if (opcode == Opcode::OP_EXPAND) {
            ++expandCount;
            continue;
        }
        // The three element-wise consumers share the same expectation.
        const bool isConsumer = (opcode == Opcode::OP_SQRT) || (opcode == Opcode::OP_EXP) ||
                                (opcode == Opcode::OP_RECIPROCAL);
        if (isConsumer) {
            EXPECT_EQ(op.GetInputOperandSize(), kSizeOne);
            EXPECT_EQ(op.GetInputOperand(kSizeZero), inCast);
        }
    }
    EXPECT_EQ(expandCount, kNumZero);
}
/*
TEST RemoveDummyRegCopy
Before:
inCast{8,16}->regcopy->ubTensor1{16,8}->regcopy->ubTensor2{16,8}->exp->outCast1{16,8}
After:
inCast{8,16}->regcopy->ubTensor1{16,8}->exp->outCast1{16,8}
*/
// Two chained OP_REGISTER_COPY ops: the pass must keep only the first one and rewire the
// exp consumer to ubTensor1. PostCheck is expected to fail before the pass and succeed after it.
TEST_F(TestRemoveRedundantOpPass, RemoveRedundantOpUTest2)
{
    auto currFunctionPtr =
        std::make_shared<Function>(Program::GetInstance(), "TestRemoveRedundantOp", "TestRemoveRedundantOp", nullptr);
    // Fatal on failure: the pointer is dereferenced by every statement below.
    ASSERT_TRUE(currFunctionPtr != nullptr);

    std::vector<int64_t> shape1 = {kNumEight, kNumExpFour};
    std::vector<int64_t> shape2 = {kNumExpFour, kNumEight};
    auto inCast = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape1, CreateTestConstIntVector(shape1));
    auto ubTensor1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape2, CreateTestConstIntVector(shape2));
    auto ubTensor2 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape2, CreateTestConstIntVector(shape2));
    auto outCast = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape2, CreateTestConstIntVector(shape2));

    auto& regcopy = PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_REGISTER_COPY, {inCast}, {ubTensor1});
    // Remember the identity of the copy that must survive *now*: the pass mutates the graph,
    // so reading through `regcopy` afterwards would touch a stale reference if the op were erased.
    const auto keptRegcopyMagic = regcopy.GetOpMagic();
    PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_REGISTER_COPY, {ubTensor1}, {ubTensor2});
    PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_EXP, {ubTensor2}, {outCast});
    currFunctionPtr->inCasts_.push_back(inCast);
    currFunctionPtr->outCasts_.push_back(outCast);

    RemoveRedundantOp removeredundantpass;
    EXPECT_EQ(removeredundantpass.PreCheck(*currFunctionPtr), SUCCESS);
    // The redundant copy is still present, so the post-condition must not hold yet.
    EXPECT_NE(removeredundantpass.PostCheck(*currFunctionPtr), SUCCESS);
    EXPECT_EQ(removeredundantpass.RunOnFunction(*currFunctionPtr), SUCCESS);
    EXPECT_EQ(removeredundantpass.PostCheck(*currFunctionPtr), SUCCESS);

    uint32_t regcopy_num = kNumZero;
    for (auto& op : currFunctionPtr->Operations()) {
        if (op.GetOpcode() == Opcode::OP_REGISTER_COPY) {
            EXPECT_EQ(op.GetOpMagic(), keptRegcopyMagic);
            EXPECT_EQ(op.GetInputOperandSize(), kSizeOne);
            EXPECT_EQ(op.GetInputOperand(kSizeZero), inCast);
            ++regcopy_num;
        } else if (op.GetOpcode() == Opcode::OP_EXP) {
            EXPECT_EQ(op.GetInputOperandSize(), kSizeOne);
            EXPECT_EQ(op.GetInputOperand(kSizeZero), ubTensor1);
        }
    }
    EXPECT_EQ(regcopy_num, kNumOne);
}
/*
TEST RemoveDummyAssembleDDRSpecialCase (WARNING CASE)
Before:
inCast{8,16}->exp(any legal op)->ddrTensor1{8,16}->exp->outCast3{8,16}
                                                 ->assemble->outCast1{8,16}
                                                 ->assemble->outCast2{8,16}
After:
inCast{8,16}->exp->outCast1/outCast2{8,16}->exp->outCast3{8,16}
*/
// One DDR tensor produced by exp is assembled into two output casts and also consumed by a
// second exp. Both PostCheck and PreCheck are expected to fail on the input graph; after the
// pass no OP_ASSEMBLE may remain.
TEST_F(TestRemoveRedundantOpPass, RemoveRedundantOpUTest3)
{
    auto funcPtr = std::make_shared<Function>(
        Program::GetInstance(), "TestRemoveRedundantOp", "TestRemoveRedundantOp", nullptr);
    EXPECT_TRUE(funcPtr != nullptr);
    Function& func = *funcPtr;

    std::vector<int64_t> shape = {kNumEight, kNumExpFour};
    auto inCast = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));

    // Intermediate tensor and the two assembled outputs are placed in device DDR.
    auto ubTensor = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    ubTensor->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto outCast1 = npu::tile_fwk::IRBuilder().CreateTensorVar(
        DT_FP32, shape, CreateTestConstIntVector(shape), TileOpFormat::TILEOP_ND, "outCast1");
    outCast1->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto outCast2 = npu::tile_fwk::IRBuilder().CreateTensorVar(
        DT_FP32, shape, CreateTestConstIntVector(shape), TileOpFormat::TILEOP_ND, "outCast2");
    outCast2->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto outCast3 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));

    auto& producerExp = PassOperationUtils::AddOperation(func, Opcode::OP_EXP, {inCast}, {ubTensor});
    PassOperationUtils::AddOperation(func, Opcode::OP_ASSEMBLE, {ubTensor}, {outCast1});
    PassOperationUtils::AddOperation(func, Opcode::OP_ASSEMBLE, {ubTensor}, {outCast2});
    auto& consumerExp = PassOperationUtils::AddOperation(func, Opcode::OP_EXP, {ubTensor}, {outCast3});
    func.inCasts_.push_back(inCast);
    func.outCasts_.push_back(outCast1);
    func.outCasts_.push_back(outCast2);
    func.outCasts_.push_back(outCast3);

    RemoveRedundantOp pass;
    EXPECT_NE(pass.PostCheck(func), SUCCESS);
    EXPECT_NE(pass.PreCheck(func), SUCCESS);
    EXPECT_EQ(pass.RunOnFunction(func), SUCCESS);
    EXPECT_EQ(pass.PostCheck(func), SUCCESS);

    uint32_t assembleCount = kNumZero;
    for (auto& op : func.Operations()) {
        if (op.GetOpcode() != Opcode::OP_ASSEMBLE) {
            continue;
        }
        ++assembleCount;
    }
    EXPECT_EQ(assembleCount, kNumZero);
    EXPECT_EQ(producerExp.GetOutputOperandSize(), kSizeOne);
    EXPECT_EQ(consumerExp.GetInputOperandSize(), kSizeOne);
}
/*
TEST RemoveDummyView (WARNING CASE)
inCast{8,16}->exp->ddrTensor1{8,16}->exp->ubTensor2{8,16}->view->ubTensor3{8,16}->exp->outCast2{8,16}
                                   ->view->outCast1{8,16}                        ->reciprocal->outCast3{8,16}
                                                                                 ->sqrt->outCast4{8,16}
PreCheck is expected to reject this graph.
*/
// Graph with one view writing straight into an output cast and another view in the middle of
// the chain. Only PreCheck is exercised here, and it is expected to fail.
TEST_F(TestRemoveRedundantOpPass, RemoveRedundantOpUTest4)
{
    auto funcPtr = std::make_shared<Function>(
        Program::GetInstance(), "TestRemoveRedundantOp", "TestRemoveRedundantOp", nullptr);
    EXPECT_TRUE(funcPtr != nullptr);
    Function& func = *funcPtr;

    // All tensors are FP32 {8, 16}.
    std::vector<int64_t> shape1 = {kNumEight, kNumExpFour};
    auto newTensor = [&shape1]() {
        return npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape1, CreateTestConstIntVector(shape1));
    };
    auto inCast = newTensor();
    auto ubTensor1 = newTensor();
    auto ubTensor2 = newTensor();
    auto ubTensor3 = newTensor();
    auto outCast1 = newTensor();
    auto outCast2 = newTensor();
    auto outCast3 = newTensor();
    auto outCast4 = newTensor();

    PassOperationUtils::AddOperation(func, Opcode::OP_EXP, {inCast}, {ubTensor1});
    PassOperationUtils::AddOperation(func, Opcode::OP_VIEW, {ubTensor1}, {outCast1});
    PassOperationUtils::AddOperation(func, Opcode::OP_EXP, {ubTensor1}, {ubTensor2});
    PassOperationUtils::AddOperation(func, Opcode::OP_VIEW, {ubTensor2}, {ubTensor3});
    PassOperationUtils::AddOperation(func, Opcode::OP_EXP, {ubTensor3}, {outCast2});
    PassOperationUtils::AddOperation(func, Opcode::OP_RECIPROCAL, {ubTensor3}, {outCast3});
    PassOperationUtils::AddOperation(func, Opcode::OP_SQRT, {ubTensor3}, {outCast4});

    func.inCasts_.push_back(inCast);
    func.outCasts_.push_back(outCast1);
    func.outCasts_.push_back(outCast2);
    func.outCasts_.push_back(outCast3);
    func.outCasts_.push_back(outCast4);

    RemoveRedundantOp pass;
    EXPECT_NE(pass.PreCheck(func), SUCCESS);
}
/*
TEST RemoveAssemble1
inCast{8,16}->view->ddrTensor{8,16}->assemble->outCast{8,16}
PreCheck is expected to reject this graph.
*/
// view -> assemble chain built without any op attributes or memory types.
// Only PreCheck is exercised, and it is expected to fail.
TEST_F(TestRemoveRedundantOpPass, RemoveRedundantOpUTest6)
{
    auto funcPtr = std::make_shared<Function>(
        Program::GetInstance(), "TestRemoveRedundantOp", "TestRemoveRedundantOp", nullptr);
    EXPECT_TRUE(funcPtr != nullptr);
    Function& func = *funcPtr;

    std::vector<int64_t> shape = {kNumEight, kNumExpFour};
    auto newTensor = [&shape]() {
        return npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    };
    auto inCast = newTensor();
    auto ddrTensor = newTensor();
    auto outCast = newTensor();

    PassOperationUtils::AddOperation(func, Opcode::OP_VIEW, {inCast}, {ddrTensor});
    PassOperationUtils::AddOperation(func, Opcode::OP_ASSEMBLE, {ddrTensor}, {outCast});
    func.inCasts_.push_back(inCast);
    func.outCasts_.push_back(outCast);

    RemoveRedundantOp pass;
    EXPECT_NE(pass.PreCheck(func), SUCCESS);
}
/*
TEST RemoveDummyRegCopy
Before:
inCast{8,16}/{a0,16}->regcopy->ubTensor1{8,16}/{a1,16}->regcopy->ubTensor2{8,16}/{a1,16}->exp->outCast1{8,16}
After:
inCast{8,16}/{a0,16}->regcopy->ubTensor1{8,16}/{a1,16}->exp->outCast1{8,16}
*/
// Same chained-register-copy scenario as RemoveRedundantOpUTest2, but with explicit memory
// types: inCast lives in DDR, every other tensor in UB. The second copy must be removed.
TEST_F(TestRemoveRedundantOpPass, RemoveRedundantOpUTest7)
{
    auto currFunctionPtr =
        std::make_shared<Function>(Program::GetInstance(), "TestRemoveRedundantOp", "TestRemoveRedundantOp", nullptr);
    // Fatal on failure: the pointer is dereferenced by every statement below.
    ASSERT_TRUE(currFunctionPtr != nullptr);

    std::vector<int64_t> shape1 = {kNumEight, kNumExpFour};
    auto inCast = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape1, CreateTestConstIntVector(shape1));
    inCast->SetMemoryTypeBoth(MemoryType::MEM_DEVICE_DDR);
    auto ubTensor1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape1, CreateTestConstIntVector(shape1));
    ubTensor1->SetMemoryTypeBoth(MemoryType::MEM_UB);
    auto ubTensor2 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape1, CreateTestConstIntVector(shape1));
    ubTensor2->SetMemoryTypeBoth(MemoryType::MEM_UB);
    auto outCast = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape1, CreateTestConstIntVector(shape1));
    outCast->SetMemoryTypeBoth(MemoryType::MEM_UB);

    auto& regcopy = PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_REGISTER_COPY, {inCast}, {ubTensor1});
    // Capture the surviving op's identity before the pass mutates the graph, so the check
    // below never reads through a reference to an operation that may have been erased.
    const auto keptRegcopyMagic = regcopy.GetOpMagic();
    PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_REGISTER_COPY, {ubTensor1}, {ubTensor2});
    PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_EXP, {ubTensor2}, {outCast});
    currFunctionPtr->inCasts_.push_back(inCast);
    currFunctionPtr->outCasts_.push_back(outCast);

    RemoveRedundantOp removeredundantpass;
    // The redundant copy is still present, so the post-condition must not hold yet.
    EXPECT_NE(removeredundantpass.PostCheck(*currFunctionPtr), SUCCESS);
    EXPECT_EQ(removeredundantpass.RunOnFunction(*currFunctionPtr), SUCCESS);
    EXPECT_EQ(removeredundantpass.PostCheck(*currFunctionPtr), SUCCESS);

    uint32_t regcopy_num = kNumZero;
    for (auto& op : currFunctionPtr->Operations()) {
        if (op.GetOpcode() == Opcode::OP_REGISTER_COPY) {
            EXPECT_EQ(op.GetOpMagic(), keptRegcopyMagic);
            EXPECT_EQ(op.GetInputOperandSize(), kSizeOne);
            EXPECT_EQ(op.GetInputOperand(kSizeZero), inCast);
            ++regcopy_num;
        } else if (op.GetOpcode() == Opcode::OP_EXP) {
            EXPECT_EQ(op.GetInputOperandSize(), kSizeOne);
            EXPECT_EQ(op.GetInputOperand(kSizeZero), ubTensor1);
        }
    }
    EXPECT_EQ(regcopy_num, kNumOne);
}
/*
TEST RemoveAssembleDDR2
inCast{8,16}->view->ubTensor1{8,16}->assemble->outCast1{8,16}
Both the view and the assemble are removed.
*/
// DDR -> view -> DDR -> assemble -> DDR with zero offsets and identical shapes.
// After the pass neither the view nor the assemble may remain.
TEST_F(TestRemoveRedundantOpPass, RemoveRedundantOpUTest10)
{
    auto funcPtr = std::make_shared<Function>(
        Program::GetInstance(), "TestRemoveRedundantOp", "TestRemoveRedundantOp", nullptr);
    EXPECT_TRUE(funcPtr != nullptr);
    Function& func = *funcPtr;

    std::vector<int64_t> shape = {kNumEight, kNumExpFour};
    std::vector<int64_t> offset = {kNumZero, kNumZero};

    auto inCast = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    inCast->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto ubTensor = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    ubTensor->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto outCast = npu::tile_fwk::IRBuilder().CreateTensorVar(
        DT_FP32, shape, CreateTestConstIntVector(shape), TileOpFormat::TILEOP_ND, "outCast");
    outCast->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);

    // Both ops carry an explicit {0, 0} offset attribute.
    PassOperationUtils::AddOperation(func, Opcode::OP_VIEW, {inCast}, {ubTensor}, [&offset](Operation& viewOp) {
        viewOp.SetOpAttribute(std::make_shared<ViewOpAttribute>(offset));
    });
    PassOperationUtils::AddOperation(func, Opcode::OP_ASSEMBLE, {ubTensor}, {outCast}, [&offset](Operation& asmOp) {
        asmOp.SetOpAttribute(std::make_shared<AssembleOpAttribute>(offset));
    });
    func.inCasts_.push_back(inCast);
    func.outCasts_.push_back(outCast);

    RemoveRedundantOp pass;
    EXPECT_EQ(pass.PreCheck(func), SUCCESS);
    EXPECT_EQ(pass.RunOnFunction(func), SUCCESS);
    EXPECT_EQ(pass.PostCheck(func), SUCCESS);

    uint32_t assembleCount = kNumZero;
    uint32_t viewCount = kNumZero;
    for (auto& op : func.Operations()) {
        // An operation has exactly one opcode, so the two counters are mutually exclusive.
        if (op.GetOpcode() == Opcode::OP_ASSEMBLE) {
            ++assembleCount;
        } else if (op.GetOpcode() == Opcode::OP_VIEW) {
            ++viewCount;
        }
    }
    EXPECT_EQ(assembleCount, kNumZero);
    EXPECT_EQ(viewCount, kNumZero);
}
/*
TEST RemoveAssembleDDR3
inCast1{8,16}->view->ubTensor1{16,16}->assemble->outCast1{16,16}
inCast2{8,16}->view->
The assemble is removed; both views are kept.
*/
// Two {8, 16} inputs are viewed (offsets {0, 0} and {8, 0}) into one {16, 16} DDR tensor that
// is then assembled into the output cast. The pass must drop the assemble and keep both views.
TEST_F(TestRemoveRedundantOpPass, RemoveRedundantOpUTest11)
{
    auto funcPtr = std::make_shared<Function>(
        Program::GetInstance(), "TestRemoveRedundantOp", "TestRemoveRedundantOp", nullptr);
    EXPECT_TRUE(funcPtr != nullptr);
    Function& func = *funcPtr;

    std::vector<int64_t> shape1 = {kNumEight, kNumExpFour};
    std::vector<int64_t> shape2 = {kNumExpFour, kNumExpFour};
    std::vector<int64_t> offset1 = {kNumZero, kNumZero};
    std::vector<int64_t> offset2 = {kNumEight, kNumZero};

    auto inCast1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape1, CreateTestConstIntVector(shape1));
    inCast1->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto inCast2 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape1, CreateTestConstIntVector(shape1));
    inCast2->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto ubTensor = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape2, CreateTestConstIntVector(shape2));
    ubTensor->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto outCast = npu::tile_fwk::IRBuilder().CreateTensorVar(
        DT_FP32, shape2, CreateTestConstIntVector(shape2), TileOpFormat::TILEOP_ND, "outCast");
    outCast->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);

    PassOperationUtils::AddOperation(func, Opcode::OP_VIEW, {inCast1}, {ubTensor}, [&offset1](Operation& viewOp) {
        viewOp.SetOpAttribute(std::make_shared<ViewOpAttribute>(offset1));
    });
    PassOperationUtils::AddOperation(func, Opcode::OP_VIEW, {inCast2}, {ubTensor}, [&offset2](Operation& viewOp) {
        viewOp.SetOpAttribute(std::make_shared<ViewOpAttribute>(offset2));
    });
    // The assemble is added without an explicit attribute.
    PassOperationUtils::AddOperation(func, Opcode::OP_ASSEMBLE, {ubTensor}, {outCast});
    func.inCasts_.push_back(inCast1);
    func.inCasts_.push_back(inCast2);
    func.outCasts_.push_back(outCast);

    RemoveRedundantOp pass;
    EXPECT_EQ(pass.PreCheck(func), SUCCESS);
    EXPECT_EQ(pass.RunOnFunction(func), SUCCESS);
    EXPECT_EQ(pass.PostCheck(func), SUCCESS);

    uint32_t assembleCount = kNumZero;
    uint32_t viewCount = kNumZero;
    for (auto& op : func.Operations()) {
        if (op.GetOpcode() == Opcode::OP_ASSEMBLE) {
            ++assembleCount;
        } else if (op.GetOpcode() == Opcode::OP_VIEW) {
            ++viewCount;
        }
    }
    EXPECT_EQ(assembleCount, kNumZero);
    EXPECT_EQ(viewCount, kNumTwo);
}
/*
TEST PostExpand (DynValidShape not same)
Before:
inCast{8,16}->sqrt->ubTensor1{8,16}->expand->ubTensor2{8,16}->exp->outCast1{8,16}
After (unchanged, the expand is kept):
inCast{8,16}->sqrt->ubTensor1{8,16}->expand->ubTensor2{8,16}->exp->outCast1{8,16}
*/
// The expand's input and output have the same static shape but different dynamic valid
// shapes ("Tensor1" vs "Tensor2"); the test expects the expand to survive the pass.
TEST_F(TestRemoveRedundantOpPass, RemoveRedundantOpUTest12)
{
    auto funcPtr = std::make_shared<Function>(
        Program::GetInstance(), "TestRemoveRedundantOp", "TestRemoveRedundantOp", nullptr);
    EXPECT_TRUE(funcPtr != nullptr);
    Function& func = *funcPtr;

    std::vector<int64_t> shape = {kNumEight, kNumExpFour};
    auto newTensor = [&shape]() {
        return npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    };
    auto inCast = newTensor();
    auto outCast = newTensor();
    auto ubTensor1 = newTensor();
    auto ubTensor2 = newTensor();

    // Give the two intermediate tensors distinct symbolic valid shapes.
    std::vector<SymbolicScalar> validShapeIn;
    std::vector<SymbolicScalar> validShapeOut;
    validShapeIn.push_back(CreateTestScalarVar("Tensor1"));
    validShapeOut.push_back(CreateTestScalarVar("Tensor2"));
    ubTensor1->UpdateDynValidShape(validShapeIn);
    ubTensor2->UpdateDynValidShape(validShapeOut);

    // Note: the expand is registered before its producer (sqrt), exactly as in the original test.
    PassOperationUtils::AddOperation(func, Opcode::OP_EXPAND, {ubTensor1}, {ubTensor2});
    PassOperationUtils::AddOperation(func, Opcode::OP_SQRT, {inCast}, {ubTensor1});
    PassOperationUtils::AddOperation(func, Opcode::OP_EXP, {ubTensor2}, {outCast});
    func.inCasts_.push_back(inCast);
    func.outCasts_.push_back(outCast);

    RemoveRedundantOp pass;
    EXPECT_EQ(pass.RunOnFunction(func), SUCCESS);
    EXPECT_EQ(pass.PostCheck(func), SUCCESS);

    uint32_t expandCount = kNumZero;
    for (auto& op : func.Operations()) {
        if (op.GetOpcode() != Opcode::OP_EXPAND) {
            continue;
        }
        ++expandCount;
    }
    EXPECT_EQ(expandCount, kNumOne);
}
/*
Before:
view->exp(end assemble)->view(end assemble)->expand(end assemble)->exp(end assemble)
                                                                  ->exp(end assemble)
After:
exp(end assemble*3) ->exp(end assemble)
                    ->exp(end assemble)
*/
// System-style test: builds exp -> view -> expand -> {exp, exp} through the front-end API with
// identical shapes everywhere, runs AssignMemoryType + RemoveRedundantOp, and expects one
// OP_VIEW and no OP_EXPAND to remain.
TEST_F(TestRemoveRedundantOpPass, RemoveRedundantOpSTest1)
{
    std::vector<int64_t> shape = {kNumExpSix, kNumExpSix};
    PassManager& passManager = PassManager::Instance();
    Tensor input(DT_FP32, shape, "input");
    Tensor exp(DT_FP32, shape, "exp");
    Tensor view(DT_FP32, shape, "view");
    Tensor expand(DT_FP32, shape, "expand");
    Tensor output1(DT_FP32, shape, "output1");
    Tensor output2(DT_FP32, shape, "output2");
    FUNCTION("STCase1")
    {
        exp = Exp(input);
        view = View(exp, shape, {kNumZero, kNumZero});
        expand = Expand(view, shape);
        output1 = Exp(expand);
        output2 = Exp(expand);
    }

    Function* func = Program::GetInstance().GetFunctionByRawName("TENSOR_STCase1");
    // Fatal on failure: a failed lookup must fail this test instead of crashing the test binary
    // on the dereferences below.
    ASSERT_TRUE(func != nullptr);
    EXPECT_EQ(func->Operations().size(), kSizeEleven);

    passManager.RegisterStrategy(
        "RemoveRedundantOpTestStrategy", {
                                             {"AssignMemoryType", PassName::ASSIGN_MEMORY_TYPE},
                                             {"RemoveRedundantOp", PassName::REMOVE_REDUNDANT_OP},
                                         });
    auto ret = passManager.RunPass(Program::GetInstance(), *func, "RemoveRedundantOpTestStrategy");
    EXPECT_EQ(ret, SUCCESS);

    // Iterate the function's operations directly (as RemoveRedundantOpSTest3 does) rather than
    // through a local copy.
    int view_num = kNumZero;
    int expand_num = kNumZero;
    for (const auto& op : func->Operations()) {
        if (op.GetOpcode() == Opcode::OP_VIEW) {
            view_num++;
        } else if (op.GetOpcode() == Opcode::OP_EXPAND) {
            expand_num++;
        }
    }
    EXPECT_EQ(view_num, kNumOne);
    EXPECT_EQ(expand_num, kNumZero);
}
/*
Before:
view->exp(end assemble)->view(end assemble)->expand(end assemble)->exp(end assemble)
                                                                  ->exp(end assemble)
After:
exp(end assemble)->view(end assemble)->expand(end assemble) ->exp(end assemble)
                                                            ->exp(end assemble)
*/
// Same pipeline as RemoveRedundantOpSTest1, but the view ({16, 1}) and the expand ({16, 256})
// really change the shape, so the test expects them to be kept: two OP_VIEW and one OP_EXPAND.
TEST_F(TestRemoveRedundantOpPass, RemoveRedundantOpSTest2)
{
    std::vector<int64_t> shape = {kNumExpSix, kNumExpSix};
    std::vector<int64_t> shape2 = {kNumExpFour, 1};
    std::vector<int64_t> shape3 = {kNumExpFour, kNumExpEight};
    PassManager& passManager = PassManager::Instance();
    Tensor input(DT_FP32, shape, "input");
    Tensor exp(DT_FP32, shape, "exp");
    Tensor view(DT_FP32, shape2, "view");
    Tensor expand(DT_FP32, shape3, "expand");
    Tensor output1(DT_FP32, shape3, "output1");
    Tensor output2(DT_FP32, shape3, "output2");
    FUNCTION("STCase2")
    {
        exp = Exp(input);
        view = View(exp, shape2, {kNumZero, kNumZero});
        expand = Expand(view, shape3);
        output1 = Exp(expand);
        output2 = Exp(expand);
    }

    Function* func = Program::GetInstance().GetFunctionByRawName("TENSOR_STCase2");
    // Fatal on failure: a failed lookup must fail this test instead of crashing the test binary
    // on the dereferences below.
    ASSERT_TRUE(func != nullptr);

    passManager.RegisterStrategy(
        "RemoveRedundantOpTestStrategy", {
                                             {"AssignMemoryType", PassName::ASSIGN_MEMORY_TYPE},
                                             {"RemoveRedundantOp", PassName::REMOVE_REDUNDANT_OP},
                                         });
    auto ret = passManager.RunPass(Program::GetInstance(), *func, "RemoveRedundantOpTestStrategy");
    EXPECT_EQ(ret, SUCCESS);

    // Iterate the function's operations directly (as RemoveRedundantOpSTest3 does) rather than
    // through a local copy.
    int view_num = kNumZero;
    int expand_num = kNumZero;
    for (const auto& op : func->Operations()) {
        if (op.GetOpcode() == Opcode::OP_VIEW) {
            view_num++;
        } else if (op.GetOpcode() == Opcode::OP_EXPAND) {
            expand_num++;
        }
    }
    EXPECT_EQ(view_num, kNumTwo);
    EXPECT_EQ(expand_num, kNumOne);
}
/*
Original graph:
view{64,64} ->exp{64,64} ->assemble{64, 64}
After ExpandFunction (3 assembles):
view{64,64} ->view{32,64} ->exp{64, 64} ->assemble{32, 64} ->assemble{64, 64}
            ->view{32,64} ->exp{64, 64} ->assemble{32, 64}
After RemoveRedundantOp (2 assembles):
view{64,64} ->view{32,64} ->exp{64, 64} ->assemble{32, 64}
            ->view{32,64} ->exp{64, 64} ->assemble{32, 64}
*/
// A {64, 64} Exp tiled as {32, 64}: the test expects three OP_ASSEMBLE ops after the function
// is built and two after RemoveRedundantOp has run.
TEST_F(TestRemoveRedundantOpPass, RemoveRedundantOpSTest3)
{
    std::vector<int64_t> shape = {kNumExpSix, kNumExpSix};
    std::vector<int64_t> tile_shape = {kNumExpFive, kNumExpSix};
    PassManager& passManager = PassManager::Instance();
    // The fixture's SetUp() selects this strategy name via KEY_STRATEGY.
    // NOTE(review): presumably FUNCTION() runs it when the function is built - confirm.
    passManager.RegisterStrategy(
        "ExpandFunctionTestStrategy", {
                                          {"ExpandFunction", PassName::EXPAND_FUNCTION},
                                          {"AssignMemoryType", PassName::ASSIGN_MEMORY_TYPE},
                                      });
    Tensor input(DT_FP32, shape, "input");
    Tensor output(DT_FP32, shape, "output");
    FUNCTION("STCase3")
    {
        TileShape::Current().SetVecTile(tile_shape);
        output = Exp(input);
    }

    Function* func = Program::GetInstance().GetFunctionByRawName("TENSOR_STCase3");
    // Fatal on failure: a failed lookup must fail this test instead of crashing the test binary
    // on the dereferences below.
    ASSERT_TRUE(func != nullptr);

    // Counts the OP_ASSEMBLE operations currently present in the function.
    auto countAssembles = [func]() {
        int count = kNumZero;
        for (const auto& op : func->Operations()) {
            if (op.GetOpcode() == Opcode::OP_ASSEMBLE) {
                count++;
            }
        }
        return count;
    };

    const int assemble_before = countAssembles();
    EXPECT_EQ(assemble_before, kNumThree);

    passManager.RegisterStrategy(
        "RemoveRedundantOpTestStrategy", {
                                             {"RemoveRedundantOp", PassName::REMOVE_REDUNDANT_OP},
                                         });
    EXPECT_EQ(passManager.RunPass(Program::GetInstance(), *func, "RemoveRedundantOpTestStrategy"), SUCCESS);

    const int assemble_after = countAssembles();
    EXPECT_EQ(assemble_after, kNumTwo);
    EXPECT_NE(assemble_after, assemble_before);
}
void RemoveRedundantL1DataMoveGraph(std::shared_ptr<Function>& currFunctionPtr)
{
std::shared_ptr<LogicalTensor> input_cast1 =
npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{32, 64}, CreateTestConstIntVector(std::vector<int64_t>{32, 64}));
std::shared_ptr<LogicalTensor> input_cast2 =
npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{64, 16}, CreateTestConstIntVector(std::vector<int64_t>{64, 16}));
std::shared_ptr<LogicalTensor> input_cast1_view =
npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{32, 64}, CreateTestConstIntVector(std::vector<int64_t>{32, 64}));
std::shared_ptr<LogicalTensor> input_cast2_view =
npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{64, 16}, CreateTestConstIntVector(std::vector<int64_t>{64, 16}));
input_cast1_view->SetMemoryTypeBoth(MEM_L1);
input_cast2_view->SetMemoryTypeBoth(MEM_L1);
std::shared_ptr<LogicalTensor> op_view_L1_out1 =
npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{32, 64}, CreateTestConstIntVector(std::vector<int64_t>{32, 64}));
std::shared_ptr<LogicalTensor> op_view_L1_out2 =
npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{64, 16}, CreateTestConstIntVector(std::vector<int64_t>{64, 16}));
op_view_L1_out1->SetMemoryTypeBoth(MEM_L1);
op_view_L1_out2->SetMemoryTypeBoth(MEM_L1);
std::shared_ptr<LogicalTensor> view_out1 =
npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{32, 32}, CreateTestConstIntVector(std::vector<int64_t>{32, 32}));
std::shared_ptr<LogicalTensor> view_out2 =
npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{32, 32}, CreateTestConstIntVector(std::vector<int64_t>{32, 32}));
std::shared_ptr<LogicalTensor> view_out3 =
npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{32, 16}, CreateTestConstIntVector(std::vector<int64_t>{32, 16}));
std::shared_ptr<LogicalTensor> view_out4 =
npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{32, 16}, CreateTestConstIntVector(std::vector<int64_t>{32, 16}));
std::shared_ptr<LogicalTensor> l0a_out1 =
npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{32, 32}, CreateTestConstIntVector(std::vector<int64_t>{32, 32}));
std::shared_ptr<LogicalTensor> l0a_out2 =
npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{32, 32}, CreateTestConstIntVector(std::vector<int64_t>{32, 32}));
std::shared_ptr<LogicalTensor> l0b_out1 =
npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{32, 16}, CreateTestConstIntVector(std::vector<int64_t>{32, 16}));
std::shared_ptr<LogicalTensor> l0b_out2 =
npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{32, 16}, CreateTestConstIntVector(std::vector<int64_t>{32, 16}));
std::shared_ptr<LogicalTensor> a_mul_b_out1 =
npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{32, 16}, CreateTestConstIntVector(std::vector<int64_t>{32, 16}));
std::shared_ptr<LogicalTensor> a_mul_b_out2 =
npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, std::vector<int64_t>{32, 16}, CreateTestConstIntVector(std::vector<int64_t>{32, 16}));
auto& head_view_op1 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_VIEW, {input_cast1}, {input_cast1_view});
std::vector<int> newoffset{0, 0};
auto viewAttribute = std::make_shared<ViewOpAttribute>(std::vector<int64_t>{0, 0});
viewAttribute->SetToType(MemoryType::MEM_L1);
head_view_op1.SetOpAttribute(viewAttribute);
auto& head_view_op2 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_VIEW, {input_cast2}, {input_cast2_view});
head_view_op2.SetOpAttribute(viewAttribute);
auto& view_L1_op1 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_VIEW, {input_cast1_view}, {op_view_L1_out1});
view_L1_op1.SetOpAttribute(viewAttribute);
auto& view_L1_op2 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_VIEW, {input_cast2_view}, {op_view_L1_out2});
view_L1_op2.SetOpAttribute(viewAttribute);
auto& view_op1 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_VIEW, {op_view_L1_out1}, {view_out1});
view_op1.SetOpAttribute(std::make_shared<ViewOpAttribute>(std::vector<int64_t>{0, 0}));
auto& view_op2 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_VIEW, {op_view_L1_out1}, {view_out2});
view_op2.SetOpAttribute(std::make_shared<ViewOpAttribute>(std::vector<int64_t>{0, 32}));
auto& view_op3 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_VIEW, {op_view_L1_out2}, {view_out3});
view_op3.SetOpAttribute(std::make_shared<ViewOpAttribute>(std::vector<int64_t>{0, 0}));
auto& view_op4 = IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_VIEW, {op_view_L1_out2}, {view_out4});
view_op4.SetOpAttribute(std::make_shared<ViewOpAttribute>(std::vector<int64_t>{32, 0}));
IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_L1_TO_L0A, {view_out1}, {l0a_out1});
IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_L1_TO_L0A, {view_out2}, {l0a_out2});
IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_L1_TO_L0B, {view_out3}, {l0b_out1});
IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_L1_TO_L0B, {view_out4}, {l0b_out2});
IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_A_MUL_B, {l0a_out1, l0b_out1}, {a_mul_b_out1});
IRBuilder().CreateTensorOpStmt(*currFunctionPtr, Opcode::OP_A_MUL_B, {l0a_out2, l0b_out2}, {a_mul_b_out2});
currFunctionPtr->inCasts_.push_back(input_cast1);
currFunctionPtr->inCasts_.push_back(input_cast2);
currFunctionPtr->outCasts_.push_back(a_mul_b_out1);
currFunctionPtr->outCasts_.push_back(a_mul_b_out2);
}
// Builds the L1 data-move graph (eight OP_VIEW ops) and expects RemoveRedundantOp to leave six.
// The graph is dumped to JSON before and after the pass.
TEST_F(TestRemoveRedundantOpPass, RemoveRedundantOpL1DataMove)
{
    auto currFunctionPtr = std::make_shared<Function>(
        Program::GetInstance(), "RemoveRedundantOpL1DataMove", "RemoveRedundantOpL1DataMove", nullptr);
    // Fatal on failure: the pointer is dereferenced by every statement below.
    ASSERT_TRUE(currFunctionPtr != nullptr);
    Program::GetInstance().InsertFuncToFunctionMap("RemoveRedundantOpL1DataMove", currFunctionPtr);
    RemoveRedundantL1DataMoveGraph(currFunctionPtr);

    // Counts the OP_VIEW operations currently present in the function.
    auto countViews = [&currFunctionPtr]() {
        int count = 0;
        for (auto& op : currFunctionPtr->Operations()) {
            if (op.GetOpcode() == Opcode::OP_VIEW) {
                count++;
            }
        }
        return count;
    };

    EXPECT_EQ(countViews(), 8);

    RemoveRedundantOp removeRedundantOp;
    // PreCheck / PostCheck are run for coverage only; this test makes no claim about their status.
    static_cast<void>(removeRedundantOp.PreCheck(*currFunctionPtr));
    currFunctionPtr->DumpJsonFile("./config/pass/json/removeRedundant_L1DataMove_before.json");
    // Like every other test in this file, require the pass itself to report success.
    EXPECT_EQ(removeRedundantOp.RunOnFunction(*currFunctionPtr), SUCCESS);
    currFunctionPtr->DumpJsonFile("./config/pass/json/removeRedundant_L1DataMove_after.json");
    static_cast<void>(removeRedundantOp.PostCheck(*currFunctionPtr));

    EXPECT_EQ(countViews(), 6);
}
RemoveReshapeChain
inCast{8,16}->reshape->ubTensor1{16,8}->reshape->ubTensor2{32,4}->sqrt->outCast{32,4}
inCast{8,16}->reshape->ubTensor2{32,4}->sqrt->outCast{32,4}
*/
TEST_F(TestRemoveRedundantOpPass, RemoveRedundantOpUTest13)
{
    auto funcPtr = std::make_shared<Function>(
        Program::GetInstance(), "TestRemoveRedundantReshape", "TestRemoveRedundantReshape", nullptr);
    EXPECT_TRUE(funcPtr != nullptr);
    Function& func = *funcPtr;

    // Shapes along the reshape chain: {8,16} -> {16,8} -> {32,4}.
    std::vector<int64_t> srcShape = {kNumEight, kNumExpFour};
    std::vector<int64_t> midShape = {kNumExpFour, kNumEight};
    std::vector<int64_t> dstShape = {kNumExpFive, kNumFour};
    auto inCast = IRBuilder().CreateTensorVar(DT_FP32, srcShape, CreateTestConstIntVector(srcShape));
    auto ubTensor1 = IRBuilder().CreateTensorVar(DT_FP32, midShape, CreateTestConstIntVector(midShape));
    auto ubTensor2 = IRBuilder().CreateTensorVar(DT_FP32, dstShape, CreateTestConstIntVector(dstShape));
    auto outCast = IRBuilder().CreateTensorVar(DT_FP32, dstShape, CreateTestConstIntVector(dstShape));

    // inCast -> reshape -> ubTensor1 -> reshape -> ubTensor2 -> sqrt -> outCast
    auto& firstReshape = PassOperationUtils::AddOperation(func, Opcode::OP_RESHAPE, {inCast}, {ubTensor1});
    auto& secondReshape = PassOperationUtils::AddOperation(func, Opcode::OP_RESHAPE, {ubTensor1}, {ubTensor2});
    auto& sqrtOp = PassOperationUtils::AddOperation(func, Opcode::OP_SQRT, {ubTensor2}, {outCast});
    func.inCasts_.push_back(inCast);
    func.outCasts_.push_back(outCast);

    RemoveRedundantOp pass;
    EXPECT_EQ(pass.RunOnFunction(func), SUCCESS);

    const auto& remainingOps = func.Operations();
    uint32_t reshapeCount = kNumZero;
    for (auto& op : remainingOps) {
        const auto opcode = op.GetOpcode();
        if (opcode == Opcode::OP_SQRT) {
            // sqrt is expected to still read the output of the second reshape.
            EXPECT_EQ(sqrtOp.GetInputOperandSize(), kSizeOne);
            EXPECT_EQ(sqrtOp.GetInputOperand(kSizeZero), ubTensor2);
            continue;
        }
        if (opcode != Opcode::OP_RESHAPE) {
            continue;
        }
        // Any reshape left must be the second one, now fed directly by inCast.
        EXPECT_EQ(secondReshape.GetOpMagic(), op.GetOpMagic());
        EXPECT_EQ(secondReshape.GetInputOperand(kSizeZero), inCast);
        ++reshapeCount;
    }
    EXPECT_FALSE(remainingOps.Contains(firstReshape));
    EXPECT_EQ(reshapeCount, kNumOne);
}
RemoveSameReshape
inCast{8,16}->reshape->ubTensor{8,16}->sqrt->outCast{8,16}
inCast{8,16}->sqrt->outCast{8,16}
*/
TEST_F(TestRemoveRedundantOpPass, RemoveRedundantOpUTest14)
{
    auto funcPtr = std::make_shared<Function>(
        Program::GetInstance(), "TestRemoveRedundantReshape", "TestRemoveRedundantReshape", nullptr);
    EXPECT_TRUE(funcPtr != nullptr);
    Function& func = *funcPtr;

    // All tensors share the {8,16} shape, so the reshape keeps the shape unchanged.
    std::vector<int64_t> dims = {kNumEight, kNumExpFour};
    auto inCast = IRBuilder().CreateTensorVar(DT_FP32, dims, CreateTestConstIntVector(dims));
    auto ubTensor = IRBuilder().CreateTensorVar(DT_FP32, dims, CreateTestConstIntVector(dims));
    auto outCast = IRBuilder().CreateTensorVar(DT_FP32, dims, CreateTestConstIntVector(dims));

    // inCast -> reshape -> ubTensor -> sqrt -> outCast
    PassOperationUtils::AddOperation(func, Opcode::OP_RESHAPE, {inCast}, {ubTensor});
    auto& sqrtOp = PassOperationUtils::AddOperation(func, Opcode::OP_SQRT, {ubTensor}, {outCast});
    func.inCasts_.push_back(inCast);
    func.outCasts_.push_back(outCast);

    RemoveRedundantOp pass;
    EXPECT_EQ(pass.RunOnFunction(func), SUCCESS);

    uint32_t reshapeCount = kNumZero;
    for (auto& op : func.Operations()) {
        const auto opcode = op.GetOpcode();
        if (opcode == Opcode::OP_RESHAPE) {
            ++reshapeCount;
            continue;
        }
        if (opcode == Opcode::OP_SQRT) {
            // With the reshape gone, sqrt is expected to consume inCast directly.
            EXPECT_EQ(sqrtOp.GetInputOperandSize(), kSizeOne);
            EXPECT_EQ(sqrtOp.GetInputOperand(kSizeZero), inCast);
        }
    }
    EXPECT_EQ(reshapeCount, kNumZero);
}
RemoveReshapeChainSeveralConsumer(WARNING CASE)
inCast{8,16}->reshape->ubTensor{8,16}->sqrt->outCast1{8,16}
->exp->outCast2{8,16}
->reshape->outCast3{16,8}
inCast{8,16}->sqrt->outCast1{8,16}
->exp->outCast2{8,16}
*/
TEST_F(TestRemoveRedundantOpPass, RemoveRedundantOpUTest15)
{
    auto funcPtr = std::make_shared<Function>(
        Program::GetInstance(), "TestRemoveRedundantReshape", "TestRemoveRedundantReshape", nullptr);
    EXPECT_TRUE(funcPtr != nullptr);
    Function& func = *funcPtr;

    std::vector<int64_t> baseShape = {kNumEight, kNumExpFour};
    std::vector<int64_t> swappedShape = {kNumExpFour, kNumEight};
    auto inCast = IRBuilder().CreateTensorVar(DT_FP32, baseShape, CreateTestConstIntVector(baseShape));
    auto ubTensor = IRBuilder().CreateTensorVar(DT_FP32, baseShape, CreateTestConstIntVector(baseShape));
    auto outCast1 = IRBuilder().CreateTensorVar(DT_FP32, baseShape, CreateTestConstIntVector(baseShape));
    auto outCast2 = IRBuilder().CreateTensorVar(DT_FP32, baseShape, CreateTestConstIntVector(baseShape));
    auto outCast3 = IRBuilder().CreateTensorVar(DT_FP32, swappedShape, CreateTestConstIntVector(swappedShape));

    // ubTensor (output of a same-shape reshape) feeds three consumers: sqrt, exp and another reshape.
    PassOperationUtils::AddOperation(func, Opcode::OP_RESHAPE, {inCast}, {ubTensor});
    auto& sqrtOp = PassOperationUtils::AddOperation(func, Opcode::OP_SQRT, {ubTensor}, {outCast1});
    auto& expOp = PassOperationUtils::AddOperation(func, Opcode::OP_EXP, {ubTensor}, {outCast2});
    PassOperationUtils::AddOperation(func, Opcode::OP_RESHAPE, {ubTensor}, {outCast3});
    func.inCasts_.push_back(inCast);
    func.outCasts_.push_back(outCast1);
    func.outCasts_.push_back(outCast2);
    func.outCasts_.push_back(outCast3);

    // This graph is expected to be rejected by PreCheck while the pass itself still succeeds.
    RemoveRedundantOp pass;
    EXPECT_NE(pass.PreCheck(func), SUCCESS);
    EXPECT_EQ(pass.RunOnFunction(func), SUCCESS);

    for (auto& op : func.Operations()) {
        const auto opcode = op.GetOpcode();
        if (opcode == Opcode::OP_EXP) {
            EXPECT_EQ(expOp.GetInputOperandSize(), kSizeOne);
            EXPECT_EQ(expOp.GetInputOperand(kSizeZero), inCast);
            continue;
        }
        if (opcode == Opcode::OP_SQRT) {
            EXPECT_EQ(sqrtOp.GetInputOperandSize(), kSizeOne);
            EXPECT_EQ(sqrtOp.GetInputOperand(kSizeZero), inCast);
        }
    }
}
RemoveReshapeChainSeveralConsumer
inCast{8,16}->reshape->ubTensor1{16,8}->exp->outCast1{16,8}
->reshape->ubTensor2{32,4}->sqrt->outCast2{32,4}
inCast{8,16}->reshape->ubTensor1{16,8}->exp->outCast1{16,8}
->reshape->ubTensor2{32,4}->sqrt->outCast2{32,4}
*/
TEST_F(TestRemoveRedundantOpPass, RemoveRedundantOpUTest16)
{
    auto funcPtr = std::make_shared<Function>(
        Program::GetInstance(), "TestRemoveRedundantReshape", "TestRemoveRedundantReshape", nullptr);
    EXPECT_TRUE(funcPtr != nullptr);
    Function& func = *funcPtr;

    // Shapes: {8,16} input, {16,8} after the first reshape, {32,4} after the second.
    std::vector<int64_t> srcShape = {kNumEight, kNumExpFour};
    std::vector<int64_t> midShape = {kNumExpFour, kNumEight};
    std::vector<int64_t> dstShape = {kNumExpFive, kNumFour};
    auto inCast = IRBuilder().CreateTensorVar(DT_FP32, srcShape, CreateTestConstIntVector(srcShape));
    auto ubTensor1 = IRBuilder().CreateTensorVar(DT_FP32, midShape, CreateTestConstIntVector(midShape));
    auto outCast1 = IRBuilder().CreateTensorVar(DT_FP32, midShape, CreateTestConstIntVector(midShape));
    auto ubTensor2 = IRBuilder().CreateTensorVar(DT_FP32, dstShape, CreateTestConstIntVector(dstShape));
    auto outCast2 = IRBuilder().CreateTensorVar(DT_FP32, dstShape, CreateTestConstIntVector(dstShape));

    // ubTensor1 has two consumers: exp and the second reshape.
    auto& firstReshape = PassOperationUtils::AddOperation(func, Opcode::OP_RESHAPE, {inCast}, {ubTensor1});
    PassOperationUtils::AddOperation(func, Opcode::OP_EXP, {ubTensor1}, {outCast1});
    auto& secondReshape = PassOperationUtils::AddOperation(func, Opcode::OP_RESHAPE, {ubTensor1}, {ubTensor2});
    PassOperationUtils::AddOperation(func, Opcode::OP_SQRT, {ubTensor2}, {outCast2});
    func.inCasts_.push_back(inCast);
    func.outCasts_.push_back(outCast1);
    func.outCasts_.push_back(outCast2);

    RemoveRedundantOp pass;
    EXPECT_EQ(pass.RunOnFunction(func), SUCCESS);

    uint32_t reshapeCount = kNumZero;
    for (auto& op : func.Operations()) {
        if (op.GetOpcode() != Opcode::OP_RESHAPE) {
            continue;
        }
        ++reshapeCount;
    }
    // Both reshapes are expected to survive, each reading inCast.
    EXPECT_EQ(firstReshape.GetInputOperand(kSizeZero), inCast);
    EXPECT_EQ(secondReshape.GetInputOperand(kSizeZero), inCast);
    EXPECT_EQ(reshapeCount, kNumTwo);
}
TESTRemoveIterative
inCast{8,16}->view->ubTensor1{8,16}->reshape->ubTensor2{8,16}->assemble->outCast1{8,16}
all delete
*/
TEST_F(TestRemoveRedundantOpPass, RemoveRedundantOpUTest17)
{
    auto funcPtr =
        std::make_shared<Function>(Program::GetInstance(), "TestRemoveRedundantOp", "TestRemoveRedundantOp", nullptr);
    EXPECT_TRUE(funcPtr != nullptr);
    Function& func = *funcPtr;

    // Every tensor is {8,16} and marked as DDR; view and assemble both use a zero offset.
    std::vector<int64_t> dims = {kNumEight, kNumExpFour};
    std::vector<int64_t> zeroOffset = {kNumZero, kNumZero};
    auto inCast = IRBuilder().CreateTensorVar(DT_FP32, dims, CreateTestConstIntVector(dims));
    inCast->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto ubTensor1 = IRBuilder().CreateTensorVar(DT_FP32, dims, CreateTestConstIntVector(dims));
    ubTensor1->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto ubTensor2 = IRBuilder().CreateTensorVar(DT_FP32, dims, CreateTestConstIntVector(dims));
    ubTensor2->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto outCast =
        IRBuilder().CreateTensorVar(DT_FP32, dims, CreateTestConstIntVector(dims), TileOpFormat::TILEOP_ND, "outCast");
    outCast->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);

    // inCast -> view -> ubTensor1 -> reshape -> ubTensor2 -> assemble -> outCast
    PassOperationUtils::AddOperation(
        func, Opcode::OP_VIEW, {inCast}, {ubTensor1},
        [&zeroOffset](Operation& target) { target.SetOpAttribute(std::make_shared<ViewOpAttribute>(zeroOffset)); });
    PassOperationUtils::AddOperation(func, Opcode::OP_RESHAPE, {ubTensor1}, {ubTensor2});
    PassOperationUtils::AddOperation(
        func, Opcode::OP_ASSEMBLE, {ubTensor2}, {outCast},
        [&zeroOffset](Operation& target) { target.SetOpAttribute(std::make_shared<AssembleOpAttribute>(zeroOffset)); });
    func.inCasts_.push_back(inCast);
    func.outCasts_.push_back(outCast);

    RemoveRedundantOp pass;
    EXPECT_EQ(pass.PreCheck(func), SUCCESS);
    EXPECT_EQ(pass.RunOnFunction(func), SUCCESS);
    EXPECT_EQ(pass.PostCheck(func), SUCCESS);

    // None of the three op kinds is expected to remain after the pass.
    uint32_t assembleCount = kNumZero;
    uint32_t viewCount = kNumZero;
    uint32_t reshapeCount = kNumZero;
    for (auto& op : func.Operations()) {
        const auto opcode = op.GetOpcode();
        if (opcode == Opcode::OP_ASSEMBLE) {
            ++assembleCount;
        }
        if (opcode == Opcode::OP_VIEW) {
            ++viewCount;
        }
        if (opcode == Opcode::OP_RESHAPE) {
            ++reshapeCount;
        }
    }
    EXPECT_EQ(assembleCount, kNumZero);
    EXPECT_EQ(viewCount, kNumZero);
    EXPECT_EQ(reshapeCount, kNumZero);
}
TestRemoveAssembleSpecialCase
inCast{8,16}->exp->ddrTensor1{8,16} ->assemble-> outCast{8,16}
->exp->ddrTensor1{8,16} ->assemble->
inCast{8,16}->exp->ddrTensor1{8,16} ->assemble-> outCast{8,16}
->exp->ddrTensor1{8,16} ->assemble->
*/
TEST_F(TestRemoveRedundantOpPass, TestRemoveMoreAssembleSpecialCase)
{
    auto funcPtr =
        std::make_shared<Function>(Program::GetInstance(), "TestRemoveRedundantOp", "TestRemoveRedundantOp", nullptr);
    EXPECT_TRUE(funcPtr != nullptr);
    Function& func = *funcPtr;

    std::vector<int64_t> dims = {kNumEight, kNumExpFour};
    auto inCast = IRBuilder().CreateTensorVar(DT_FP32, dims, CreateTestConstIntVector(dims));
    auto outCast =
        IRBuilder().CreateTensorVar(DT_FP32, dims, CreateTestConstIntVector(dims), TileOpFormat::TILEOP_ND, "outCast");
    outCast->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto ubTensor1 = IRBuilder().CreateTensorVar(DT_FP32, dims, CreateTestConstIntVector(dims));
    ubTensor1->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto ubTensor2 = IRBuilder().CreateTensorVar(DT_FP32, dims, CreateTestConstIntVector(dims));
    ubTensor2->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto ubTensor3 = IRBuilder().CreateTensorVar(DT_FP32, dims, CreateTestConstIntVector(dims));
    ubTensor3->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);

    // Two exp results are assembled into the same tensor (ubTensor3), which a final exp consumes.
    PassOperationUtils::AddOperation(func, Opcode::OP_EXP, {inCast}, {ubTensor1});
    PassOperationUtils::AddOperation(func, Opcode::OP_EXP, {inCast}, {ubTensor2});
    PassOperationUtils::AddOperation(func, Opcode::OP_ASSEMBLE, {ubTensor1}, {ubTensor3});
    PassOperationUtils::AddOperation(func, Opcode::OP_ASSEMBLE, {ubTensor2}, {ubTensor3});
    PassOperationUtils::AddOperation(func, Opcode::OP_EXP, {ubTensor3}, {outCast});
    func.inCasts_.push_back(inCast);
    func.outCasts_.push_back(outCast);

    RemoveRedundantOp pass;
    EXPECT_EQ(pass.PreCheck(func), SUCCESS);
    EXPECT_EQ(pass.RunOnFunction(func), SUCCESS);
    EXPECT_EQ(pass.PostCheck(func), SUCCESS);

    // Both assembles are expected to be kept.
    uint32_t assembleCount = kNumZero;
    for (const auto& op : func.Operations()) {
        if (op.GetOpcode() != Opcode::OP_ASSEMBLE) {
            continue;
        }
        ++assembleCount;
    }
    EXPECT_EQ(assembleCount, kNumTwo);
}
TestRemoveAssembleDynSpecialCase
inCast{8,16}->exp->Tensor1{8,16} ->Reshape->Tensor2{8,16} ->assemble-> outCast{8,16}
inCast{8,16}->exp->Tensor1{8,16} ->Reshape->Tensor2{16,8} ->assemble-> outCast{8,16}
*/
TEST_F(TestRemoveRedundantOpPass, TestRemoveMoreAssembleDynSpecialCase)
{
    auto currFunctionPtr =
        std::make_shared<Function>(Program::GetInstance(), "TestRemoveRedundantOp", "TestRemoveRedundantOp", nullptr);
    EXPECT_TRUE(currFunctionPtr != nullptr);
    std::vector<int64_t> shape = {kNumEight, kNumExpFour};
    std::vector<int64_t> shape1 = {kNumExpFour, kNumEight};
    auto inCast = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    auto outCast = npu::tile_fwk::IRBuilder().CreateTensorVar(
        DT_FP32, shape1, CreateTestConstIntVector(shape1), TileOpFormat::TILEOP_ND, "outCast");
    // outCast starts with its own symbolic valid shape ("output_0_Dim_*").
    outCast->UpdateDynValidShape({CreateTestScalarVar("output_0_Dim_0"), CreateTestScalarVar("output_0_Dim_1")});
    outCast->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto ubTensor1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    ubTensor1->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    ubTensor1->UpdateDynValidShape({CreateTestScalarVar("Reshape_0_Dim_0"), CreateTestScalarVar("Reshape_0_Dim_1")});
    auto ubTensor2 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape1, CreateTestConstIntVector(shape1));
    ubTensor2->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    ubTensor2->UpdateDynValidShape({CreateTestScalarVar("Reshape_0_Dim_0"), CreateTestScalarVar("Reshape_0_Dim_1")});

    // inCast -> exp -> ubTensor1 -> reshape -> ubTensor2 -> assemble -> outCast
    PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_EXP, {inCast}, {ubTensor1});
    PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_RESHAPE, {ubTensor1}, {ubTensor2});
    PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_ASSEMBLE, {ubTensor2}, {outCast});
    currFunctionPtr->inCasts_.push_back(inCast);
    currFunctionPtr->outCasts_.push_back(outCast);

    RemoveRedundantOp RemoveRedundantOpPass;
    EXPECT_EQ(RemoveRedundantOpPass.PreCheck(*currFunctionPtr), SUCCESS);
    EXPECT_EQ(RemoveRedundantOpPass.RunOnFunction(*currFunctionPtr), SUCCESS);
    EXPECT_EQ(RemoveRedundantOpPass.PostCheck(*currFunctionPtr), SUCCESS);

    uint32_t viewNum = kNumZero;
    uint32_t assembleNum = kNumZero;
    for (const auto& op : currFunctionPtr->Operations()) {
        if (op.GetOpcode() == Opcode::OP_VIEW) {
            ++viewNum;
        }
        if (op.GetOpcode() == Opcode::OP_ASSEMBLE) {
            ++assembleNum;
        }
    }

    // Abort before indexing if the pass left fewer than two dynamic dims on the outcast;
    // otherwise the [0]/[1] accesses below would read out of bounds.
    auto outcastAfterPass = currFunctionPtr->GetOutcast()[0];
    ASSERT_GE(outcastAfterPass->GetDynValidShape().size(), kNumTwo);
    // The outcast is expected to carry the reshape output's symbolic valid shape after the pass.
    EXPECT_EQ(outcastAfterPass->GetDynValidShape()[0].Dump(), "Reshape_0_Dim_0");
    EXPECT_EQ(outcastAfterPass->GetDynValidShape()[1].Dump(), "Reshape_0_Dim_1");
    EXPECT_EQ(viewNum, kNumZero);
    EXPECT_EQ(assembleNum, kNumZero);
}
TestGenerateViewSpecialCase
inCast1{8,16}->view->Tensor1{4,16}->assemble->outCast{16,16}
->view->Tensor2{4,16}->assemble->
inCast2{8,16}->mul->Tensor3{8,16}->assemble->
inCast3{8,16}
inCast1{8,16}->view->Tensor1{4,16}->assemble->outCast{16,16}
->view->Tensor2{4,16}->assemble->
inCast2{8,16}->mul->Tensor3{8,16}->assemble->
inCast3{8,16}
*/
TEST_F(TestRemoveRedundantOpPass, TestGenerateViewSpecialCase)
{
    auto funcPtr =
        std::make_shared<Function>(Program::GetInstance(), "TestRemoveRedundantOp", "TestRemoveRedundantOp", nullptr);
    EXPECT_TRUE(funcPtr != nullptr);
    Function& func = *funcPtr;

    // Shapes: {8,16} inputs, {16,16} output, {4,16} view slices.
    std::vector<int64_t> inShape = {kNumEight, kNumExpFour};
    std::vector<int64_t> outShape = {kNumExpFour, kNumExpFour};
    std::vector<int64_t> sliceShape = {kNumFour, kNumExpFour};
    // Row offsets 0, 4 and 8 used by the views/assembles.
    std::vector<int64_t> offsetRow0 = {kNumZero, kNumZero};
    std::vector<int64_t> offsetRow4 = {kNumFour, kNumZero};
    std::vector<int64_t> offsetRow8 = {kNumEight, kNumZero};

    auto inCast1 = IRBuilder().CreateTensorVar(DT_FP32, inShape, CreateTestConstIntVector(inShape));
    auto inCast2 = IRBuilder().CreateTensorVar(DT_FP32, inShape, CreateTestConstIntVector(inShape));
    auto inCast3 = IRBuilder().CreateTensorVar(DT_FP32, inShape, CreateTestConstIntVector(inShape));
    auto outCast = IRBuilder().CreateTensorVar(
        DT_FP32, outShape, CreateTestConstIntVector(outShape), TileOpFormat::TILEOP_ND, "outCast");
    outCast->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto ubTensor1 = IRBuilder().CreateTensorVar(DT_FP32, sliceShape, CreateTestConstIntVector(sliceShape));
    ubTensor1->SetMemoryTypeOriginal(MemoryType::MEM_UB, false);
    auto ubTensor2 = IRBuilder().CreateTensorVar(DT_FP32, sliceShape, CreateTestConstIntVector(sliceShape));
    ubTensor2->SetMemoryTypeOriginal(MemoryType::MEM_UB, false);
    auto ubTensor3 = IRBuilder().CreateTensorVar(DT_FP32, inShape, CreateTestConstIntVector(inShape));
    ubTensor3->SetMemoryTypeOriginal(MemoryType::MEM_UB, false);

    // Two views slice inCast1; a mul combines inCast2 and inCast3.
    PassOperationUtils::AddOperation(
        func, Opcode::OP_VIEW, {inCast1}, {ubTensor1},
        [&offsetRow0](Operation& target) { target.SetOpAttribute(std::make_shared<ViewOpAttribute>(offsetRow0)); });
    PassOperationUtils::AddOperation(
        func, Opcode::OP_VIEW, {inCast1}, {ubTensor2},
        [&offsetRow4](Operation& target) { target.SetOpAttribute(std::make_shared<ViewOpAttribute>(offsetRow4)); });
    PassOperationUtils::AddOperation(func, Opcode::OP_MUL, {inCast2, inCast3}, {ubTensor3});

    // All three intermediate tensors are assembled into outCast at different row offsets.
    PassOperationUtils::AddOperation(
        func, Opcode::OP_ASSEMBLE, {ubTensor1}, {outCast},
        [&offsetRow0](Operation& target) { target.SetOpAttribute(std::make_shared<AssembleOpAttribute>(offsetRow0)); });
    PassOperationUtils::AddOperation(
        func, Opcode::OP_ASSEMBLE, {ubTensor2}, {outCast},
        [&offsetRow4](Operation& target) { target.SetOpAttribute(std::make_shared<AssembleOpAttribute>(offsetRow4)); });
    PassOperationUtils::AddOperation(
        func, Opcode::OP_ASSEMBLE, {ubTensor3}, {outCast},
        [&offsetRow8](Operation& target) { target.SetOpAttribute(std::make_shared<AssembleOpAttribute>(offsetRow8)); });

    func.inCasts_.push_back(inCast1);
    func.inCasts_.push_back(inCast2);
    func.inCasts_.push_back(inCast3);
    func.outCasts_.push_back(outCast);

    RemoveRedundantOp pass;
    EXPECT_EQ(pass.RunOnFunction(func), SUCCESS);

    // The graph is expected to be left untouched: two views and three assembles.
    uint32_t viewCount = kNumZero;
    uint32_t assembleCount = kNumZero;
    for (const auto& op : func.Operations()) {
        const auto opcode = op.GetOpcode();
        if (opcode == Opcode::OP_VIEW) {
            ++viewCount;
        } else if (opcode == Opcode::OP_ASSEMBLE) {
            ++assembleCount;
        }
    }
    EXPECT_EQ(viewCount, kNumTwo);
    EXPECT_EQ(assembleCount, kNumThree);
}
TestGenerateViewDynOffsetCase
inCast{8,16}->view->Tensor1{4,16}->assemble->Tensor2{4,16}->exp->outCast{4,16}
inCast{8,16}->view->Tensor1{4,16}->exp->outCast{4,16}
*/
TEST_F(TestRemoveRedundantOpPass, TestGenerateViewDynOffsetCase)
{
    auto currFunctionPtr =
        std::make_shared<Function>(Program::GetInstance(), "TestRemoveRedundantOp", "TestRemoveRedundantOp", nullptr);
    EXPECT_TRUE(currFunctionPtr != nullptr);
    uint32_t dynOffset = 0;
    std::vector<int64_t> shape = {kNumEight, kNumExpFour};
    std::vector<int64_t> shape1 = {kNumFour, kNumExpFour};
    std::vector<int64_t> offset = {kNumZero, kNumZero};
    // Dynamic offset attached to the view alongside the static one; both entries are 0.
    std::vector<SymbolicScalar> newDynOffset{dynOffset, dynOffset};
    auto inCast = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape, CreateTestConstIntVector(shape));
    auto outCast = npu::tile_fwk::IRBuilder().CreateTensorVar(
        DT_FP32, shape1, CreateTestConstIntVector(shape1), TileOpFormat::TILEOP_ND, "outCast");
    outCast->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto ubTensor1 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape1, CreateTestConstIntVector(shape1));
    ubTensor1->SetMemoryTypeOriginal(MemoryType::MEM_UB, false);
    auto ubTensor2 = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape1, CreateTestConstIntVector(shape1));
    ubTensor2->SetMemoryTypeOriginal(MemoryType::MEM_UB, false);

    // inCast -> view -> ubTensor1 -> assemble -> ubTensor2 -> exp -> outCast
    auto& viewOp = PassOperationUtils::AddOperation(
        *currFunctionPtr, Opcode::OP_VIEW, {inCast}, {ubTensor1}, [&offset, &newDynOffset](Operation& op) {
            op.SetOpAttribute(std::make_shared<ViewOpAttribute>(offset, newDynOffset));
        });
    PassOperationUtils::AddOperation(
        *currFunctionPtr, Opcode::OP_ASSEMBLE, {ubTensor1}, {ubTensor2},
        [&offset](Operation& op) { op.SetOpAttribute(std::make_shared<AssembleOpAttribute>(offset)); });
    PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_EXP, {ubTensor2}, {outCast});
    currFunctionPtr->inCasts_.push_back(inCast);
    currFunctionPtr->outCasts_.push_back(outCast);

    RemoveRedundantOp RemoveRedundantOpPass;
    EXPECT_EQ(RemoveRedundantOpPass.RunOnFunction(*currFunctionPtr), SUCCESS);

    uint32_t viewNum = kNumZero;
    uint32_t assembleNum = kNumZero;
    auto viewOpAttribute = dynamic_cast<ViewOpAttribute*>(viewOp.GetOpAttribute().get());
    // dynamic_cast yields nullptr when the attribute is missing or of another type;
    // stop here instead of dereferencing a null pointer in the checks below.
    ASSERT_TRUE(viewOpAttribute != nullptr);
    for (const auto& op : currFunctionPtr->Operations()) {
        if (op.GetOpcode() == Opcode::OP_VIEW) {
            ++viewNum;
        }
        if (op.GetOpcode() == Opcode::OP_ASSEMBLE) {
            ++assembleNum;
        }
    }
    // The assemble is expected to be removed while the view keeps its dynamic offset.
    EXPECT_EQ(viewNum, kNumOne);
    EXPECT_EQ(assembleNum, kNumZero);
    EXPECT_EQ(viewOpAttribute->GetFromDynOffset()[0].Dump(), "0");
    EXPECT_EQ(viewOpAttribute->GetFromDynOffset()[1].Dump(), "0");
}
TestOutcastMutiConsumerCase
inCast{8,16}->exp->Tensor1{4,16}->assemble->outCast1{4,16}
->exp->Tensor2{4,16}->exp->outCast2{4,16}
inCast{8,16}->exp->outCast1{4,16}->exp->Tensor2{4,16}->exp->outCast2{4,16}
*/
TEST_F(TestRemoveRedundantOpPass, TestOutcastMutiConsumerCase)
{
    auto funcPtr =
        std::make_shared<Function>(Program::GetInstance(), "TestRemoveRedundantOp", "TestRemoveRedundantOp", nullptr);
    EXPECT_TRUE(funcPtr != nullptr);
    Function& func = *funcPtr;

    std::vector<int64_t> inShape = {kNumEight, kNumExpFour};
    std::vector<int64_t> outShape = {kNumFour, kNumExpFour};
    std::vector<int64_t> zeroOffset = {kNumZero, kNumZero};
    auto inCast = IRBuilder().CreateTensorVar(DT_FP32, inShape, CreateTestConstIntVector(inShape));
    auto outCast1 = IRBuilder().CreateTensorVar(
        DT_FP32, outShape, CreateTestConstIntVector(outShape), TileOpFormat::TILEOP_ND, "outCast");
    auto outCast2 = IRBuilder().CreateTensorVar(
        DT_FP32, outShape, CreateTestConstIntVector(outShape), TileOpFormat::TILEOP_ND, "outCast");
    outCast1->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    outCast2->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto ddrTensor1 = IRBuilder().CreateTensorVar(DT_FP32, outShape, CreateTestConstIntVector(outShape));
    ddrTensor1->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto ubTensor2 = IRBuilder().CreateTensorVar(DT_FP32, outShape, CreateTestConstIntVector(outShape));
    ubTensor2->SetMemoryTypeOriginal(MemoryType::MEM_UB, false);

    // ddrTensor1 has two consumers: the assemble into outCast1 and the exp chain ending in outCast2.
    // The assemble is registered first, before the exp that produces its input.
    PassOperationUtils::AddOperation(
        func, Opcode::OP_ASSEMBLE, {ddrTensor1}, {outCast1},
        [&zeroOffset](Operation& target) { target.SetOpAttribute(std::make_shared<AssembleOpAttribute>(zeroOffset)); });
    PassOperationUtils::AddOperation(func, Opcode::OP_EXP, {inCast}, {ddrTensor1});
    PassOperationUtils::AddOperation(func, Opcode::OP_EXP, {ddrTensor1}, {ubTensor2});
    PassOperationUtils::AddOperation(func, Opcode::OP_EXP, {ubTensor2}, {outCast2});
    func.inCasts_.push_back(inCast);
    func.outCasts_.push_back(outCast1);
    func.outCasts_.push_back(outCast2);

    RemoveRedundantOp pass;
    EXPECT_EQ(pass.RunOnFunction(func), SUCCESS);

    // Four operations were added; three are expected to remain after the pass.
    uint32_t remainingOps = func.Operations().size();
    EXPECT_EQ(remainingOps, kNumThree);
}
TEST DynamicOutcast
inCast{8,16}->exp->ubTensor1{8,16}->view->ubTensor2{16,16}->assemble->outCast1{-1,16}
dynamic-axis, cannot delete
*/
TEST_F(TestRemoveRedundantOpPass, DynamicOutcast)
{
    auto funcPtr =
        std::make_shared<Function>(Program::GetInstance(), "TestRemoveRedundantOp", "TestRemoveRedundantOp", nullptr);
    EXPECT_TRUE(funcPtr != nullptr);
    Function& func = *funcPtr;

    // The outcast shape has -1 in its first dimension ({-1,16}).
    std::vector<int64_t> inShape = {kNumEight, kNumExpFour};
    std::vector<int64_t> viewShape = {kNumExpFour, kNumExpFour};
    std::vector<int64_t> dynShape = {kNumNegOne, kNumExpFour};
    std::vector<int64_t> zeroOffset = {kNumZero, kNumZero};
    auto inCast = IRBuilder().CreateTensorVar(DT_FP32, inShape, CreateTestConstIntVector(inShape));
    inCast->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto ubTensor1 = IRBuilder().CreateTensorVar(DT_FP32, inShape, CreateTestConstIntVector(inShape));
    ubTensor1->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto ubTensor2 = IRBuilder().CreateTensorVar(DT_FP32, viewShape, CreateTestConstIntVector(viewShape));
    ubTensor2->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    auto outCast = IRBuilder().CreateTensorVar(
        DT_FP32, dynShape, CreateTestConstIntVector(dynShape), TileOpFormat::TILEOP_ND, "outCast");
    outCast->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    func.inCasts_.push_back(inCast);
    func.outCasts_.push_back(outCast);

    // inCast -> exp -> ubTensor1 -> view -> ubTensor2 -> assemble -> outCast
    PassOperationUtils::AddOperation(func, Opcode::OP_EXP, {inCast}, {ubTensor1});
    PassOperationUtils::AddOperation(
        func, Opcode::OP_VIEW, {ubTensor1}, {ubTensor2},
        [&zeroOffset](Operation& target) { target.SetOpAttribute(std::make_shared<ViewOpAttribute>(zeroOffset)); });
    PassOperationUtils::AddOperation(
        func, Opcode::OP_ASSEMBLE, {ubTensor2}, {outCast},
        [&zeroOffset](Operation& target) { target.SetOpAttribute(std::make_shared<AssembleOpAttribute>(zeroOffset)); });

    RemoveRedundantOp pass;
    EXPECT_EQ(pass.PreCheck(func), SUCCESS);
    EXPECT_EQ(pass.RunOnFunction(func), SUCCESS);
    EXPECT_EQ(pass.PostCheck(func), SUCCESS);

    // Both the view and the assemble are expected to be kept.
    uint32_t assembleCount = kNumZero;
    uint32_t viewCount = kNumZero;
    for (auto& op : func.Operations()) {
        const auto opcode = op.GetOpcode();
        if (opcode == Opcode::OP_ASSEMBLE) {
            ++assembleCount;
        } else if (opcode == Opcode::OP_VIEW) {
            ++viewCount;
        }
    }
    EXPECT_EQ(assembleCount, kNumOne);
    EXPECT_EQ(viewCount, kNumOne);
}
// Builds a single OP_ASSEMBLE whose DDR output is neither consumed by another
// operation nor registered as an outcast, and expects PreCheck to return FAILED.
TEST_F(TestRemoveRedundantOpPass, AssembleDDR)
{
    auto func = std::make_shared<Function>(
        Program::GetInstance(), "TestAssembleDDRNoConsumer", "TestAssembleDDRNoConsumer", nullptr);
    EXPECT_TRUE(func != nullptr);

    std::vector<int64_t> dims = {64, 64};
    auto inTensor = IRBuilder().CreateTensorVar(DT_FP32, dims, CreateTestConstIntVector(dims));
    auto ddrOut = IRBuilder().CreateTensorVar(DT_FP32, dims, CreateTestConstIntVector(dims));
    ddrOut->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR);

    PassOperationUtils::AddOperation(*func, Opcode::OP_ASSEMBLE, {inTensor}, {ddrOut});
    func->inCasts_.push_back(inTensor);

    RemoveRedundantOp pass;
    auto preCheckStatus = pass.PreCheck(*func);
    EXPECT_EQ(preCheckStatus, FAILED);
}
// Builds an OP_VIEW whose output is registered as an outcast and is also read by
// an OP_COPY_IN, and expects PreCheck to return FAILED for this graph.
TEST_F(TestRemoveRedundantOpPass, ViewOp_OutCast)
{
    auto func = std::make_shared<Function>(
        Program::GetInstance(), "TestView_OutCast_Cover75", "TestView_OutCast_Cover75", nullptr);
    EXPECT_TRUE(func != nullptr);

    std::vector<int64_t> dims = {64, 64};
    auto viewInput = IRBuilder().CreateTensorVar(DT_FP32, dims, CreateTestConstIntVector(dims));
    auto viewOutput = IRBuilder().CreateTensorVar(DT_FP32, dims, CreateTestConstIntVector(dims));
    PassOperationUtils::AddOperation(*func, Opcode::OP_VIEW, {viewInput}, {viewOutput});
    func->inCasts_.push_back(viewInput);
    func->outCasts_.push_back(viewOutput);

    // The copy-in is added after the view output has been registered as an outcast.
    auto copyOutput = IRBuilder().CreateTensorVar(DT_FP32, dims, CreateTestConstIntVector(dims));
    PassOperationUtils::AddOperation(*func, Opcode::OP_COPY_IN, {viewOutput}, {copyOutput});

    RemoveRedundantOp pass;
    EXPECT_EQ(pass.PreCheck(*func), FAILED);
}
// Builds a single OP_REGISTER_COPY whose output is neither consumed nor registered
// as an outcast, and expects PreCheck to return FAILED.
TEST_F(TestRemoveRedundantOpPass, RegCopyNoConsumer)
{
    auto func = std::make_shared<Function>(
        Program::GetInstance(), "TestRegCopyNoConsumer", "TestRegCopyNoConsumer", nullptr);
    EXPECT_TRUE(func != nullptr);

    std::vector<int64_t> dims = {64, 64};
    auto copySource = IRBuilder().CreateTensorVar(DT_FP32, dims, CreateTestConstIntVector(dims));
    auto copyTarget = IRBuilder().CreateTensorVar(DT_FP32, dims, CreateTestConstIntVector(dims));
    PassOperationUtils::AddOperation(*func, Opcode::OP_REGISTER_COPY, {copySource}, {copyTarget});
    func->inCasts_.push_back(copySource);

    RemoveRedundantOp pass;
    EXPECT_EQ(pass.PreCheck(*func), FAILED);
}
TestDynValidShapeInference
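Verifies that DynValidShape is derived correctly when the assemble is removed:
inCast{8,16}->exp->Tensor1{8,16}[exp_dim0, exp_dim1]->Reshape->Tensor2{16,8}[reshape_dim0,
reshape_dim1]->assemble->outCast{16,8}
Verification steps:
1. Initially, outCast uses its shape as the default concrete DynValidShape
2. After the assemble is removed, verify that outCast correctly inherits the DynValidShape of the Reshape output
3. Verify the complete DynValidShape derivation (inherited from the input tensor through to the output tensor)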
*/
TEST_F(TestRemoveRedundantOpPass, TestDynValidShapeInference)
{
    auto currFunctionPtr = std::make_shared<Function>(
        Program::GetInstance(), "TestDynValidShapeInference", "TestDynValidShapeInference", nullptr);
    EXPECT_TRUE(currFunctionPtr != nullptr);
    std::vector<int64_t> shape8x16 = {kNumEight, kNumExpFour};
    std::vector<int64_t> shape16x8 = {kNumExpFour, kNumEight};
    auto inCast = npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape8x16, CreateTestConstIntVector(shape8x16));
    inCast->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    // The exp and reshape outputs carry distinct symbolic valid shapes so the test can tell
    // which one ends up on the outcast.
    auto expOutput =
        npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape8x16, CreateTestConstIntVector(shape8x16));
    expOutput->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    expOutput->UpdateDynValidShape({CreateTestScalarVar("exp_output_dim0"), CreateTestScalarVar("exp_output_dim1")});
    auto reshapeOutput =
        npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape16x8, CreateTestConstIntVector(shape16x8));
    reshapeOutput->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);
    reshapeOutput->UpdateDynValidShape(
        {CreateTestScalarVar("reshape_output_dim0"), CreateTestScalarVar("reshape_output_dim1")});
    auto outCast = npu::tile_fwk::IRBuilder().CreateTensorVar(
        DT_FP32, shape16x8, CreateTestConstIntVector(shape16x8), TileOpFormat::TILEOP_ND, "outCast");
    outCast->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);

    // inCast -> exp -> expOutput -> reshape -> reshapeOutput -> assemble -> outCast
    PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_EXP, {inCast}, {expOutput});
    PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_RESHAPE, {expOutput}, {reshapeOutput});
    PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_ASSEMBLE, {reshapeOutput}, {outCast});
    currFunctionPtr->inCasts_.push_back(inCast);
    currFunctionPtr->outCasts_.push_back(outCast);

    // State of the outcast's dynamic valid shape before the pass runs.
    auto outcastBeforePass = currFunctionPtr->GetOutcast()[0];
    ASSERT_EQ(outcastBeforePass->GetDynValidShape().size(), kNumTwo);
    EXPECT_EQ(outcastBeforePass->GetDynValidShape()[0].Dump(), "reshape_output_dim0");
    EXPECT_EQ(outcastBeforePass->GetDynValidShape()[1].Dump(), "reshape_output_dim1");

    RemoveRedundantOp pass;
    EXPECT_EQ(pass.PreCheck(*currFunctionPtr), SUCCESS);
    EXPECT_EQ(pass.RunOnFunction(*currFunctionPtr), SUCCESS);
    EXPECT_EQ(pass.PostCheck(*currFunctionPtr), SUCCESS);

    uint32_t assembleNum = kNumZero;
    uint32_t reshapeNum = kNumZero;
    for (const auto& op : currFunctionPtr->Operations()) {
        if (op.GetOpcode() == Opcode::OP_ASSEMBLE) {
            ++assembleNum;
        }
        if (op.GetOpcode() == Opcode::OP_RESHAPE) {
            ++reshapeNum;
        }
    }

    auto outcastAfterPass = currFunctionPtr->GetOutcast()[0];
    EXPECT_FALSE(outcastAfterPass->GetDynValidShape().empty());
    // Fatal, like the pre-pass check: the [0]/[1] accesses below would read out of bounds
    // if the pass left the outcast with a different number of dynamic dims.
    ASSERT_EQ(outcastAfterPass->GetDynValidShape().size(), kNumTwo);
    EXPECT_EQ(outcastAfterPass->GetDynValidShape()[0].Dump(), "reshape_output_dim0");
    EXPECT_EQ(outcastAfterPass->GetDynValidShape()[1].Dump(), "reshape_output_dim1");
    // The assemble is expected to be removed while the reshape stays.
    EXPECT_EQ(assembleNum, kNumZero);
    EXPECT_EQ(reshapeNum, kNumOne);
}
TestNewViewDynValidShapeInference
验证插入新 view 时 DynValidShape 的正确推导(Case2: GenerateNewView):
inCast{8,16}->view->Tensor1{4,16}[view_output_dim0, view_output_dim1]->assemble->Tensor2{4,16}->exp->outCast{4,16}
(offset=[0,0])
删除 assemble 后插入新 view:
inCast{8,16}->view(Tensor2)->exp->outCast{4,16}
验证逻辑:
1. viewOutput和assembleOutput共享同一个RawTensor(这是正常情况)
2. viewOutput设置了DynValidShape
3. 删除assemble并插入新view后,验证:
- 新view的输出Tensor正确继承RawTensor的DynValidShape(即viewOutput的DynValidShape)
- 验证DynValidShape在RawTensor层面的正确传播
*/
TEST_F(TestRemoveRedundantOpPass, TestNewViewDynValidShapeInference)
{
    // Graph under test:
    //   inCast{8,16} -> VIEW(offset {0,0}) -> viewOutput{4,16} -> ASSEMBLE(offset {0,0}) -> assembleOutput{4,16}
    //                -> EXP -> outCast{4,16}
    // Expectation: after RemoveRedundantOp exactly one VIEW and no ASSEMBLE remain, and the output of the
    // remaining VIEW reports the symbolic DynValidShape that was set on viewOutput.
    auto currFunctionPtr = std::make_shared<Function>(
        Program::GetInstance(), "TestNewViewDynValidShape", "TestNewViewDynValidShape", nullptr);
    // Fatal check: the pointer is dereferenced on the following lines.
    ASSERT_TRUE(currFunctionPtr != nullptr);

    std::vector<int64_t> shape8x16 = {kNumEight, kNumExpFour};
    std::vector<int64_t> shape4x16 = {kNumFour, kNumExpFour};
    std::vector<int64_t> offset = {kNumZero, kNumZero};

    auto inCast =
        npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape8x16, CreateTestConstIntVector(shape8x16));
    inCast->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);

    // VIEW output carrying the symbolic valid shape checked at the end of the test.
    auto viewOutput =
        npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape4x16, CreateTestConstIntVector(shape4x16));
    viewOutput->SetMemoryTypeOriginal(MemoryType::MEM_UB, false);
    viewOutput->UpdateDynValidShape(
        {CreateTestScalarVar("view_output_dim0"), CreateTestScalarVar("view_output_dim1")});

    // ASSEMBLE output: no symbolic valid shape is set on it here.
    auto assembleOutput =
        npu::tile_fwk::IRBuilder().CreateTensorVar(DT_FP32, shape4x16, CreateTestConstIntVector(shape4x16));
    assembleOutput->SetMemoryTypeOriginal(MemoryType::MEM_UB, false);

    auto outCast = npu::tile_fwk::IRBuilder().CreateTensorVar(
        DT_FP32, shape4x16, CreateTestConstIntVector(shape4x16), TileOpFormat::TILEOP_ND, "outCast");
    outCast->SetMemoryTypeOriginal(MemoryType::MEM_DEVICE_DDR, false);

    PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_VIEW, {inCast}, {viewOutput},
        [&offset](Operation& op) {
            op.SetOpAttribute(std::make_shared<ViewOpAttribute>(offset));
        });
    PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_ASSEMBLE, {viewOutput}, {assembleOutput},
        [&offset](Operation& op) {
            op.SetOpAttribute(std::make_shared<AssembleOpAttribute>(offset));
        });
    PassOperationUtils::AddOperation(*currFunctionPtr, Opcode::OP_EXP, {assembleOutput}, {outCast});
    currFunctionPtr->inCasts_.push_back(inCast);
    currFunctionPtr->outCasts_.push_back(outCast);

    RemoveRedundantOp pass;
    EXPECT_EQ(pass.RunOnFunction(*currFunctionPtr), SUCCESS);
    EXPECT_EQ(CountOpcode(currFunctionPtr, Opcode::OP_VIEW), kNumOne);
    EXPECT_EQ(CountOpcode(currFunctionPtr, Opcode::OP_ASSEMBLE), kNumZero);

    // Fatal check: a null result would otherwise be dereferenced immediately below.
    const Operation* newViewOp = FindSingleOp(currFunctionPtr, Opcode::OP_VIEW);
    ASSERT_TRUE(newViewOp != nullptr);

    // The surviving VIEW must still carry a ViewOpAttribute.
    auto viewAttribute = dynamic_cast<ViewOpAttribute*>(newViewOp->GetOpAttribute().get());
    EXPECT_TRUE(viewAttribute != nullptr);

    // Guard both indexing operations so a regression fails the test instead of reading out of bounds.
    ASSERT_FALSE(newViewOp->GetOOperands().empty());
    auto newViewOutput = newViewOp->GetOOperands()[0];
    ASSERT_EQ(newViewOutput->GetDynValidShape().size(), kNumTwo);
    EXPECT_EQ(newViewOutput->GetDynValidShape()[0].Dump(), "view_output_dim0");
    EXPECT_EQ(newViewOutput->GetDynValidShape()[1].Dump(), "view_output_dim1");
}
}
}