/**
* Copyright (c) 2026 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
/*!
* \file test_device_task_context.cpp
* \brief Unit tests for DeviceTaskContext, DeviceStitchContext, DeviceExecuteContext (includes former
* test_machine_encode_coverage cases).
*/
#include <gtest/gtest.h>
#include <array>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <fstream>
#include <cstdio>
#define private public
#define protected public
#include "interface/configs/config_manager.h"
#include "machine/device/dynamic/context/device_task_context.h"
#include "machine/utils/dynamic/dev_workspace.h"
#include "machine/utils/dynamic/dev_encode_function_dupped_data.h"
#include "interface/inner/tilefwk.h"
#include "interface/program/program.h"
#include "machine/device/dynamic/context/device_task_context.h"
#include "machine/device/dynamic/context/device_stitch_context.h"
#include "machine/device/dynamic/context/device_execute_context.h"
#include "machine/device/dynamic/context/device_slot_context.h"
#include "machine/utils/dynamic/dev_start_args.h"
#include "machine/utils/dynamic/dev_workspace.h"
#include "interface/machine/device/tilefwk/aikernel_data.h"
#include "interface/tileop/distributed/comm_context.h"
#include "tilefwk/data_type.h"
#include "tilefwk/platform.h"
#include "tilefwk/tilefwk.h"
using namespace npu::tile_fwk;
using namespace npu::tile_fwk::dynamic;
class TestDeviceTaskContext : public testing::Test {
public:
static void SetUpTestCase() {}
static void TearDownTestCase() {}
void SetUp() override { Platform::Instance().GetSoc().SetNPUArch(NPUArch::DAV_3510); }
void TearDown() override { Platform::Instance().GetSoc().SetNPUArch(NPUArch::DAV_UNKNOWN); }
protected:
void CreateMockDynDeviceTask(DynDeviceTask* dyntask, uint32_t coreFunctionCnt = 100)
{
if (dyntask == nullptr) {
return;
}
dyntask->devTask.coreFunctionCnt = coreFunctionCnt;
dyntask->dynFuncDataCacheListSize = 0;
for (size_t i = 0; i < DIE_NUM; i++) {
dyntask->devTask.dieReadyFunctionQue.readyDieAivCoreFunctionQue[i] = 0;
dyntask->devTask.dieReadyFunctionQue.readyDieAicCoreFunctionQue[i] = 0;
}
}
void CreateMockDevAscendProgram(DevAscendProgram* devProg, ArchInfo archInfo)
{
if (devProg == nullptr) {
return;
}
devProg->devArgs.archInfo = archInfo;
devProg->ctrlFlowCacheAnchor = &devProg->controlFlowCache;
devProg->controlFlowCache.isRecording = false;
devProg->controlFlowCache.isRecordingStopped = false;
devProg->controlFlowCache.cacheDataOffset = 0;
devProg->stitchMaxFunctionNum = 10;
devProg->stitchFunctionsize = 100;
}
DevAscendFunction* CreateDevAscendFunctionBuffer(
std::unique_ptr<uint8_t[]>& funcBuffer, uint8_t*& funcDataPtr, size_t kOpCount, size_t kFuncBufferSize)
{
(void)kOpCount;
funcBuffer = std::make_unique<uint8_t[]>(kFuncBufferSize);
memset_s(funcBuffer.get(), kFuncBufferSize, 0, kFuncBufferSize);
funcDataPtr = funcBuffer.get();
DevAscendFunction* devFunc = reinterpret_cast<DevAscendFunction*>(funcDataPtr);
funcDataPtr += sizeof(DevAscendFunction);
devFunc->rootHash = 0x12345678;
devFunc->funcKey = 100;
devFunc->sourceFunc = nullptr;
return devFunc;
}
void SetupDevAscendFunctionData(
DevAscendFunction* devFunc, uint8_t* funcDataPtr, uint8_t* funcBuffer, size_t kOpCount)
{
size_t currentOffset = sizeof(DevAscendFunction);
auto alignUp = [¤tOffset](size_t alignment) {
currentOffset = (currentOffset + alignment - 1) & ~(alignment - 1);
};
alignUp(alignof(SymInt));
devFunc->operationAttrList_.AssignOffsetSize(currentOffset, kOpCount);
SymInt* attrData = reinterpret_cast<SymInt*>(funcDataPtr);
for (size_t i = 0; i < kOpCount; i++) {
attrData[i] = SymInt(static_cast<uint64_t>(0));
}
currentOffset += kOpCount * sizeof(SymInt);
funcDataPtr += kOpCount * sizeof(SymInt);
alignUp(alignof(int32_t));
devFunc->opAttrOffsetList_.AssignOffsetSize(currentOffset, kOpCount);
int32_t* attrOffsets = reinterpret_cast<int32_t*>(funcDataPtr);
for (size_t i = 0; i < kOpCount; i++) {
attrOffsets[i] = static_cast<int32_t>(i);
}
currentOffset += kOpCount * sizeof(int32_t);
funcDataPtr += kOpCount * sizeof(int32_t);
alignUp(alignof(DevAscendOperation));
devFunc->operationList_.AssignOffsetSize(currentOffset, kOpCount);
DevAscendOperation* ops = reinterpret_cast<DevAscendOperation*>(funcDataPtr);
for (size_t i = 0; i < kOpCount; i++) {
new (&ops[i]) DevAscendOperation();
ops[i].debugOpmagic = static_cast<uint64_t>(i + 1);
size_t attrOffset = reinterpret_cast<uint8_t*>(attrData + i) - funcBuffer;
ops[i].attrList.AssignOffsetSize(attrOffset, 1);
ops[i].depGraphSuccList.AssignOffsetSize(0, 0);
ops[i].depGraphPredCount = 0;
ops[i].stitchIndex = 0;
}
}
DevAscendFunctionDuppedData* CreateDevAscendFunctionDuppedData(
std::unique_ptr<uint8_t[]>& duppedDataBuffer, uint8_t*& duppedDataPtr, DevAscendFunction* devFunc,
size_t kOpCount, size_t kDuppedDataBufferSize)
{
duppedDataBuffer = std::make_unique<uint8_t[]>(kDuppedDataBufferSize);
memset_s(duppedDataBuffer.get(), kDuppedDataBufferSize, 0, kDuppedDataBufferSize);
duppedDataPtr = duppedDataBuffer.get();
DevAscendFunctionDuppedData* duppedData = reinterpret_cast<DevAscendFunctionDuppedData*>(duppedDataPtr);
duppedDataPtr += sizeof(DevAscendFunctionDuppedData);
duppedData->source_ = devFunc;
duppedData->operationList_.size = kOpCount;
duppedData->operationList_.predCountBase = static_cast<uint32_t>(duppedDataPtr - duppedDataBuffer.get());
duppedData->operationList_.stitchBase =
duppedData->operationList_.predCountBase + kOpCount * sizeof(predcount_t);
duppedData->operationList_.stitchCount = 1;
predcount_t* predCounts = reinterpret_cast<predcount_t*>(duppedDataPtr);
for (size_t i = 0; i < kOpCount; i++) {
predCounts[i] = 0;
}
duppedDataPtr += kOpCount * sizeof(predcount_t);
for (size_t i = 0; i <= kOpCount; i++) {
new (duppedDataPtr + i * sizeof(DevAscendFunctionDuppedStitchList)) DevAscendFunctionDuppedStitchList();
}
duppedData->incastList_.size = 0;
duppedData->incastList_.base = 0;
duppedData->outcastList_.size = 0;
duppedData->outcastList_.base = 0;
duppedData->expressionList_.size = 0;
duppedData->expressionList_.base = 0;
return duppedData;
}
void SetupTestEnvironment(
DeviceTask& devTask, std::unique_ptr<int32_t[]>& opWrapListData, DevCceBinary* cceBinary, size_t kOpCount)
{
opWrapListData = std::make_unique<int32_t[]>(kOpCount);
for (size_t i = 0; i < kOpCount; i++) {
opWrapListData[i] = static_cast<int32_t>(i);
}
devTask.mixTaskData.wrapIdNum = 1;
devTask.mixTaskData.opWrapList[0] = reinterpret_cast<uint64_t>(opWrapListData.get());
cceBinary[0].coreType = 0;
cceBinary[0].psgId = 0;
cceBinary[0].funcHash = 0xABCDEF00;
}
void VerifyDumpTopoOutput(const std::string& testFilePath, size_t expectedLineCount)
{
std::ifstream inFile(testFilePath);
ASSERT_TRUE(inFile.is_open());
std::string line;
size_t lineCount = 0;
while (std::getline(inFile, line)) {
lineCount++;
EXPECT_FALSE(line.empty());
}
inFile.close();
EXPECT_EQ(lineCount, expectedLineCount);
}
WrapInfoQueue* SetupWrapQueueForTest(
DynDeviceTask* dyntask, DeviceTaskContext& taskContext, CoreType coreType, uint32_t wrapVecId,
DevAscendFunction& devFunc, DevCceBinary* cceBinary, int* calleeList, uint16_t* opWrapOffsetList = nullptr)
{
devFunc.wrapIdNum_ = 1;
dyntask->dynFuncDataCacheList[0].devFunc = &devFunc;
dyntask->dynFuncDataCacheListSize = 1;
dyntask->devTask.mixTaskData.wrapIdNum = 1;
dyntask->dynFuncDataCacheList[0].calleeList = calleeList;
dyntask->devTask.mixTaskData.opWrapOffsetList[0] = opWrapOffsetList;
cceBinary[0].coreType = static_cast<uint32_t>(coreType);
cceBinary[0].wrapVecId = wrapVecId;
cceBinary[0].mixResourceType = 0;
dyntask->cceBinary = cceBinary;
return taskContext.AllocWrapQueue(dyntask);
}
void SetupBasicTaskContext(
DeviceTaskContext& taskContext, DevStartArgsBase& startArgs, DevAscendProgram* devProg,
std::unique_ptr<DynDeviceTask>& dyntask, DeviceWorkspaceAllocator& workspace,
std::unique_ptr<uint8_t[]>& controlFlowCacheBuf, size_t kControlFlowCacheSize)
{
if (controlFlowCacheBuf != nullptr) {
devProg->controlFlowCache.cacheData =
DevRelocVector<uint8_t>(kControlFlowCacheSize, controlFlowCacheBuf.get());
devProg->controlFlowCache.isRecording = true;
}
taskContext.InitAllocator(devProg, workspace, &startArgs);
dyntask = std::make_unique<DynDeviceTask>(workspace);
CreateMockDynDeviceTask(dyntask.get(), 100);
}
};
// A task whose only cached function carries one wrap id must report that wrap processing is
// needed, and AllocWrapQueue must hand back an empty queue with a non-zero capacity.
TEST_F(TestDeviceTaskContext, test_build_ready_queue_calls_wrap_functions)
{
    DeviceTaskContext taskContext;
    DevStartArgsBase startArgs;
    constexpr size_t kControlFlowCacheSize = 64 * 1024;
    auto controlFlowCacheBuf = std::make_unique<uint8_t[]>(kControlFlowCacheSize);
    DevAscendProgram devProg;
    CreateMockDevAscendProgram(&devProg, ArchInfo::DAV_3510);
    devProg.stitchFunctionsize = 100;
    devProg.controlFlowCache.cacheData = DevRelocVector<uint8_t>(kControlFlowCacheSize, controlFlowCacheBuf.get());
    devProg.controlFlowCache.isRecording = true;
    DeviceWorkspaceAllocator workspace(&devProg);
    taskContext.InitAllocator(&devProg, workspace, &startArgs);
    auto dyntask = std::make_unique<DynDeviceTask>(workspace);
    CreateMockDynDeviceTask(dyntask.get(), 100);

    DevAscendFunction devFunc;
    devFunc.wrapIdNum_ = 1;
    dyntask->dynFuncDataCacheList[0].devFunc = &devFunc;
    dyntask->dynFuncDataCacheListSize = 1;
    dyntask->devTask.mixTaskData.wrapIdNum = 1;

    bool isNeedWrap = taskContext.IsNeedWrapProcess(dyntask.get(), &devProg);
    EXPECT_TRUE(isNeedWrap);

    WrapInfoQueue* wrapQueue = taskContext.AllocWrapQueue(dyntask.get());
    // Fatal on failure: every check below dereferences the queue.
    ASSERT_NE(wrapQueue, nullptr);
    EXPECT_EQ(wrapQueue->head, 0);
    EXPECT_EQ(wrapQueue->tail, 0);
    EXPECT_GT(wrapQueue->capacity, 0);
}
// Smoke test: calls ShowStats() on a default-constructed context. There is nothing to
// assert; the test only drives the statement for coverage.
TEST_F(TestDeviceTaskContext, ShowStats_HitsDevErrorMacroLines)
{
    DeviceTaskContext context;
    context.ShowStats();
}
// With stitchFunctionsize lowered to 10 and a task holding 100 core functions,
// InitReadyQueues is expected to report DEVICE_MACHINE_ERROR.
TEST_F(TestDeviceTaskContext, InitReadyQueues_ExceedsStitchSize_ReturnsError)
{
    DevAscendProgram program;
    CreateMockDevAscendProgram(&program, ArchInfo::DAV_3510);
    program.stitchFunctionsize = 10;

    DeviceTaskContext context;
    DevStartArgsBase args;
    DeviceWorkspaceAllocator allocator(&program);
    std::unique_ptr<DynDeviceTask> task;
    // A null cache buffer makes the helper skip the control-flow cache setup, leaving only
    // allocator initialisation and creation of the 100-function mock task.
    std::unique_ptr<uint8_t[]> noCacheBuffer;
    SetupBasicTaskContext(context, args, &program, task, allocator, noCacheBuffer, 0);

    ReadyCoreFunctionQueue* readyQueues[READY_QUEUE_SIZE] = {};
    const auto status = context.InitReadyQueues(task.get(), &program, readyQueues);
    EXPECT_EQ(status, DEVICE_MACHINE_ERROR);
}
// InitDieReadyQueues must publish a non-null, empty (head == tail == 0) AIV and AIC ready
// queue for every die.
TEST_F(TestDeviceTaskContext, test_init_die_ready_queues_mix_arch)
{
    DeviceTaskContext taskContext;
    DevStartArgsBase startArgs;
    constexpr size_t kControlFlowCacheSize = 64 * 1024;
    auto controlFlowCacheBuf = std::make_unique<uint8_t[]>(kControlFlowCacheSize);
    DevAscendProgram devProg;
    CreateMockDevAscendProgram(&devProg, ArchInfo::DAV_3510);
    devProg.controlFlowCache.cacheData = DevRelocVector<uint8_t>(kControlFlowCacheSize, controlFlowCacheBuf.get());
    devProg.controlFlowCache.isRecording = true;
    DeviceWorkspaceAllocator workspace(&devProg);
    taskContext.InitAllocator(&devProg, workspace, &startArgs);
    auto dyntask = std::make_unique<DynDeviceTask>(workspace);
    CreateMockDynDeviceTask(dyntask.get(), 100);

    taskContext.InitDieReadyQueues(dyntask.get(), &devProg);

    for (size_t i = 0; i < DIE_NUM; i++) {
        // Fatal on failure: the slots are turned into pointers and dereferenced below.
        ASSERT_NE(dyntask->devTask.dieReadyFunctionQue.readyDieAivCoreFunctionQue[i], 0UL);
        ASSERT_NE(dyntask->devTask.dieReadyFunctionQue.readyDieAicCoreFunctionQue[i], 0UL);
        auto aivQueue = reinterpret_cast<ReadyCoreFunctionQueue*>(
            dyntask->devTask.dieReadyFunctionQue.readyDieAivCoreFunctionQue[i]);
        auto aicQueue = reinterpret_cast<ReadyCoreFunctionQueue*>(
            dyntask->devTask.dieReadyFunctionQue.readyDieAicCoreFunctionQue[i]);
        ASSERT_NE(aivQueue, nullptr);
        ASSERT_NE(aicQueue, nullptr);
        EXPECT_EQ(aivQueue->head_, 0U);
        EXPECT_EQ(aivQueue->tail_, 0U);
        EXPECT_EQ(aicQueue->head_, 0U);
        EXPECT_EQ(aicQueue->tail_, 0U);
    }
}
// BuildReadyQueue on a task with a single function that has no zero-predecessor operations
// (AIV, AIC and AICPU counts all 0) and loopDieId_ == 1 must return DEVICE_MACHINE_OK.
TEST_F(TestDeviceTaskContext, test_build_ready_queue_core_function_mix_arch)
{
    constexpr size_t kCacheBytes = 64 * 1024;
    auto cacheStorage = std::make_unique<uint8_t[]>(kCacheBytes);

    // Program with a recording control-flow cache backed by cacheStorage.
    DevAscendProgram program;
    CreateMockDevAscendProgram(&program, ArchInfo::DAV_3510);
    program.stitchFunctionsize = 10;
    program.controlFlowCache.cacheData = DevRelocVector<uint8_t>(kCacheBytes, cacheStorage.get());
    program.controlFlowCache.isRecording = true;

    DeviceTaskContext context;
    DevStartArgsBase args;
    DeviceWorkspaceAllocator allocator(&program);
    context.InitAllocator(&program, allocator, &args);

    auto task = std::make_unique<DynDeviceTask>(allocator);
    CreateMockDynDeviceTask(task.get(), 8);

    // One cached function whose dupped data is bound to die 1.
    DevAscendFunction function;
    function.predInfo_.totalZeroPredAIV = 0;
    function.predInfo_.totalZeroPredAIC = 0;
    function.predInfo_.totalZeroPredAicpu = 0;
    DevAscendFunctionDuppedData dupped{};
    dupped.loopDieId_ = 1;
    dupped.source_ = &function;

    auto& cacheEntry = task->dynFuncDataCacheList[0];
    cacheEntry.devFunc = &function;
    cacheEntry.duppedData = &dupped;
    task->dynFuncDataCacheListSize = 1;

    const int status = context.BuildReadyQueue(task.get(), &program);
    EXPECT_EQ(status, DEVICE_MACHINE_OK);
}
// BuildReadyQueue on a 32-operation function that reports 10 zero-predecessor AIV and 10
// zero-predecessor AIC operations must fill die 0's AIV queue with ids 0..9 and its AIC
// queue with ids 10..19.
TEST_F(TestDeviceTaskContext, test_build_ready_queue_dupped_data)
{
    DeviceTaskContext taskContext;
    DevStartArgsBase startArgs;
    constexpr size_t kControlFlowCacheSize = 64 * 1024 * 8;
    auto controlFlowCacheBuf = std::make_unique<uint8_t[]>(kControlFlowCacheSize);
    DevAscendProgram devProg;
    CreateMockDevAscendProgram(&devProg, ArchInfo::DAV_3510);
    devProg.controlFlowCache.cacheData = DevRelocVector<uint8_t>(kControlFlowCacheSize, controlFlowCacheBuf.get());
    devProg.controlFlowCache.isRecording = true;
    DeviceWorkspaceAllocator workspace(&devProg);
    taskContext.InitAllocator(&devProg, workspace, &startArgs);
    auto dyntask = std::make_unique<DynDeviceTask>(workspace);
    CreateMockDynDeviceTask(dyntask.get(), 100);

    constexpr size_t kOpCount = 32;
    constexpr size_t kFuncBufferSize = kOpCount * 1024;
    constexpr size_t kDuppedDataBufferSize = kOpCount * 512;
    std::unique_ptr<uint8_t[]> funcBuffer;
    uint8_t* funcDataPtr = nullptr;
    DevAscendFunction* devFunc = CreateDevAscendFunctionBuffer(funcBuffer, funcDataPtr, kOpCount, kFuncBufferSize);
    SetupDevAscendFunctionData(devFunc, funcDataPtr, funcBuffer.get(), kOpCount);
    std::unique_ptr<uint8_t[]> duppedDataBuffer;
    uint8_t* duppedDataPtr = nullptr;
    DevAscendFunctionDuppedData* duppedData =
        CreateDevAscendFunctionDuppedData(duppedDataBuffer, duppedDataPtr, devFunc, kOpCount, kDuppedDataBufferSize);
    devFunc->predInfo_.totalZeroPredAIV = 10;
    devFunc->predInfo_.totalZeroPredAIC = 10;
    devFunc->predInfo_.totalZeroPredAicpu = 0;
    dyntask->dynFuncDataCacheList[0].devFunc = devFunc;
    dyntask->dynFuncDataCacheList[0].duppedData = duppedData;
    dyntask->dynFuncDataCacheListSize = 1;

    int ret = taskContext.BuildReadyQueue(dyntask.get(), &devProg);
    // Fatal on failure: the queues are only meaningful (and safe to read) after success.
    ASSERT_EQ(ret, DEVICE_MACHINE_OK);

    auto aivQueue = reinterpret_cast<ReadyCoreFunctionQueue*>(
        dyntask->devTask.dieReadyFunctionQue.readyDieAivCoreFunctionQue[0]);
    auto aicQueue = reinterpret_cast<ReadyCoreFunctionQueue*>(
        dyntask->devTask.dieReadyFunctionQue.readyDieAicCoreFunctionQue[0]);
    ASSERT_NE(aivQueue, nullptr);
    ASSERT_NE(aicQueue, nullptr);

    EXPECT_EQ(aivQueue->head_, 0);
    EXPECT_EQ(aicQueue->head_, 0);
    // Fatal on failure: std::equal below reads one gold entry per queued element, so the
    // queues must not hold more than the 10 entries the gold arrays provide.
    ASSERT_EQ(aivQueue->tail_, 10);
    ASSERT_EQ(aicQueue->tail_, 10);

    ReadyCoreFunctionQueue::ValueType aivQueueGold[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
    ReadyCoreFunctionQueue::ValueType aicQueueGold[] = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
    EXPECT_TRUE(std::equal(aivQueue->begin(), aivQueue->end(), aivQueueGold));
    EXPECT_TRUE(std::equal(aicQueue->begin(), aicQueue->end(), aicQueueGold));
}
namespace {
// Re-constructs `q` in place on top of `elemBuf`, enqueues `tail` entries and dequeues
// `head` of them, then overwrites the leading buffer slots with `ids`. Slot i is written
// only while i is a valid index into both `ids` and `elemBuf` and head + i < tail.
void InitReadyQueueSlotMulti(
    ReadyCoreFunctionQueue& q, std::array<taskid_t, 4>& elemBuf, uint32_t head, uint32_t tail,
    const std::vector<taskid_t>& ids)
{
    new (&q) ReadyCoreFunctionQueue(elemBuf.size(), elemBuf.data());
    q.UnsafeEnqueue(elemBuf.data(), tail);
    q.Dequeue(head);

    size_t slot = 0;
    while (slot < ids.size() && slot < elemBuf.size() && (head + slot) < tail) {
        elemBuf[slot] = ids[slot];
        ++slot;
    }
}

// Single-id flavour of InitReadyQueueSlotMulti: buffer slot 0 receives `firstId` when
// tail > head, otherwise the buffer content is left untouched.
void InitReadyQueueSlot(
    ReadyCoreFunctionQueue& q, std::array<taskid_t, 4>& elemBuf, uint32_t head, uint32_t tail, taskid_t firstId)
{
    const std::vector<taskid_t> singleId(1, firstId);
    InitReadyQueueSlotMulti(q, elemBuf, head, tail, singleId);
}

// Control-flow entry stub used by the tests: ignores every argument except the execute
// context, which it puts into the DEVICE_MACHINE_ERROR state.
void ControlFlowSetError(
    struct DeviceExecuteContext* ctx, int64_t* symbolTable, RuntimeCallEntryType runtimeCallList[T_RUNTIME_CALL_MAX],
    DevStartArgsBase* startArgsBase)
{
    static_cast<void>(symbolTable);
    static_cast<void>(runtimeCallList);
    static_cast<void>(startArgsBase);
    ctx->SetErrorState(DEVICE_MACHINE_ERROR);
}
} // namespace
// Coverage test: gives the first three ready queues one entry each and calls
// DumpReadyQueue. Nothing is asserted on the output.
TEST_F(TestDeviceTaskContext, DumpReadyQueue_CoversLoggingLines)
{
    DeviceWorkspaceAllocator allocator;
    auto task = std::make_unique<DynDeviceTask>(allocator);
    task->devTask.coreFunctionCnt = 3;

    // Backing storage for the three queues that get an entry.
    std::array<taskid_t, 4> aivStorage{};
    std::array<taskid_t, 4> aicStorage{};
    std::array<taskid_t, 4> aicpuStorage{};

    ReadyCoreFunctionQueue queues[READY_QUEUE_SIZE];
    InitReadyQueueSlot(queues[0], aivStorage, 0, 1, MakeTaskID(0, 1));
    InitReadyQueueSlot(queues[1], aicStorage, 0, 1, MakeTaskID(0, 2));
    InitReadyQueueSlot(queues[2], aicpuStorage, 0, 1, MakeTaskID(0, 3));

    size_t slot = 0;
    for (auto& queue : queues) {
        task->readyQueue[slot] = &queue;
        ++slot;
    }

    DeviceTaskContext::DumpReadyQueue(task.get(), "ut_cov");
}
// Coverage test: calls DumpDepend on a task whose function header lists zero functions,
// with two AIV entries, one AIC entry and an empty third ready queue. Nothing is asserted
// on the output.
TEST_F(TestDeviceTaskContext, DumpDepend_CoversHeadLoggingWithoutDupData)
{
    DeviceWorkspaceAllocator allocator;
    auto task = std::make_unique<DynDeviceTask>(allocator);
    task->devTask.coreFunctionCnt = 4;

    // Header describing an empty function list.
    DynFuncHeader funcHeader{};
    funcHeader.funcSize = sizeof(DynFuncHeader);
    funcHeader.funcNum = 0;
    funcHeader.seqNo = 42;
    task->dynFuncDataList = &funcHeader;

    std::array<taskid_t, 4> aivStorage{};
    std::array<taskid_t, 4> aicStorage{};
    std::array<taskid_t, 4> aicpuStorage{};
    ReadyCoreFunctionQueue queues[READY_QUEUE_SIZE];
    InitReadyQueueSlotMulti(queues[0], aivStorage, 0, 2, {MakeTaskID(0, 0), MakeTaskID(0, 1)});
    InitReadyQueueSlot(queues[1], aicStorage, 0, 1, MakeTaskID(1, 0));
    InitReadyQueueSlot(queues[2], aicpuStorage, 0, 0, 0);
    size_t slot = 0;
    for (auto& queue : queues) {
        task->readyQueue[slot] = &queue;
        ++slot;
    }

    // Two input and two output tensors with distinct addresses.
    std::array<DevTensorData, 4> tensorList{};
    tensorList[0].address = 0x1000ULL;
    tensorList[1].address = 0x1100ULL;
    tensorList[2].address = 0x2000ULL;
    tensorList[3].address = 0x2100ULL;

    DevStartArgs args{};
    args.devTensorList = tensorList.data();
    args.inputTensorSize = 2;
    args.outputTensorSize = 2;
    args.contextWorkspaceAddr = 0x3000ULL;

    DevAscendProgram program{};
    DeviceTaskContext::DumpDepend(task.get(), &program, &args, "ut_cov");
}
// The root alloc and root stitch runtime calls must both return nullptr when they are
// handed a null context.
TEST_F(TestDeviceTaskContext, DeviceExecute_InvalidCtx_ReturnsNull)
{
    const auto allocResult = DeviceExecuteContext::DeviceExecuteRuntimeCallRootAlloc(nullptr, 0);
    EXPECT_EQ(allocResult, nullptr);

    const auto stitchResult = DeviceExecuteContext::DeviceExecuteRuntimeCallRootStitch(nullptr, 0);
    EXPECT_EQ(stitchResult, nullptr);
}
// The log runtime call must accept a null context and return nullptr.
TEST_F(TestDeviceTaskContext, DeviceExecuteRuntimeCallLog_IsNullSafe)
{
    const auto logResult = DeviceExecuteContext::DeviceExecuteRuntimeCallLog(nullptr, 7ULL);
    EXPECT_EQ(logResult, nullptr);
}
// Smoke test: calls DumpStitchInfo() on a default-constructed stitch context. Nothing is
// asserted.
TEST_F(TestDeviceTaskContext, DeviceStitchContext_DumpStitchInfo_Empty)
{
    DeviceStitchContext stitchContext;
    stitchContext.DumpStitchInfo();
}
// Coverage test: calls the shmem allocator runtime call on a zero-filled execute context
// with one communication group whose window sizes are 64 (data) and 32 (status), using the
// payload {0, 0, 128, 8}. The return value is ignored.
TEST_F(TestDeviceTaskContext, DeviceExecuteRuntimeCallShmemAllocator_ExceedsWinSize_LogsError)
{
    // Communication context describing the window sizes.
    TileOp::CommContext commContext{};
    commContext.winDataSize = 64;
    commContext.winStatusSize = 32;
    int64_t commContextPtrs[1] = {reinterpret_cast<int64_t>(&commContext)};

    DevStartArgs startArgs{};
    startArgs.commContexts = commContextPtrs;
    startArgs.commGroupNum = 1;

    // The execute context is never constructed; it lives in zero-filled raw storage.
    alignas(64) unsigned char rawContext[sizeof(DeviceExecuteContext)];
    (void)memset_s(rawContext, sizeof(rawContext), 0, sizeof(rawContext));
    auto* execContext = reinterpret_cast<DeviceExecuteContext*>(rawContext);
    execContext->args = &startArgs;
    execContext->shmemAddrOffset[0][0] = 0;
    execContext->shmemAddrOffset[0][1] = 0;

    uint64_t request[] = {0, 0, 128, 8};
    (void)DeviceExecuteContext::DeviceExecuteRuntimeCallShmemAllocator(
        execContext, reinterpret_cast<uint64_t>(request));
}
// Placeholder for the MoveTo "too many functions" error path. The test is skipped
// unconditionally; the skip message (in Chinese) states that the scenario tends to hang in
// the current parallel death-test environment and is therefore skipped for now.
TEST_F(TestDeviceTaskContext, DeviceStitchContext_MoveTo_TooManyFunctions_ReturnsError)
{
GTEST_SKIP() << "该场景在当前并行 death test 环境下易卡住,暂跳过。";
}
class TestMachineEncodeCoverage : public testing::Test {
protected:
void SetUp() override
{
Program::GetInstance().Reset();
config::Reset();
config::SetPlatformConfig(KEY_ENABLE_AIHAC_BACKEND, true);
TileShape::Current().SetVecTile(32, 32);
TileShape::Current().SetCubeTile({32, 32}, {32, 32}, {32, 32});
}
void TearDown() override
{
Program::GetInstance().Reset();
config::Reset();
}
};
// Placeholder for the MoveTo max-function-number boundary case. The test is skipped
// unconditionally; the skip message (in Chinese) states that this boundary scenario risks
// hanging in the current environment and that the case is kept to be sorted out later.
TEST_F(TestMachineEncodeCoverage, MoveTo_MaxFunctionNumBoundary_ReturnsOk)
{
GTEST_SKIP() << "该边界场景在当前环境存在卡住风险,保留用例后续再收敛。";
}
// Runs the control flow with an entry point that only flags an error on the execute
// context (ControlFlowSetError) and expects RunControlFlow to return DEVICE_MACHINE_ERROR.
TEST_F(TestMachineEncodeCoverage, FastStitch_SlotIdxBeyondSize_LogsAndContinues)
{
    DevAscendProgram program{};
    program.controlFlowCache.isRecording = false;

    DevStartArgs startArgs{};
    startArgs.controlFlowEntry = reinterpret_cast<void*>(ControlFlowSetError);
    startArgs.devProg = &program;

    DeviceExecuteContext execContext(&startArgs);
    const auto status = execContext.RunControlFlow(&startArgs);
    EXPECT_EQ(status, DEVICE_MACHINE_ERROR);
}
class TestDeviceExecuteContext : public testing::Test {
public:
static void SetUpTestCase() {}
static void TearDownTestCase() {}
void SetUp() override { Platform::Instance().GetSoc().SetNPUArch(NPUArch::DAV_3510); }
void TearDown() override { Platform::Instance().GetSoc().SetNPUArch(NPUArch::DAV_UNKNOWN); }
};
// The get-loop-die-id runtime call must return a pointer to a value equal to the
// context's current loopDieId_, both for -1 and for a regular id.
TEST_F(TestDeviceExecuteContext, test_runtime_call_get_loop_die_id)
{
    // The context is never constructed; it lives in zero-filled raw storage.
    alignas(alignof(DeviceExecuteContext)) char buffer[sizeof(DeviceExecuteContext)];
    DeviceExecuteContext* ctx = reinterpret_cast<DeviceExecuteContext*>(buffer);
    (void)memset_s(buffer, sizeof(DeviceExecuteContext), 0, sizeof(DeviceExecuteContext));

    ctx->loopDieId_ = -1;
    void* result = DeviceExecuteContext::DeviceExecuteRuntimeCallGetLoopDieId(ctx, 0);
    // Fatal on failure: the result is dereferenced right below.
    ASSERT_NE(result, nullptr);
    int8_t* dieIdPtr = static_cast<int8_t*>(result);
    EXPECT_EQ(*dieIdPtr, -1);

    ctx->loopDieId_ = 7;
    result = DeviceExecuteContext::DeviceExecuteRuntimeCallGetLoopDieId(ctx, 0);
    ASSERT_NE(result, nullptr);
    dieIdPtr = static_cast<int8_t*>(result);
    EXPECT_EQ(*dieIdPtr, 7);
}
// The set-loop-die-id runtime call must copy the context's loopDieId_ into the dupped data
// referenced by currDevRootDup and return nullptr.
TEST_F(TestDeviceExecuteContext, test_runtime_call_set_loop_die_id)
{
    // The context is never constructed; it lives in zero-filled raw storage.
    alignas(alignof(DeviceExecuteContext)) char storage[sizeof(DeviceExecuteContext)];
    (void)memset_s(storage, sizeof(DeviceExecuteContext), 0, sizeof(DeviceExecuteContext));
    DeviceExecuteContext* execContext = reinterpret_cast<DeviceExecuteContext*>(storage);

    DevAscendFunctionDuppedData rootDup{};
    rootDup.loopDieId_ = -1;
    execContext->currDevRootDup.dupTiny_.ptr = reinterpret_cast<uint64_t>(&rootDup);

    // First value.
    execContext->loopDieId_ = 3;
    void* returned = DeviceExecuteContext::DeviceExecuteRuntimeCallSetLoopDieId(execContext, 0);
    EXPECT_EQ(returned, nullptr);
    EXPECT_EQ(rootDup.loopDieId_, 3);

    // A second call overwrites the previously stored value.
    execContext->loopDieId_ = 12;
    returned = DeviceExecuteContext::DeviceExecuteRuntimeCallSetLoopDieId(execContext, 0);
    EXPECT_EQ(returned, nullptr);
    EXPECT_EQ(rootDup.loopDieId_, 12);
}
// DumpTopo on a dupped function with kOpCount operations must write exactly kOpCount
// non-empty lines to the output stream.
TEST_F(TestDeviceTaskContext, test_dev_ascend_function_dupped_dump_topo)
{
    constexpr size_t kOpCount = 4;
    constexpr size_t kFuncBufferSize = 4096;
    constexpr size_t kDuppedDataBufferSize = 2048;

    std::unique_ptr<uint8_t[]> funcBuffer;
    uint8_t* funcDataPtr = nullptr;
    DevAscendFunction* devFunc = CreateDevAscendFunctionBuffer(funcBuffer, funcDataPtr, kOpCount, kFuncBufferSize);
    SetupDevAscendFunctionData(devFunc, funcDataPtr, funcBuffer.get(), kOpCount);

    std::unique_ptr<uint8_t[]> duppedDataBuffer;
    uint8_t* duppedDataPtr = nullptr;
    DevAscendFunctionDuppedData* duppedData =
        CreateDevAscendFunctionDuppedData(duppedDataBuffer, duppedDataPtr, devFunc, kOpCount, kDuppedDataBufferSize);

    DevAscendFunctionDupped funcDupped;
    // Value-initialised so that no member other than ptr is left indeterminate.
    WsAllocation tinyAlloc{};
    tinyAlloc.ptr = reinterpret_cast<uint64_t>(duppedData);
    funcDupped = DevAscendFunctionDupped(tinyAlloc);

    auto devTaskPtr = std::make_unique<DeviceTask>();
    DeviceTask& devTask = *devTaskPtr;
    std::unique_ptr<int32_t[]> opWrapListData;
    // Value-initialised like the other tests in this file: SetupTestEnvironment fills only
    // coreType, psgId and funcHash, and the remaining fields must not be indeterminate when
    // the binary is handed to DumpTopo.
    DevCceBinary cceBinary[1] = {};
    SetupTestEnvironment(devTask, opWrapListData, cceBinary, kOpCount);

    std::string testFilePath = "./test_dump_topo_direct_output.txt";
    {
        std::ofstream outFile(testFilePath);
        ASSERT_TRUE(outFile.is_open());
        int seqNo = 0;
        int funcIdx = 0;
        bool enableVFFusion = false;
        funcDupped.DumpTopo(outFile, seqNo, funcIdx, cceBinary, enableVFFusion, &devTask);
        // Close before verification so the content is flushed to disk.
        outFile.close();
        VerifyDumpTopoOutput(testFilePath, kOpCount);
    }
    std::remove(testFilePath.c_str());
}
// Coverage test: calls ProcessWrapQueue with a null wrap queue. Nothing is asserted.
TEST_F(TestDeviceTaskContext, test_process_wrap_queue_nullptr)
{
    DevAscendProgram program;
    CreateMockDevAscendProgram(&program, ArchInfo::DAV_3510);

    DeviceTaskContext context;
    DevStartArgsBase args;
    DeviceWorkspaceAllocator allocator(&program);
    std::unique_ptr<DynDeviceTask> task;
    // No control-flow cache buffer is attached for this case.
    std::unique_ptr<uint8_t[]> noCacheBuffer;
    SetupBasicTaskContext(context, args, &program, task, allocator, noCacheBuffer, 0);

    WrapInfoQueue* const missingQueue = nullptr;
    context.ProcessWrapQueue(task.get(), 1, 0, 0, missingQueue);
}
// Processing the same wrap id twice must add one queue entry the first time and leave the
// tail unchanged the second time.
TEST_F(TestDeviceTaskContext, test_process_wrap_queue_update_existing_wrap)
{
    constexpr size_t kCacheBytes = 64 * 1024;

    DevAscendProgram program;
    CreateMockDevAscendProgram(&program, ArchInfo::DAV_3510);
    DeviceTaskContext context;
    DevStartArgsBase args;
    DeviceWorkspaceAllocator allocator(&program);
    std::unique_ptr<DynDeviceTask> task;
    auto cacheStorage = std::make_unique<uint8_t[]>(kCacheBytes);
    SetupBasicTaskContext(context, args, &program, task, allocator, cacheStorage, kCacheBytes);

    // A single AIC function on wrap vector 0.
    DevAscendFunction function;
    DevCceBinary binaries[1] = {};
    int callees[1] = {0};
    uint16_t wrapOffsets[2] = {0};
    WrapInfoQueue* queue =
        SetupWrapQueueForTest(task.get(), context, CoreType::AIC, 0, function, binaries, callees, wrapOffsets);
    ASSERT_NE(queue, nullptr);

    // First pass creates the entry.
    context.ProcessWrapQueue(task.get(), 1, 0, 0, queue);
    EXPECT_EQ(queue->tail, 1);

    // Second pass with the same arguments must not append another one.
    context.ProcessWrapQueue(task.get(), 1, 0, 0, queue);
    EXPECT_EQ(queue->tail, 1);
}
// An AIV function with wrapVecId 0 must be stored in the AIV0 slot of the new wrap entry,
// leaving the AIC and AIV1 slots at AICORE_TASK_INIT.
TEST_F(TestDeviceTaskContext, test_process_wrap_queue_aiv0)
{
    constexpr size_t kCacheBytes = 64 * 1024;

    DevAscendProgram program;
    CreateMockDevAscendProgram(&program, ArchInfo::DAV_3510);
    DeviceTaskContext context;
    DevStartArgsBase args;
    DeviceWorkspaceAllocator allocator(&program);
    std::unique_ptr<DynDeviceTask> task;
    auto cacheStorage = std::make_unique<uint8_t[]>(kCacheBytes);
    SetupBasicTaskContext(context, args, &program, task, allocator, cacheStorage, kCacheBytes);

    DevAscendFunction function;
    DevCceBinary binaries[1] = {};
    int callees[1] = {0};
    uint16_t wrapOffsets[2] = {0};
    WrapInfoQueue* queue =
        SetupWrapQueueForTest(task.get(), context, CoreType::AIV, 0, function, binaries, callees, wrapOffsets);
    ASSERT_NE(queue, nullptr);

    context.ProcessWrapQueue(task.get(), 1, 0, 0, queue);

    EXPECT_EQ(queue->tail, 1);
    const auto& entry = queue->elem[0];
    EXPECT_EQ(entry.wrapId, 1);
    EXPECT_EQ(entry.tasklist[WRAP_IDX_AIC], AICORE_TASK_INIT);
    EXPECT_EQ(entry.tasklist[WRAP_IDX_AIV0], MakeTaskID(0, 0));
    EXPECT_EQ(entry.tasklist[WRAP_IDX_AIV1], AICORE_TASK_INIT);
}
// An AIV function with wrapVecId 1 must be stored in the AIV1 slot of the new wrap entry,
// leaving the AIC and AIV0 slots at AICORE_TASK_INIT.
TEST_F(TestDeviceTaskContext, test_process_wrap_queue_aiv1)
{
    constexpr size_t kCacheBytes = 64 * 1024;

    DevAscendProgram program;
    CreateMockDevAscendProgram(&program, ArchInfo::DAV_3510);
    DeviceTaskContext context;
    DevStartArgsBase args;
    DeviceWorkspaceAllocator allocator(&program);
    std::unique_ptr<DynDeviceTask> task;
    auto cacheStorage = std::make_unique<uint8_t[]>(kCacheBytes);
    SetupBasicTaskContext(context, args, &program, task, allocator, cacheStorage, kCacheBytes);

    DevAscendFunction function;
    DevCceBinary binaries[1] = {};
    int callees[1] = {0};
    uint16_t wrapOffsets[2] = {0};
    WrapInfoQueue* queue =
        SetupWrapQueueForTest(task.get(), context, CoreType::AIV, 1, function, binaries, callees, wrapOffsets);
    ASSERT_NE(queue, nullptr);

    context.ProcessWrapQueue(task.get(), 1, 0, 0, queue);

    EXPECT_EQ(queue->tail, 1);
    const auto& entry = queue->elem[0];
    EXPECT_EQ(entry.wrapId, 1);
    EXPECT_EQ(entry.tasklist[WRAP_IDX_AIC], AICORE_TASK_INIT);
    EXPECT_EQ(entry.tasklist[WRAP_IDX_AIV0], AICORE_TASK_INIT);
    EXPECT_EQ(entry.tasklist[WRAP_IDX_AIV1], MakeTaskID(0, 0));
}