* Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
* MindIE is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
*/
#include <gtest/gtest.h>
#include <deque>
#define private public
#include "self_attn_block_manager.h"
#include "sequence_group.h"
#include "hashless_block_allocator.h"
namespace mindie_llm {
// Static configuration of one block-manager scenario.
// Instances are built with the five-argument constructor inside systemTestData.
struct BlockManagerSetup {
    // Block size handed to BlockManagerConfig, to every Sequence, and to the token-chunking check.
    size_t blockSize;
    // Number of CPU blocks handed to BlockManagerConfig.
    size_t cpuBlockNum;
    // Number of NPU blocks handed to BlockManagerConfig.
    size_t npuBlockNum;
    // Caching flag handed to BlockManagerConfig.
    bool enableCaching;
    // How many leading entries of SystemTestData::operations the test replays.
    size_t numTests;

    // Stores each argument in the member of the corresponding position.
    BlockManagerSetup(size_t blockSizeArg, size_t cpuBlocksArg, size_t npuBlocksArg, bool cachingArg,
                      size_t numTestsArg)
        : blockSize{blockSizeArg},
          cpuBlockNum{cpuBlocksArg},
          npuBlockNum{npuBlocksArg},
          enableCaching{cachingArg},
          numTests{numTestsArg}
    {
    }
};
// One step of a scenario: which block-manager API to drive and what the test expects afterwards.
// The struct is initialized positionally in systemTestData, so the field order must not change.
struct OperationParams {
// Key under which the test looks up (or creates on first use) the SequenceGroup for this step.
RequestId requestId;
// Operation selector; the test recognizes "Allocate", "AppendSlot", "SwapOut", "SwapIn" and "Free".
std::string api;
// Sequence id used when creating the Sequence and when calling Free / GetBlockIds.
SequenceId seqId;
// Prompt tokens for the Sequence; only read when the request id is seen for the first time.
std::vector<TokenId> prompts;
// Tokens added to the sequence's output tokens before CanAppendSlot is queried ("AppendSlot" steps).
std::vector<TokenId> tokensToAppend;
// Expected CanAllocate result; only element [0] is read ("Allocate" steps).
std::vector<AllocStatus> canAllocate;
// Expected CanAppendSlot result; only element [0] is read ("AppendSlot" steps).
std::vector<bool> canAppend;
// Expected CanSwapOut result; only element [0] is read ("SwapOut" steps).
std::vector<bool> canSwapOut;
// Expected CanSwapIn result; only element [0] is read ("SwapIn" steps).
std::vector<AllocStatus> canSwapIn;
// Expected CPU free-block list after the step, compared in order against the allocator's free list.
std::deque<BlockId> cpuFreeBlockIds;
// Expected NPU free-block list after the step, compared in order against the allocator's free list.
std::deque<BlockId> npuFreeBlockIds;
};
// One value-parameterized scenario: the block-manager configuration plus the ordered steps to replay.
// Initialized with brace elision in systemTestData (setup first, then the operations in order).
struct SystemTestData {
// Configuration of the block manager; setup.numTests says how many operations are replayed.
BlockManagerSetup setup;
// Fixed-capacity step storage. Entries that a scenario does not list are left empty by aggregate
// initialization, and the test only walks the first setup.numTests entries.
OperationParams operations[100];
};
// Scenario table for BlockManagerSystemTest.
// Each entry is: BlockManagerSetup(blockSize, cpuBlockNum, npuBlockNum, enableCaching, numTests)
// followed by the operations, each written positionally as
//   {requestId, api, seqId, prompts, tokensToAppend,
//    canAllocate, canAppend, canSwapOut, canSwapIn, cpuFreeBlockIds, npuFreeBlockIds}.
// numTests must equal the number of operations listed in the entry; steps beyond it are never replayed.
const SystemTestData systemTestData[] = {
    // Allocate a one-block prompt, then free it.
    {
        BlockManagerSetup(4, 1, 1, false, 2),
        OperationParams{"Req:0", "Allocate", 0, {1, 2, 3, 4}, {}, {AllocStatus::OK}, {}, {}, {}, {1}, {}},
        OperationParams{"Req:0", "Free", 0, {}, {}, {}, {}, {}, {}, {1}, {0}},
    },
    // Expected CanAllocate outcomes NEVER / OK / LATER for three requests competing for one NPU block.
    {
        BlockManagerSetup(4, 1, 1, false, 4),
        OperationParams{"Req:0", "Allocate", 0, {1, 2, 3, 4, 5, 6}, {}, {AllocStatus::NEVER}, {}, {}, {}, {1}, {0}},
        OperationParams{"Req:1", "Allocate", 0, {1, 2, 3, 4}, {}, {AllocStatus::OK}, {}, {}, {}, {1}, {}},
        OperationParams{"Req:2", "Allocate", 0, {6, 7, 8, 9}, {}, {AllocStatus::LATER}, {}, {}, {}, {1}, {}},
        OperationParams{"Req:0", "Free", 0, {}, {}, {}, {}, {}, {}, {1}, {0}},
    },
    // Allocate, append one token (expected to succeed), then free.
    {
        BlockManagerSetup(4, 2, 2, false, 3),
        OperationParams{"Req:0", "Allocate", 0, {1, 2, 3, 4}, {}, {AllocStatus::OK}, {}, {}, {}, {2, 3}, {1}},
        OperationParams{"Req:0", "AppendSlot", 0, {}, {5}, {}, {true}, {}, {}, {2, 3}, {}},
        OperationParams{"Req:0", "Free", 0, {}, {}, {}, {}, {}, {}, {2, 3}, {0, 1}},
    },
    // Allocate the only NPU block; the following append is expected to be rejected.
    {
        BlockManagerSetup(4, 2, 1, false, 2),
        OperationParams{"Req:0", "Allocate", 0, {1, 2, 3, 4}, {}, {AllocStatus::OK}, {}, {}, {}, {1, 2}, {}},
        OperationParams{"Req:0", "AppendSlot", 0, {}, {5}, {}, {false}, {}, {}, {1, 2}, {}},
    },
    // Allocate, append, swap out, swap in, free.
    {
        BlockManagerSetup(4, 2, 2, false, 5),
        OperationParams{"Req:0", "Allocate", 0, {1, 2, 3, 4}, {}, {AllocStatus::OK}, {}, {}, {}, {2, 3}, {1}},
        OperationParams{"Req:0", "AppendSlot", 0, {}, {5}, {}, {true}, {}, {}, {2, 3}, {}},
        OperationParams{"Req:0", "SwapOut", 0, {}, {}, {}, {}, {true}, {}, {}, {0, 1}},
        OperationParams{"Req:0", "SwapIn", 0, {}, {}, {}, {}, {}, {AllocStatus::OK}, {2, 3}, {}},
        OperationParams{"Req:0", "Free", 0, {}, {}, {}, {}, {}, {}, {2, 3}, {0, 1}},
    },
    // No CPU blocks configured: swap out is expected to be rejected.
    {
        BlockManagerSetup(4, 0, 2, false, 3),
        OperationParams{"Req:0", "Allocate", 0, {1, 2, 3, 4}, {}, {AllocStatus::OK}, {}, {}, {}, {}, {1}},
        OperationParams{"Req:0", "AppendSlot", 0, {}, {5}, {}, {true}, {}, {}, {}, {}},
        OperationParams{"Req:0", "SwapOut", 0, {}, {}, {}, {}, {false}, {}, {}, {}},
    },
    // Swap a request out, fill the NPU with two other requests, then expect swap in to report LATER.
    {
        BlockManagerSetup(4, 1, 2, false, 6),
        OperationParams{"Req:0", "Allocate", 0, {1, 2, 3}, {}, {AllocStatus::OK}, {}, {}, {}, {2}, {1}},
        OperationParams{"Req:0", "AppendSlot", 0, {}, {4}, {}, {true}, {}, {}, {2}, {1}},
        OperationParams{"Req:0", "SwapOut", 0, {}, {}, {}, {}, {true}, {}, {}, {1, 0}},
        OperationParams{"Req:1", "Allocate", 1, {5, 6, 7}, {}, {AllocStatus::OK}, {}, {}, {}, {}, {0}},
        OperationParams{"Req:2", "Allocate", 2, {8, 9, 10}, {}, {AllocStatus::OK}, {}, {}, {}, {}, {}},
        OperationParams{"Req:0", "SwapIn", 0, {}, {}, {}, {}, {}, {AllocStatus::LATER}, {}, {}},
    },
    // Block size 8: allocate one block, swap out, swap in, free (all four steps are replayed).
    {
        BlockManagerSetup(8, 2, 2, false, 4),
        OperationParams{
            "Req:0", "Allocate", 0, {1, 2, 3, 4, 5, 6, 7, 8}, {}, {AllocStatus::OK}, {}, {}, {}, {2, 3}, {1}},
        OperationParams{"Req:0", "SwapOut", 0, {}, {}, {}, {}, {true}, {}, {3}, {1, 0}},
        OperationParams{"Req:0", "SwapIn", 0, {}, {}, {}, {}, {}, {AllocStatus::OK}, {3, 2}, {0}},
        OperationParams{"Req:0", "Free", 0, {}, {}, {}, {}, {}, {}, {3, 2}, {0, 1}},
    },
    // Two-block prompt using every NPU and CPU block across swap out / swap in / free.
    {
        BlockManagerSetup(4, 2, 2, false, 4),
        OperationParams{"Req:0", "Allocate", 0, {1, 2, 3, 4, 5, 6, 7, 8}, {}, {AllocStatus::OK}, {}, {}, {}, {2, 3}, {}},
        OperationParams{"Req:0", "SwapOut", 0, {}, {}, {}, {}, {true}, {}, {}, {0, 1}},
        OperationParams{"Req:0", "SwapIn", 0, {}, {}, {}, {}, {}, {AllocStatus::OK}, {2, 3}, {}},
        OperationParams{"Req:0", "Free", 0, {}, {}, {}, {}, {}, {}, {2, 3}, {0, 1}},
    },
    // Two NPU blocks in use but only one CPU block: swap out is expected to be rejected.
    {
        BlockManagerSetup(2, 1, 2, false, 2),
        OperationParams{"Req:0", "Allocate", 0, {1, 2, 3, 4}, {}, {AllocStatus::OK}, {}, {}, {}, {2}, {}},
        OperationParams{"Req:0", "SwapOut", 0, {}, {}, {}, {}, {false}, {}, {2}, {}},
    },
    // Swap out and back in, then append two tokens and free.
    {
        BlockManagerSetup(4, 2, 3, false, 5),
        OperationParams{"Req:0", "Allocate", 0, {1, 2, 3, 4, 5, 6}, {}, {AllocStatus::OK}, {}, {}, {}, {3, 4}, {2}},
        OperationParams{"Req:0", "SwapOut", 0, {}, {}, {}, {}, {true}, {}, {}, {2, 0, 1}},
        OperationParams{"Req:0", "SwapIn", 0, {}, {}, {}, {}, {}, {AllocStatus::OK}, {3, 4}, {1}},
        OperationParams{"Req:0", "AppendSlot", 0, {}, {7, 8}, {}, {true}, {}, {}, {3, 4}, {1}},
        OperationParams{"Req:0", "Free", 0, {}, {}, {}, {}, {}, {}, {3, 4}, {1, 2, 0}},
    },
    // Swap out and back in, then append six tokens at once.
    {
        BlockManagerSetup(4, 2, 3, false, 4),
        OperationParams{"Req:0", "Allocate", 0, {1, 2, 3, 4, 5, 6}, {}, {AllocStatus::OK}, {}, {}, {}, {3, 4}, {2}},
        OperationParams{"Req:0", "SwapOut", 0, {}, {}, {}, {}, {true}, {}, {}, {2, 0, 1}},
        OperationParams{"Req:0", "SwapIn", 0, {}, {}, {}, {}, {}, {AllocStatus::OK}, {3, 4}, {1}},
        OperationParams{"Req:0", "AppendSlot", 0, {}, {7, 8, 9, 10, 11, 12}, {}, {true}, {}, {}, {3, 4}, {}},
    },
    // Appends both before and after a swap out / swap in round trip, then free.
    {
        BlockManagerSetup(4, 2, 4, false, 6),
        OperationParams{"Req:0", "Allocate", 0, {1, 2, 3, 4, 5, 6}, {}, {AllocStatus::OK}, {}, {}, {}, {4, 5}, {2, 3}},
        OperationParams{"Req:0", "AppendSlot", 0, {}, {7, 8}, {}, {true}, {}, {}, {4, 5}, {2, 3}},
        OperationParams{"Req:0", "SwapOut", 0, {}, {}, {}, {}, {true}, {}, {}, {2, 3, 0, 1}},
        OperationParams{"Req:0", "SwapIn", 0, {}, {}, {}, {}, {}, {AllocStatus::OK}, {4, 5}, {0, 1}},
        OperationParams{"Req:0", "AppendSlot", 0, {}, {9, 10}, {}, {true}, {}, {}, {4, 5}, {1}},
        OperationParams{"Req:0", "Free", 0, {}, {}, {}, {}, {}, {}, {4, 5}, {1, 2, 3, 0}},
    },
    // A second request takes the last NPU block, so the first request's next append is expected to be rejected.
    {
        BlockManagerSetup(4, 2, 3, false, 6),
        OperationParams{"Req:0", "Allocate", 0, {1, 2, 3, 4, 5, 6}, {}, {AllocStatus::OK}, {}, {}, {}, {3, 4}, {2}},
        OperationParams{"Req:0", "AppendSlot", 0, {}, {7, 8}, {}, {true}, {}, {}, {3, 4}, {2}},
        OperationParams{"Req:0", "SwapOut", 0, {}, {}, {}, {}, {true}, {}, {}, {2, 0, 1}},
        OperationParams{"Req:0", "SwapIn", 0, {}, {}, {}, {}, {}, {AllocStatus::OK}, {3, 4}, {1}},
        OperationParams{"Req:1", "Allocate", 1, {9, 10, 11, 12}, {}, {AllocStatus::OK}, {}, {}, {}, {3, 4}, {}},
        OperationParams{"Req:0", "AppendSlot", 0, {}, {13, 14}, {}, {false}, {}, {}, {3, 4}, {}},
    },
};
// Fixture for the value-parameterized block-manager scenarios; one SystemTestData per instantiation.
class BlockManagerSystemTest : public ::testing::TestWithParam<SystemTestData> {
protected:
    // Sampling parameters handed to every SequenceGroup; never assigned here, so it stays a null pointer.
    std::shared_ptr<SamplingParams> sampling;
    // Value passed to BlockManagerConfig as the reserved block count.
    size_t reservedBlockNum{0};
    // Value passed to BlockManagerConfig as the speculative slot count.
    size_t speculativeSlots{0};
    // Default-constructed table, used by the test only to call ChunkTokensForAllocate.
    BlockTable blockTable;
};
std::vector<BlockId> GetAllBlockIds(BlockSpaceManagerSPtr blockManagerPtr, DeviceType deviceType)
{
std::shared_ptr<HashLessBlockAllocator> hashlessBlockAllocator = std::dynamic_pointer_cast<HashLessBlockAllocator>(
std::dynamic_pointer_cast<CpuNpuBlockAllocator>(
std::dynamic_pointer_cast<SelfAttnBlockManager>(blockManagerPtr)->blockAllocator_)
->GetAllocator(deviceType));
return hashlessBlockAllocator->allBlockIndices_;
}
std::deque<BlockId> GetFreeBlockIds(BlockSpaceManagerSPtr blockManagerPtr, DeviceType deviceType)
{
std::shared_ptr<HashLessBlockAllocator> hashlessBlockAllocator = std::dynamic_pointer_cast<HashLessBlockAllocator>(
std::dynamic_pointer_cast<CpuNpuBlockAllocator>(
std::dynamic_pointer_cast<SelfAttnBlockManager>(blockManagerPtr)->blockAllocator_)
->GetAllocator(deviceType));
return hashlessBlockAllocator->freeBlockIndices_;
}
// Replays one scenario against a SelfAttnBlockManager.
// For every step the matching Can* query is compared with the expected result; if the query allows
// it, the operation itself is executed and the sequence status is updated by the test. Afterwards the
// CPU/NPU free-block lists are compared with the expected ones and, if the sequence still has a block
// table, its block count is compared with the number of token chunks of the sequence.
// Steps whose Can* query does not allow the operation skip those post-state checks.
TEST_P(BlockManagerSystemTest, TestFlow)
{
    // Bind to the parameter instead of copying it: SystemTestData embeds a 100-entry operations array.
    const SystemTestData &stData = GetParam();
    // Small mutable copy so the values are passed on as non-const lvalues.
    BlockManagerSetup setup = stData.setup;

    const size_t maxOperations = sizeof(stData.operations) / sizeof(stData.operations[0]);
    ASSERT_LE(setup.numTests, maxOperations) << "numTests exceeds the capacity of the operations array";

    BlockManagerConfig config{setup.blockSize, setup.cpuBlockNum, setup.npuBlockNum,
                              reservedBlockNum, speculativeSlots, setup.enableCaching};
    // A single manager instance is both driven and inspected, so the free-list checks below
    // do not rely on how a copied manager would share its allocator.
    auto selfAttnManager = std::make_shared<SelfAttnBlockManager>(config);
    SelfAttnBlockManager &blockManager = *selfAttnManager;
    BlockSpaceManagerSPtr blockManagerPtr = selfAttnManager;

    std::unordered_map<RequestId, SequenceGroupSPtr> reqid2ptr;
    for (size_t i = 0; i < setup.numTests; ++i) {
        // Per-step mutable copy of the operation description.
        OperationParams thisOperation = stData.operations[i];
        SCOPED_TRACE(::testing::Message() << "operation #" << i << " (" << thisOperation.api << ")");

        // The first step of a request id creates its single-sequence group from the prompt.
        if (reqid2ptr.find(thisOperation.requestId) == reqid2ptr.end()) {
            SequenceSPtr seqPtr =
                std::make_shared<Sequence>(thisOperation.seqId, setup.blockSize, thisOperation.prompts);
            reqid2ptr[thisOperation.requestId] = std::make_shared<SequenceGroup>(
                thisOperation.requestId, std::vector<std::shared_ptr<Sequence>>({seqPtr}), sampling);
        }
        SequenceGroupSPtr groupPtr = reqid2ptr[thisOperation.requestId];
        SequenceSPtr thisSeqPtr = groupPtr->firstSeq;

        if (thisOperation.api == "Allocate") {
            ASSERT_FALSE(thisOperation.canAllocate.empty()) << "Allocate step has no expected CanAllocate status";
            const AllocStatus canAllocate = blockManager.CanAllocate(groupPtr);
            EXPECT_EQ(canAllocate, thisOperation.canAllocate[0]);
            if (canAllocate != AllocStatus::OK) {
                continue;
            }
            EXPECT_TRUE(blockManager.Allocate(groupPtr));
            thisSeqPtr->status_ = SequenceStatus::RUNNING;
        } else if (thisOperation.api == "AppendSlot") {
            ASSERT_FALSE(thisOperation.canAppend.empty()) << "AppendSlot step has no expected CanAppendSlot result";
            // The new tokens are added to the sequence before the manager is asked for room.
            auto &outputTokenIds = thisSeqPtr->data_.outputTokenIds;
            outputTokenIds.insert(outputTokenIds.end(), thisOperation.tokensToAppend.begin(),
                                  thisOperation.tokensToAppend.end());
            const bool canAppend = blockManager.CanAppendSlot(groupPtr);
            EXPECT_EQ(canAppend, thisOperation.canAppend[0]);
            if (!canAppend) {
                continue;
            }
            EXPECT_EQ(thisSeqPtr->status_, SequenceStatus::RUNNING);
            static_cast<void>(blockManager.AppendSlot(thisSeqPtr));
        } else if (thisOperation.api == "SwapOut") {
            ASSERT_FALSE(thisOperation.canSwapOut.empty()) << "SwapOut step has no expected CanSwapOut result";
            const bool canSwapOut = blockManager.CanSwapOut(groupPtr);
            EXPECT_EQ(canSwapOut, thisOperation.canSwapOut[0]);
            if (!canSwapOut) {
                continue;
            }
            EXPECT_EQ(thisSeqPtr->status_, SequenceStatus::RUNNING);
            static_cast<void>(blockManager.SwapOut(groupPtr));
            thisSeqPtr->status_ = SequenceStatus::SWAPPED;
        } else if (thisOperation.api == "SwapIn") {
            ASSERT_FALSE(thisOperation.canSwapIn.empty()) << "SwapIn step has no expected CanSwapIn status";
            const AllocStatus canSwapIn = blockManager.CanSwapIn(groupPtr, 0);
            EXPECT_EQ(canSwapIn, thisOperation.canSwapIn[0]);
            if (canSwapIn != AllocStatus::OK) {
                continue;
            }
            EXPECT_EQ(thisSeqPtr->status_, SequenceStatus::SWAPPED);
            static_cast<void>(blockManager.SwapIn(groupPtr));
            thisSeqPtr->status_ = SequenceStatus::RUNNING;
        } else if (thisOperation.api == "Free") {
            EXPECT_NO_THROW(blockManager.Free(thisOperation.seqId));
            thisSeqPtr->status_ = SequenceStatus::WAITING;
        } else {
            // Typically a typo in the table or a numTests larger than the number of listed operations.
            ADD_FAILURE() << "unknown api \"" << thisOperation.api << "\"";
            continue;
        }

        EXPECT_EQ(GetFreeBlockIds(blockManagerPtr, DeviceType::CPU), thisOperation.cpuFreeBlockIds);
        EXPECT_EQ(GetFreeBlockIds(blockManagerPtr, DeviceType::NPU), thisOperation.npuFreeBlockIds);

        if (blockManager.seqId2BlockTable_.find(thisOperation.seqId) != blockManager.seqId2BlockTable_.end()) {
            const auto allBlockIds = blockManager.GetBlockIds(thisOperation.seqId);
            ASSERT_EQ(allBlockIds.size(), 1u);
            EXPECT_EQ(allBlockIds[0].size(),
                      blockTable.ChunkTokensForAllocate(thisSeqPtr->GetTokenIds(), setup.blockSize).size());
        }
    }
}
INSTANTIATE_TEST_SUITE_P(TestFlow, BlockManagerSystemTest, ::testing::ValuesIn(systemTestData));
}