* Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
* MindIE is licensed under Mulan PSL v2.
*/
#include <gtest/gtest.h>
#include <stdexcept>
#include <vector>
#define private public
#include "request_single_block_manager.h"
#include "sequence_group.h"
#undef private
namespace mindie_llm {
// Test helper: builds a SequenceGroup for request `rid` containing `seqs`.
// The id is copied into a local first so that a mutable lvalue (not the const
// reference) is what gets forwarded to the SequenceGroup constructor; the third
// constructor argument is always nullptr in these tests.
static SequenceGroupSPtr MakeGroup(const RequestId &rid, const std::vector<SequenceSPtr> &seqs)
{
    RequestId mutableRid(rid);
    return std::make_shared<SequenceGroup>(mutableRid, seqs, nullptr);
}
// End-to-end lifecycle on a manager that starts with four free NPU blocks:
//  - the first Allocate for request "rq" consumes one block;
//  - a second group carrying the same request id reuses that block;
//  - Fork attaches a child sequence to the same block;
//  - the block returns to the free pool only after every attached sequence is freed.
// Both GetRankedBlockIds overloads are also checked for a known sequence id, and the
// RankedBlockId overload for an unknown one.
TEST(RequestSingleBlockManagerTest, AllocateReuseForkFreeAndRankedBlockIds)
{
    BlockManagerConfig cfg{4, 0, 4, 0, 0, false, 2, 1};
    RequestSingleBlockManager mgr(cfg);

    // First sequence of "rq": exactly one request entry and one block are created.
    auto s1 = std::make_shared<Sequence>(1, 8, std::vector<TokenId>{1, 2, 3, 4});
    auto g1 = MakeGroup("rq", {s1});
    EXPECT_EQ(mgr.CanAllocate(g1), AllocStatus::OK);
    EXPECT_TRUE(mgr.Allocate(g1));
    ASSERT_EQ(mgr.requestEntries_.size(), 1u);
    EXPECT_EQ(mgr.GetNumFreeNpuBlocks(), 3u);
    const auto ids1 = mgr.GetBlockIds(1);
    ASSERT_EQ(ids1.size(), 1u);
    ASSERT_EQ(ids1[0].size(), 1u);
    const BlockId bid = ids1[0][0];

    // Second sequence under the same request id: no extra block is taken.
    auto s2 = std::make_shared<Sequence>(2, 8, std::vector<TokenId>{9, 10});
    auto g2 = MakeGroup("rq", {s2});
    EXPECT_EQ(mgr.CanAllocate(g2), AllocStatus::OK);
    EXPECT_TRUE(mgr.Allocate(g2));
    EXPECT_EQ(mgr.GetNumFreeNpuBlocks(), 3u);
    // Guard the indexing so a regression is reported as a failure instead of
    // reading out of bounds.
    const auto ids2 = mgr.GetBlockIds(2);
    ASSERT_GE(ids2.size(), 1u);
    ASSERT_GE(ids2[0].size(), 1u);
    EXPECT_EQ(ids2[0][0], bid);

    // RankedBlockId overload: two entries pairing the shared block with 0 and 1
    // (presumably one per rank; the config above uses 2 for its seventh field).
    std::vector<RankedBlockId> ranked;
    mgr.GetRankedBlockIds(1, ranked);
    ASSERT_EQ(ranked.size(), 2u);
    EXPECT_EQ(ranked[0], (RankedBlockId{bid, 0}));
    EXPECT_EQ(ranked[1], (RankedBlockId{bid, 1}));

    // Nested-vector overload: two lists, each holding only the shared block.
    std::vector<std::vector<BlockId>> ranked2;
    mgr.GetRankedBlockIds(1, ranked2);
    ASSERT_EQ(ranked2.size(), 2u);
    EXPECT_EQ(ranked2[0], (std::vector<BlockId>{bid}));
    EXPECT_EQ(ranked2[1], (std::vector<BlockId>{bid}));

    // Unknown sequence id leaves the output vector empty.
    std::vector<RankedBlockId> rankedMissing;
    mgr.GetRankedBlockIds(999, rankedMissing);
    EXPECT_TRUE(rankedMissing.empty());

    // Fork: the child shares the parent's block and the free count is unchanged.
    auto child = std::make_shared<Sequence>(3, 8, std::vector<TokenId>{});
    mgr.Fork(s1, child);
    const auto idsChild = mgr.GetBlockIds(3);
    ASSERT_GE(idsChild.size(), 1u);
    ASSERT_GE(idsChild[0].size(), 1u);
    EXPECT_EQ(idsChild[0][0], bid);
    EXPECT_EQ(mgr.GetNumFreeNpuBlocks(), 3u);

    // The block stays allocated until the last of the three sequences is freed.
    mgr.Free(1);
    mgr.Free(2);
    EXPECT_EQ(mgr.GetNumFreeNpuBlocks(), 3u);
    mgr.Free(3);
    EXPECT_EQ(mgr.GetNumFreeNpuBlocks(), 4u);
    EXPECT_TRUE(mgr.requestEntries_.empty());
}
// Forking from a parent sequence that was never passed through Allocate must
// raise std::runtime_error.
TEST(RequestSingleBlockManagerTest, ForkWithoutAllocateThrows)
{
    BlockManagerConfig config{4, 0, 2, 0, 0, false, 1, 1};
    RequestSingleBlockManager manager(config);

    auto unallocatedParent = std::make_shared<Sequence>(1, 8, std::vector<TokenId>{1});
    auto forkTarget = std::make_shared<Sequence>(2, 8, std::vector<TokenId>{});

    EXPECT_THROW(manager.Fork(unallocatedParent, forkTarget), std::runtime_error);
}
// The constructor must reject a sequence of configurations with
// std::invalid_argument. Each step repairs the field changed by the previous
// step and then sets a different field to the next value under test.
TEST(RequestSingleBlockManagerTest, ConstructorValidatesConfig)
{
    BlockManagerConfig config{4, 0, 1, 0, 0, false, 0, 1};

    // Step 1: the configuration exactly as brace-initialized above.
    EXPECT_THROW((void)RequestSingleBlockManager(config), std::invalid_argument);

    // Step 2: rankSize set to 1, hostSize set to 0.
    config.rankSize = 1;
    config.hostSize = 0;
    EXPECT_THROW((void)RequestSingleBlockManager(config), std::invalid_argument);

    // Step 3: hostSize set to 1, npuBlockNum set to 0.
    config.hostSize = 1;
    config.npuBlockNum = 0;
    EXPECT_THROW((void)RequestSingleBlockManager(config), std::invalid_argument);

    // Step 4: npuBlockNum set to 1, reservedBlockNum raised above it to 2.
    config.npuBlockNum = 1;
    config.reservedBlockNum = 2;
    EXPECT_THROW((void)RequestSingleBlockManager(config), std::invalid_argument);
}
// Rejection paths of CanAllocate / Allocate: a null group, and a group whose
// only sequence has been switched to RUNNING status.
TEST(RequestSingleBlockManagerTest, CanAllocateAndAllocateCoverEdgeBranches)
{
    BlockManagerConfig config{4, 0, 2, 0, 0, false, 1, 1};
    RequestSingleBlockManager manager(config);

    // Null group: never allocatable, and Allocate reports failure.
    EXPECT_EQ(manager.CanAllocate(nullptr), AllocStatus::NEVER);
    EXPECT_FALSE(manager.Allocate(nullptr));

    // Group containing a single RUNNING sequence: same outcome as the null group.
    auto runningSeq = std::make_shared<Sequence>(1, 8, std::vector<TokenId>{1});
    runningSeq->status_ = SequenceStatus::RUNNING;
    auto groupWithoutWaiting = MakeGroup("rq_nowait", {runningSeq});

    EXPECT_EQ(manager.CanAllocate(groupWithoutWaiting), AllocStatus::NEVER);
    EXPECT_FALSE(manager.Allocate(groupWithoutWaiting));
}
// With the third and fourth config fields both set to 1 (per the test name: every
// NPU block is reserved), CanAllocate must answer NEVER for a fresh request.
TEST(RequestSingleBlockManagerTest, CanAllocateReturnsNeverWhenAllNpuReserved)
{
    BlockManagerConfig config{4, 0, 1, 1, 0, false, 1, 1};
    RequestSingleBlockManager manager(config);

    auto seq = std::make_shared<Sequence>(1, 8, std::vector<TokenId>{1});
    auto group = MakeGroup("rq", {seq});

    EXPECT_EQ(manager.CanAllocate(group), AllocStatus::NEVER);
}
// After the first request's allocation drops the free NPU block count to zero,
// a different request id must be answered with LATER.
TEST(RequestSingleBlockManagerTest, CanAllocateReturnsLaterWhenNoFreeBlockForNewRequest)
{
    BlockManagerConfig config{4, 0, 1, 0, 0, false, 1, 1};
    RequestSingleBlockManager manager(config);

    // First request: allocation succeeds and leaves no free NPU block.
    auto firstSeq = std::make_shared<Sequence>(1, 8, std::vector<TokenId>{1});
    auto firstGroup = MakeGroup("rq1", {firstSeq});
    EXPECT_TRUE(manager.Allocate(firstGroup));
    EXPECT_EQ(manager.GetNumFreeNpuBlocks(), 0u);

    // Second, distinct request: has to wait.
    auto secondSeq = std::make_shared<Sequence>(2, 8, std::vector<TokenId>{1});
    auto secondGroup = MakeGroup("rq2", {secondSeq});
    EXPECT_EQ(manager.CanAllocate(secondGroup), AllocStatus::LATER);
}
// Walks the append-slot and swap entry points: null arguments, a group that has
// not been allocated yet, and the same group after allocation.
TEST(RequestSingleBlockManagerTest, AppendAndSwapApisCoverBranches)
{
    BlockManagerConfig config{4, 2, 2, 0, 0, false, 1, 1};
    RequestSingleBlockManager manager(config);

    // Null arguments: the predicates say no, AppendSlot yields nothing, and the
    // void calls are only required to return normally.
    EXPECT_FALSE(manager.CanAppendSlot(nullptr));
    EXPECT_FALSE(manager.CanAppendSlotNew(nullptr));
    EXPECT_TRUE(manager.AppendSlot(nullptr).empty());
    manager.AppendSlotNew(nullptr);
    manager.AppendTokenToLatestRank(0, std::vector<TokenId>{1});

    auto seq = std::make_shared<Sequence>(1, 8, std::vector<TokenId>{1});
    auto group = MakeGroup("rq", {seq});

    // Before allocation the group cannot append.
    EXPECT_FALSE(manager.CanAppendSlot(group));

    // After allocation both predicates flip to true; AppendSlot still returns an empty result.
    EXPECT_TRUE(manager.Allocate(group));
    EXPECT_TRUE(manager.CanAppendSlot(group));
    EXPECT_TRUE(manager.CanAppendSlotNew(group));
    EXPECT_TRUE(manager.AppendSlot(seq).empty());

    // Swap APIs: swapping out is refused, swapping in is NEVER, and both
    // operations return empty results.
    EXPECT_FALSE(manager.CanSwapOut(group));
    EXPECT_TRUE(manager.SwapOut(group).empty());
    EXPECT_EQ(manager.CanSwapIn(group, 0), AllocStatus::NEVER);
    EXPECT_TRUE(manager.SwapIn(group).empty());

    // Free CPU block count is reported as 2.
    EXPECT_EQ(manager.GetNumFreeCpuBlocks(), 2u);
}
// Free(): unknown sequence id, reference-count decrement across two sequences of
// one request, and a sequence-to-request mapping whose request entry is missing.
TEST(RequestSingleBlockManagerTest, FreeCoversRefCountAndInconsistentStateBranch)
{
    BlockManagerConfig config{4, 0, 2, 0, 0, false, 1, 1};
    RequestSingleBlockManager manager(config);

    // Freeing an id the manager has never seen is only required to return normally.
    manager.Free(999);

    // Two sequences in one group: a single entry with refCount 2, one block consumed.
    auto seqA = std::make_shared<Sequence>(1, 8, std::vector<TokenId>{1});
    auto seqB = std::make_shared<Sequence>(2, 8, std::vector<TokenId>{1});
    auto group = MakeGroup("rq", {seqA, seqB});
    EXPECT_TRUE(manager.Allocate(group));
    ASSERT_EQ(manager.requestEntries_.count("rq"), 1u);
    EXPECT_EQ(manager.requestEntries_.at("rq").refCount, 2u);
    EXPECT_EQ(manager.GetNumFreeNpuBlocks(), 1u);

    // Releasing the first sequence keeps the entry (refCount 1) and the block.
    manager.Free(1);
    ASSERT_EQ(manager.requestEntries_.count("rq"), 1u);
    EXPECT_EQ(manager.requestEntries_.at("rq").refCount, 1u);
    EXPECT_EQ(manager.GetNumFreeNpuBlocks(), 1u);

    // Releasing the second one removes the entry and returns the block.
    manager.Free(2);
    EXPECT_TRUE(manager.requestEntries_.empty());
    EXPECT_EQ(manager.GetNumFreeNpuBlocks(), 2u);

    // Inject a mapping that points at a request with no entry; Free must still
    // erase the stale mapping.
    manager.seqId2RequestId_[123] = "rq_missing";
    EXPECT_TRUE(manager.requestEntries_.empty());
    manager.Free(123);
    EXPECT_TRUE(manager.seqId2RequestId_.empty());
}
// Fork() error paths:
//  - a null parent pointer raises std::invalid_argument;
//  - a parent whose request entry has had its block pointer reset raises
//    std::runtime_error.
TEST(RequestSingleBlockManagerTest, ForkCoversInvalidArgsAndMissingBlockBranch)
{
    BlockManagerConfig cfg{4, 0, 2, 0, 0, false, 1, 1};
    RequestSingleBlockManager mgr(cfg);

    SequenceSPtr nullSeq;
    auto child = std::make_shared<Sequence>(2, 8, std::vector<TokenId>{});
    EXPECT_THROW(mgr.Fork(nullSeq, child), std::invalid_argument);

    auto parent = std::make_shared<Sequence>(1, 8, std::vector<TokenId>{1});
    auto g = MakeGroup("rq", {parent});
    // Setup is a hard precondition: if allocation fails or no "rq" entry exists,
    // stop here rather than let the .at("rq") below throw an unrelated
    // std::out_of_range.
    ASSERT_TRUE(mgr.Allocate(g));
    ASSERT_EQ(mgr.requestEntries_.count("rq"), 1u);

    // Drop the entry's block to simulate an inconsistent internal state.
    mgr.requestEntries_.at("rq").block.reset();
    EXPECT_THROW(mgr.Fork(parent, child), std::runtime_error);
}
// Smoke test: after one successful allocation, calls the remaining query and
// bookkeeping APIs and pins the values they return.
TEST(RequestSingleBlockManagerTest, TrivialApisAreCallableForCoverage)
{
    BlockManagerConfig config{4, 2, 2, 0, 0, false, 2, 1};
    RequestSingleBlockManager manager(config);

    auto seq = std::make_shared<Sequence>(1, 8, std::vector<TokenId>{1});
    auto group = MakeGroup("rq", {seq});
    EXPECT_TRUE(manager.Allocate(group));

    // Append / rank bookkeeping queries for sequence 1.
    EXPECT_FALSE(manager.IsAppendBlock(1));
    EXPECT_EQ(manager.GetLatestAppendedRankId(1), 0u);
    EXPECT_EQ(manager.GetAppendedBlockRankId(1), 0u);
    EXPECT_EQ(manager.GetTokenCountPerRank(1).size(), 2u);

    // Hash-value and computed-block queries all come back empty.
    EXPECT_TRUE(manager.GetRankedHashValues(1).empty());
    EXPECT_TRUE(manager.GetSeqHashValues(1).empty());
    EXPECT_TRUE(manager.GetCommonComputedBlockIds({seq}).empty());
    EXPECT_TRUE(manager.GetAllrankComputedBlockNum({seq}).empty());
    EXPECT_TRUE(manager.GetRemoteComputedBlockIds({seq}, 0, 1, "m").empty());
    std::vector<size_t> computedPerRank;
    EXPECT_TRUE(manager.GetAllRankRemoteComputedBlockIds({seq}, computedPerRank, "m").empty());

    // These calls are only required to return normally.
    manager.AccessAllblocksInSeq(seq, 0.0f);
    manager.MarkBlocksAsComputed();

    // Hit rate: non-negative when caching is enabled in the config, otherwise -1.
    const float prefixHitRate = manager.GetPrefixCacheHitRate();
    if (config.enableCaching) {
        EXPECT_GE(prefixHitRate, 0.0f);
    } else {
        EXPECT_EQ(prefixHitRate, -1.0f);
    }

    (void)manager.ResetPrefixCache();
    manager.ReplaceTrailingPlaceHolder(seq, 0, 0);
    EXPECT_EQ(manager.GetNumCachedTokens(seq), 0u);
    EXPECT_EQ(manager.GetSeqNumCachedTokens(seq), 0u);

    // Unknown sequence id: the RankedBlockId overload leaves its output empty...
    std::vector<RankedBlockId> rankedForUnknown;
    manager.GetRankedBlockIds(999, rankedForUnknown);
    EXPECT_TRUE(rankedForUnknown.empty());

    // ...while the nested-vector overload still yields two lists, both empty.
    std::vector<std::vector<BlockId>> perRankForUnknown;
    manager.GetRankedBlockIds(999, perRankForUnknown);
    ASSERT_EQ(perRankForUnknown.size(), 2u);
    EXPECT_TRUE(perRankForUnknown[0].empty());
    EXPECT_TRUE(perRankForUnknown[1].empty());
}
}