* Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
* MindIE is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
*/
#ifndef CPU_NPU_BLOCK_ALLOCATOR_H
#define CPU_NPU_BLOCK_ALLOCATOR_H
#include <unordered_map>
#include "device_aware_block_allocator.h"
namespace mindie_llm {
// Block allocator that implements the DeviceAwareBlockAllocator interface and
// keeps two separate lists of underlying allocators: cpuAllocators_ and
// npuAllocators_. Only declarations live in this header; all member functions
// are defined elsewhere.
//
// NOTE(review): several overrides below declare default arguments. In C++ a
// default argument is chosen from the static type used at the call site, not
// from the dynamic type, so these defaults should stay identical to the ones
// declared in DeviceAwareBlockAllocator -- confirm against the base header.
class CpuNpuBlockAllocator : public DeviceAwareBlockAllocator {
public:
// Builds the allocator from `config`. `explicit` prevents implicit conversion
// from AllocatorConfig.
explicit CpuNpuBlockAllocator(const AllocatorConfig &config);
// Allocates one block on `deviceType` and returns a shared handle to it.
// `tokenIds` is taken by non-const reference. `prevBlock` defaults to nullptr,
// `extraHash` to 0 and `rankIdx` to 0.
// Presumably the returned block can still be appended to -- TODO confirm in
// the implementation.
BlockObjSPtr AllocateMutableBlock(DeviceType deviceType, std::vector<TokenId> &tokenIds,
BlockObjSPtr prevBlock = nullptr, HashValue extraHash = 0,
size_t rankIdx = 0) override;
// Same parameter list and defaults as AllocateMutableBlock.
// Presumably the returned block is treated as full / read-only -- TODO confirm
// in the implementation.
BlockObjSPtr AllocateImmutableBlock(DeviceType deviceType, std::vector<TokenId> &tokenIds,
BlockObjSPtr prevBlock = nullptr, HashValue extraHash = 0,
size_t rankIdx = 0) override;
// Batch variant: `tokenIds` is a list of token-id lists and the result is a
// list of block handles.
// Presumably one block is produced per inner list and the blocks are chained
// starting from `prevBlock` -- TODO confirm.
std::vector<BlockObjSPtr> AllocateImmutableBlocks(DeviceType deviceType,
std::vector<std::vector<TokenId>> &tokenIds,
BlockObjSPtr prevBlock = nullptr, HashValue extraHash = 0,
size_t rankIdx = 0) override;
// Releases `block`. The handle is passed by non-const reference, so the
// implementation is able to modify or reset the caller's pointer.
void Free(BlockObjSPtr &block) override;
// Returns a list of block handles derived from `block`.
// Presumably these share the underlying storage of the chain ending at
// `block` -- TODO confirm.
std::vector<BlockObjSPtr> Fork(BlockObjSPtr &block) override;
// Total number of blocks for `deviceType`.
size_t GetNumTotalBlocks(DeviceType deviceType) const override;
// Number of free blocks for `deviceType`.
// NOTE(review): whether this overload sums over all ranks or reports a single
// rank is not visible from the header.
size_t GetNumFreeBlock(DeviceType deviceType) const override;
// Number of free blocks for `deviceType` restricted to rank `rankId`.
size_t GetNumFreeBlock(DeviceType deviceType, size_t rankId) const override;
// Translates `globalBlockId` into a PhysicalBlockId.
PhysicalBlockId GetPhysicalBlockId(BlockId globalBlockId) const override;
// Moves `swapTargetBlocks` from `srcDevice` to `dstDevice` and returns a list
// of BlockId pairs.
// Presumably each pair is (source id, destination id) -- TODO confirm.
std::vector<std::pair<BlockId, BlockId>> Swap(std::vector<BlockObjSPtr> &swapTargetBlocks, DeviceType srcDevice,
DeviceType dstDevice) override;
// Returns a block count computed from `blocks` for `deviceType`; const, so it
// does not modify this object's non-mutable state.
size_t GetNumFullBlocksTouched(const std::vector<BlockObjSPtr> &blocks, DeviceType deviceType) const override;
// Returns a list of BlockId pairs; non-const.
// Presumably the pending copy-on-write records are returned and cleared --
// TODO confirm.
std::vector<std::pair<BlockId, BlockId>> ClearCopyOnWrites() override;
// Records an access to `blockIds` on rank `rankId` with timestamp `now`.
// NOTE(review): the unit and epoch of `now` are not visible here.
void MarkBlocksAsAccessed(size_t rankId, const std::vector<BlockId> &blockIds, float now) override;
// Takes no arguments and returns nothing; which blocks are affected is decided
// by the implementation.
void MarkBlocksAsComputed() override;
// Takes one BlockId list per sequence and returns a single BlockId list.
// Presumably the result is the prefix shared by all input lists -- TODO confirm.
std::vector<BlockId> GetCommonComputedBlockIds(
const std::vector<std::vector<BlockId>> &computedSeqBlockIds) const override;
// Takes, per rank, one BlockId list per sequence and returns a list of counts.
// Presumably there is one count per rank -- TODO confirm.
std::vector<size_t> GetAllRankCommonComputedBlockNum(
const std::vector<std::vector<std::vector<BlockId>>> &rankedComputedSeqBlockIds) const override;
// Returns a count for rank `rankIdx` derived from `hashValues`.
// NOTE(review): `hashValues` is passed by value, so the vector is copied on
// every call with an lvalue argument. The parameter type is part of the
// overridden signature, so it can only change together with the base class.
size_t GetCachedBlockNum(size_t rankIdx, std::vector<HashValue> hashValues) const override;
// Returns the prefix-cache hit rate as a float.
// NOTE(review): whether this is a 0..1 ratio or a percentage is not visible here.
float GetPrefixCacheHitRate() const override;
// Resets the prefix cache and reports a bool result.
// NOTE(review): this function is const-qualified even though its name implies
// mutation; any state it changes must live outside this object's non-mutable
// members (for example behind the allocator shared pointers) -- confirm.
bool ResetPrefixCache() const override;
// Reports whether `blockHash` is found in the cache for rank `rankIdx`.
bool FindCachedBlockPrefix(size_t rankIdx, HashValue blockHash) const override;
// Batch lookup: takes a list of hashes (by non-const reference) and returns a
// list of BlockIds.
// Presumably the result covers only the leading run of hashes that hit the
// cache -- TODO confirm.
std::vector<BlockId> FindCachedBlocksPrefix(size_t rankIdx, std::vector<HashValue> &blockHashes) const override;
// Returns a list of BlockId pairs. This is the only public member function
// here that is not marked `override`.
// Presumably it returns the contents of swapMapping_ and then clears it --
// TODO confirm.
std::vector<std::pair<BlockId, BlockId>> GetAndResetSwaps();
// Appends `tokenIds` to `block`. The handle is passed by value.
void AppendTokenIds(BlockObjSPtr block, const std::vector<TokenId> &tokenIds) override;
// Replaces a token of `block` at position `startIndex` with `newToken`.
// NOTE(review): whether only one token or a range starting at `startIndex` is
// replaced is not visible here.
void ReplaceToken(BlockObjSPtr block, size_t startIndex, TokenId newToken) override;
private:
// NOTE(review): rankSize_, beginCpuBlockId_ and hostSize_ have no in-class
// initializers, so they hold indeterminate values unless the constructor
// (defined elsewhere) sets them -- confirm it initializes all three.

// Presumably the number of ranks, i.e. the expected length of the two
// allocator lists below -- TODO confirm.
size_t rankSize_;
// Underlying allocators used for CPU blocks.
std::vector<BlockAllocatorSPtr> cpuAllocators_;
// Underlying allocators used for NPU blocks.
std::vector<BlockAllocatorSPtr> npuAllocators_;
// BlockId -> BlockId map; presumably the swap records reported by
// GetAndResetSwaps() -- TODO confirm.
std::unordered_map<BlockId, BlockId> swapMapping_;
// Presumably the first global BlockId that belongs to the CPU range, used to
// tell CPU ids from NPU ids -- TODO confirm.
BlockId beginCpuBlockId_;
// NOTE(review): meaning not visible from this header (host count or host
// block count?) -- confirm.
size_t hostSize_;
// Returns the device type that `blockId` belongs to.
DeviceType GetDeviceTypeForBlockId(BlockId blockId) const;
// Returns the underlying allocator for `deviceType` and `rankIdx`
// (`rankIdx` defaults to 0).
BlockAllocatorSPtr GetAllocator(DeviceType deviceType, size_t rankIdx = 0) const;
};
}
#endif