* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#ifndef B0F387D2672945138C8316EF65776372_H
#define B0F387D2672945138C8316EF65776372_H
#include <cstdint>
#include <array>
#include <memory>
#include <unordered_map>
#include "runtime/mem.h"
#include "mem_block.h"
#include "ge/ge_api_types.h"
#include "ge/ge_allocator.h"
#include "kernel/memory/allocator/scalable_allocator.h"
#include "runtime/mem_allocator.h"
#include "exe_graph/runtime/gert_mem_allocator.h"
#include "rts_caching_mem_allocator.h"
#include "multi_stream_mem_block_pool.h"
#include "framework/runtime/device_memory_recorder.h"
namespace gert {
// Fixed queue count shared by the memory code in namespace gert.
// NOTE(review): not referenced anywhere in this header; presumably the number of
// size-class queues addressed by the allocator's queue_index arguments - confirm
// against the users of this constant.
constexpr uint32_t MEM_QUEUE_NUM{21U};
namespace memory {
// Allocator that implements ge::Allocator (Malloc/Free) and MemSynchronizer
// (Synchronize/Recycle) on top of a MemoryPool owned through memory_pool_.
//
// Instances are tracked in two static registries (same_thread_allocators_ and
// all_caching_mem_allocators_); the destructor removes `this` from both while
// holding mutex_. Where instances are inserted into those registries is not
// visible in this header.
//
// The type is neither copyable nor movable: it owns a std::unique_ptr member and
// declares a destructor, so no implicit move operations are generated.
struct CachingMemAllocator : public ge::Allocator, public MemSynchronizer {
  // Constructors are defined out of line; the parameter names describe their inputs.
  CachingMemAllocator(const uint32_t device_id, const rtMemType_t memory_type);
  CachingMemAllocator(const std::string &graph_name, const uint32_t device_id, const rtMemType_t memory_type);
  CachingMemAllocator(const uint32_t device_id, const rtMemType_t memory_type, ScalableConfig &config);
  explicit CachingMemAllocator(const uint32_t device_id);

  // Unregisters this allocator from both registries and then finalizes it.
  // mutex_ stays locked for the whole destructor body, including Finalize(true).
  // NOTE(review): same_thread_allocators_ is thread_local, so only the list of the
  // thread running this destructor is searched. If an allocator can be destroyed on
  // a different thread than the one whose list holds it, that entry would stay
  // behind as a dangling pointer - confirm against the registration code.
  ~CachingMemAllocator() override {
    const std::lock_guard<std::mutex> lock(mutex_);
    // Erase the first entry equal to `this`, if any; at most one entry is removed.
    const auto unregister_self = [this](std::vector<CachingMemAllocator *> &registry) {
      for (auto it = registry.begin(); it != registry.end(); ++it) {
        if (*it == this) {
          (void)registry.erase(it);
          return;
        }
      }
    };
    unregister_self(same_thread_allocators_);
    unregister_self(all_caching_mem_allocators_);
    (void)Finalize(true);
  }

  // Allocates a block of at least the requested size (defined out of line).
  ge::MemBlock *Malloc(size_t size) override;

  // Returns `block` to the memory pool and subtracts its size from the
  // DeviceMemoryRecorder total. A null `block` is ignored.
  void Free(ge::MemBlock *block) override {
    if (block == nullptr) {
      // Nothing to release; also avoids dereferencing a null pointer below.
      return;
    }
    DeviceMemoryRecorder::ReduceTotalAllocateMemory(static_cast<uint64_t>(block->GetSize()));
    // NOTE(review): if `block` is not a PageSpan the cast yields nullptr and the pool
    // receives a null pointer after the recorder was already updated - presumably
    // every block handed out by this allocator is a PageSpan; confirm.
    memory_pool_->Free(dynamic_cast<PageSpan *>(block));
  }

  // Defined out of line. The destructor calls it with no_log = true.
  // NOTE(review): `no_log` presumably suppresses logging during teardown - confirm.
  ge::Status Finalize(bool no_log = false);

  // Factory overloads returning a uniquely owned allocator (defined out of line).
  static std::unique_ptr<CachingMemAllocator> GetAllocator();
  static std::unique_ptr<CachingMemAllocator> GetAllocator(const uint32_t device_id);
  static std::unique_ptr<CachingMemAllocator> GetAllocator(const std::string &graph_name, const uint32_t device_id,
                                                           const rtMemType_t rt_mem_type);

  // Per-thread registry of allocator pointers (non-owning).
  thread_local static std::vector<CachingMemAllocator *> same_thread_allocators_;
  // Process-wide registry of allocator pointers (non-owning).
  static std::vector<CachingMemAllocator *> all_caching_mem_allocators_;

  // Overrides of base-class virtuals, defined out of line.
  ge::Status Synchronize() const override;
  void Recycle() override;

  // Stores the stream handle in stream_; nothing else is done with it here.
  void SetStream(const aclrtStream stream) {
    stream_ = stream;
  }

  // Forwards to the device id reported by rts_mem_allocator_.
  DeviceId GetDeviceId() const {
    return rts_mem_allocator_.GetDeviceId();
  }

 public:
  // Locked by the destructor while it edits the registries and finalizes.
  static std::mutex mutex_;

  // Exposes the owned pool as a ScalableAllocator; ownership stays with this object.
  // NOTE(review): reinterpret_cast performs no pointer adjustment or type check, so
  // this is only valid if the pool object really is a ScalableAllocator located at
  // the same address - confirm the relationship between MemoryPool and
  // ScalableAllocator (a static_cast would be preferable if they are related by
  // inheritance).
  ScalableAllocator *GetScalableAllocator() {
    return reinterpret_cast<ScalableAllocator *>(memory_pool_.get());
  }

 private:
  // Internal helpers, defined out of line.
  void RecallMemBlocks(size_t start_queue_index = 0);
  ge::Status TryExtendCache(size_t queue_index);
  ge::MemBlock *AllocateWithTryRecycle(size_t size);

 private:
  // Declaration order is kept as-is because it fixes member construction order.
  RtsFirstLevelPool rts_mem_allocator_;
  SpanAllocatorImp span_allocator_;
  std::unique_ptr<MemoryPool> memory_pool_;
  aclrtStream stream_ = nullptr;
};
}
}
#endif