* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#include "graph/manager/active_memory_allocator.h"
#include <string>
#include "common/math/math_util.h"
#include "common/checker.h"
#include "graph/load/model_manager/model_utils.h"
#include "graph/manager/graph_var_manager.h"
#include "common/aclrt_malloc_helper.h"
namespace {
// Diagnostic text for ge.variableUse1gHugePage == 2: the 1GB huge page allocation failed and 2M pages
// were attempted instead (per the message itself).
const std::string k1gHugePageFirstMallocFail = "The option ge.variableUse1gHugePage was set to 2, but the "
"allocation of 1GB of huge page memory failed. Instead, an attempt was made to allocate 2M large pages. "
"This error may affect the performance of the operator execution, resulting in the inability to fully "
"maximize the performance benefits of this option. If you do not care about these performance benefits, "
"the above error log can be ignored.";
// Diagnostic text for ge.variableUse1gHugePage == 1: the 1GB page allocation failed (no fallback is mentioned).
const std::string k1gHugePageOnlyMallocFail = "The option ge.variableUse1gHugePage was set to 1, but the "
"allocation of 1GB of super large page memory failed.";
/// Rounds `size` up to the next multiple of `align_size`.
///
/// The rounding uses a bit mask, so `align_size` must be a power of two for the result to be meaningful.
/// An `align_size` of 0 means "no alignment": `size` is returned unchanged instead of collapsing to 0,
/// which is what the mask arithmetic would otherwise produce.
///
/// @param size        value to round up
/// @param align_size  alignment granularity (power of two, or 0 for none)
/// @return the aligned size
size_t AlignSize(const size_t size, const size_t align_size) {
  if (align_size == 0U) {
    return size;
  }
  return ((size + align_size - 1U) & (~(align_size - 1U)));
}
// Converts a page-type value into a short printable label ("2M", "1G" or "unknown").
std::string GetPgType(size_t pg_type) {
  const char *label = "unknown";
  if (pg_type == ge::kDrvMemPropPgType2M) {
    label = "2M";
  } else if (pg_type == ge::kDrvMemPropPgType1G) {
    label = "1G";
  }
  return label;
}
// Translates a runtime memory type into the matching table value; unknown types map to 0.
uint32_t TransMemType(const uint32_t mem_type) {
  static const std::map<uint32_t, uint32_t> kRtMemTypeToDrvMemType = {{RT_MEMORY_HBM, 0U},
                                                                      {RT_MEMORY_DDR, 1U},
                                                                      {RT_MEMORY_P2P_HBM, 2U},
                                                                      {RT_MEMORY_P2P_DDR, 3U},
                                                                      {RT_MEMORY_TS, 4U}};
  const auto found = kRtMemTypeToDrvMemType.find(mem_type);
  return (found != kRtMemTypeToDrvMemType.cend()) ? found->second : 0U;
}
// Allocates device memory through ge::AclrtMalloc and records the allocation via GE_PRINT_DYNAMIC_MEMORY.
//
// malloc_size is an in/out parameter: requests larger than align_size are rounded up to a multiple of
// align_size before the allocation, and the rounded value is written back to the caller.
// Returns nullptr when the allocation fails.
uint8_t *Malloc(const std::string &purpose, const rtMemType_t memory_type, const uint32_t device_id,
                const size_t align_size, size_t &malloc_size) {
  // Requests that do not exceed one alignment unit are passed through unchanged.
  const bool needs_round_up = (malloc_size > align_size);
  if (needs_round_up) {
    malloc_size = AlignSize(malloc_size, align_size);
  }

  void *memory_addr = nullptr;
  if (ge::AclrtMalloc(&memory_addr, malloc_size, memory_type, GE_MODULE_NAME_U16) != ACL_SUCCESS) {
    // Never hand back whatever the failed call may have left in the out-pointer.
    memory_addr = nullptr;
  }
  GE_PRINT_DYNAMIC_MEMORY(aclrtMalloc, ge::ToMallocMemInfo(purpose, memory_addr, device_id, GE_MODULE_NAME_U16).c_str(),
                          malloc_size);
  return reinterpret_cast<uint8_t *>(memory_addr);
}
// Ordering predicate for std::sort: larger memory_size sorts first (descending order).
static bool CompareSize(const ge::LogicalMemoryBlock &left, const ge::LogicalMemoryBlock &right) {
  return right.memory_size < left.memory_size;
}
// Returns true when the half-open ranges [head_offset, head_offset + using_size) of the two records overlap.
bool HasIntersection(const ge::PageRecord &left, const ge::PageRecord &right) {
  const auto left_end = left.head_offset + left.using_size;
  const auto right_end = right.head_offset + right.using_size;
  if (left.head_offset >= right_end) {
    return false;
  }
  return right.head_offset < left_end;
}
}
namespace ge {
/// Assigns an active (device) address to every entry of `logical_memorys`.
///
/// Steps, all performed under mutex_:
///   1. sum the sizes of the entries flagged `alloc` into memory_size_;
///   2. reset the blocks already held in active_memorys_ and try to serve requests from them;
///   3. merge adjacent requests, then allocate whatever is still missing.
///
/// @param purpose          tag forwarded to the underlying allocators
/// @param logical_memorys  in/out request list; it is re-sorted and merged, and active_addr is filled in
/// @param active_memorys   receives (address, used size) for every active block that is in use
/// @param device_id        only used for logging here
/// @return active_addr of the first (post-merge) logical memory when every entry got an address,
///         nullptr when the list is empty or any entry is still unassigned.
///
/// NOTE(review): used_count_ is incremented before the GE_ASSERT_TRUE checks, so an early assert
/// return leaves the counter incremented — confirm callers pair this with FreeMemory on failure.
uint8_t *ActiveMemoryAllocator::MallocMemory(const std::string &purpose, LogicalMemorys &logical_memorys,
                                             std::vector<std::pair<uint8_t *, size_t>> &active_memorys,
                                             const uint32_t device_id) {
  const std::lock_guard<std::recursive_mutex> lock(mutex_);
  support_extend_memory_full_ = ge::VarManager::IsGeUseExtendSizeMemoryFull();
  used_count_++;
  int64_t memory_size = 0;
  GELOGI("Before malloc logical_memorys:");
  for (auto &logical_memory : logical_memorys) {
    GELOGI("%s", logical_memory.ToString().c_str());
    GE_ASSERT_TRUE(logical_memory.memory_size >= 0);
    if (logical_memory.alloc) {
      memory_size += logical_memory.memory_size;
    }
  }
  GE_ASSERT_TRUE(memory_size >= 0);
  memory_size_ = memory_size;
  for (auto &block : active_memorys_) {
    block.Reset();
  }
  MallocByActiveMemorys(logical_memorys);
  MergeBlocks(logical_memorys);
  MallocActiveMemorys(purpose, logical_memorys, active_memorys);
  GELOGI("After malloc logical_memorys:");
  // An empty request list counts as a failure; otherwise every entry must have an address.
  bool all_success = !logical_memorys.empty();
  for (auto &logical_memory : logical_memorys) {
    if (logical_memory.active_addr == nullptr) {
      all_success = false;
    }
    GELOGI("%s", logical_memory.ToString().c_str());
  }
  // memory_size is a signed 64-bit value, so it is printed with PRId64 (was PRIu64).
  GELOGI("ActiveMemoryAllocator::MallocMemory device_id = %u, size = %" PRId64 " used_count = %zu.",
         device_id, memory_size, used_count_);
  return all_success ? logical_memorys[0].active_addr : nullptr;
}
/// Drops one use of this allocator and releases all active memory once the last user is gone.
///
/// When the use count reaches zero the memory is released either through the expandable allocator
/// (if IsSupportExpandableMemoryFull()) or block by block with aclrtFree, and the bookkeeping
/// (active_memorys_, memory_size_, maximum_active_addr_, peak_size_) is reset.
///
/// @param device_id  only used for logging here
/// @return always ge::SUCCESS; a failing aclrtFree is reported as a warning and does not abort the release.
Status ActiveMemoryAllocator::FreeMemory(const uint32_t device_id) {
  const std::lock_guard<std::recursive_mutex> lock(mutex_);
  if (used_count_ > 0U) {
    used_count_--;
  }
  if (used_count_ == 0U) {
    if (IsSupportExpandableMemoryFull()) {
      expandable_memory_allocator_.FreeMemory();
    } else {
      // Iterate by reference: the blocks are only read here, so copying each one is wasted work.
      for (const auto &memory : active_memorys_) {
        const aclError free_ret = aclrtFree(reinterpret_cast<void *>(memory.active_addr));
        if (free_ret != ACL_SUCCESS) {
          // Best effort: keep releasing the remaining blocks, but do not hide the failure.
          GELOGW("aclrtFree failed, addr:%p ret:%d device_id:%u", memory.active_addr,
                 static_cast<int32_t>(free_ret), device_id);
        }
      }
    }
    active_memorys_.clear();
    memory_size_ = 0;
    maximum_active_addr_ = nullptr;
    peak_size_ = 0U;
  }
  GELOGI("ActiveMemoryAllocator::FreeMemory used_count = %zu device_id = %u", used_count_, device_id);
  return ge::SUCCESS;
}
// Returns peak_size_ (read under mutex_) converted to int64_t.
int64_t ActiveMemoryAllocator::MemorySize() {
  const std::lock_guard<std::recursive_mutex> lock(mutex_);
  const auto peak = static_cast<int64_t>(peak_size_);
  return peak;
}
// Tries to serve the requests from the blocks already held in active_memorys_.
//
// Requests are visited largest-first (CompareSize). A request is considered unassigned while its
// active_addr is null or still equals its logical address; each such request asks every block in
// turn via block.Malloc(). Afterwards the list is put back into its default (operator<) order.
void ActiveMemoryAllocator::MallocByActiveMemorys(LogicalMemorys &logical_memorys) {
  std::sort(logical_memorys.begin(), logical_memorys.end(), CompareSize);
  for (auto &reusable_block : active_memorys_) {
    for (auto &request : logical_memorys) {
      const bool unassigned = (request.active_addr == nullptr) ||
                              (request.active_addr == reinterpret_cast<uint8_t *>(request.logical_addr));
      if (!unassigned) {
        continue;
      }
      request.active_addr = reusable_block.Malloc(static_cast<size_t>(request.memory_size));
    }
  }
  std::sort(logical_memorys.begin(), logical_memorys.end());
}
/// Coalesces neighbouring logical memories into single, larger entries.
///
/// An entry is folded into the run that precedes it when all of the following hold:
///   - it is flagged `alloc` and is not zero-copy;
///   - its logical address directly follows the previous entry's logical range;
///   - its active address directly follows the previous entry's active range, or both entries
///     have no active address yet.
/// Otherwise the current run is emitted and a new run starts at this entry.
///
/// @param logical_memorys  in/out; replaced by the merged list (lists of size 0 or 1 are left untouched)
void ActiveMemoryAllocator::MergeBlocks(LogicalMemorys &logical_memorys) const {
  if (logical_memorys.size() <= 1U) {
    return;
  }
  LogicalMemorys merged_memorys;
  // Accumulates the run that is currently being built.
  auto logical_memory = logical_memorys[0];
  for (size_t i = 1U; i < logical_memorys.size(); ++i) {
    const auto &current = logical_memorys[i];
    const auto &pre = logical_memorys[i - 1U];
    if (current.alloc && (current.logical_addr == (pre.logical_addr + pre.memory_size))
        && ((current.active_addr == (pre.active_addr + pre.memory_size))
            || ((current.active_addr == nullptr) && (pre.active_addr == nullptr))) && (!current.is_zero_copy)) {
      logical_memory.memory_size += current.memory_size;
    } else {
      merged_memorys.emplace_back(logical_memory);
      logical_memory = current;
    }
  }
  // The loop ran at least once (size >= 2), so the last run is always pending here.
  merged_memorys.emplace_back(logical_memory);
  logical_memorys = std::move(merged_memorys);
}
// Allocates device memory for every logical memory that still has no active address, registers the
// new allocations in active_memorys_, then reports every used active block through `active_memorys`
// and zero-fills it with aclrtMemset.
//
// purpose          tag forwarded to the allocators
// logical_memorys  in/out; active_addr is filled in for entries that were unassigned
// active_memorys   out; one (address, used_size) pair is appended per active block with used_size != 0
//
// NOTE: if an allocation fails the function returns immediately, so the reporting / memset /
// peak-size update below is skipped for this call.
void ActiveMemoryAllocator::MallocActiveMemorys(const std::string &purpose, LogicalMemorys &logical_memorys,
std::vector<std::pair<uint8_t *, size_t>> &active_memorys) {
for (auto &logical_memory : logical_memorys) {
// Entries that already have an address are left alone.
if (logical_memory.active_addr != nullptr) {
continue;
}
if (logical_memory.alloc) {
size_t malloc_size = static_cast<size_t>(logical_memory.memory_size);
// Try the expandable allocator first when extend-size memory is enabled.
if (support_extend_memory_full_) {
logical_memory.active_addr = expandable_memory_allocator_.MallocMemory(purpose, malloc_size, true);
}
// Fall back to a plain device allocation; Malloc may round malloc_size up (it takes it by reference).
if (logical_memory.active_addr == nullptr) {
logical_memory.active_addr = Malloc(purpose, memory_type_, device_id_, kDrvPageSize, malloc_size);
}
if (logical_memory.active_addr == nullptr) {
return;
}
// Track the highest end address handed out so far.
if ((logical_memory.active_addr + malloc_size) > maximum_active_addr_) {
maximum_active_addr_ = logical_memory.active_addr + malloc_size;
}
ActiveMemoryBlock
active_memory(logical_memory.active_addr, malloc_size, static_cast<size_t>(logical_memory.memory_size));
active_memorys_.emplace_back(std::move(active_memory));
std::sort(active_memorys_.begin(), active_memorys_.end());
} else {
// Entries that need no allocation are given the current maximum end address.
logical_memory.active_addr = maximum_active_addr_;
}
}
size_t total_size = 0U;
size_t used_size = 0U;
for (auto &active_memory : active_memorys_) {
GELOGI("%s", active_memory.ToString().c_str());
total_size += active_memory.total_size;
used_size += active_memory.used_size;
// Blocks that serve nothing in this round are not reported.
if (active_memory.used_size == 0U) {
continue;
}
// Blocks not flagged new_add request physical memory for their used range again; the status is ignored.
if (support_extend_memory_full_ && (!active_memory.new_add)) {
size_t reuse_size = 0U;
(void) expandable_memory_allocator_.GetPyhsicalMemoryAllocator().MallocPhysicalMemory(purpose,
active_memory.active_addr, active_memory.used_size, reuse_size);
}
active_memorys.emplace_back(active_memory.active_addr, active_memory.used_size);
// Zero the used part of the block; the aclrtMemset result is intentionally ignored.
(void)aclrtMemset(active_memory.active_addr, active_memory.used_size, 0U, active_memory.used_size);
}
if (used_size > peak_size_) {
peak_size_ = used_size;
}
GELOGI("Total active memory alloc_size:%zu, used_size:%zu, peak_size:%zu", total_size, used_size, peak_size_);
}
// True only when the expandable allocator reports support AND support_extend_memory_full_ is set.
bool ActiveMemoryAllocator::IsSupportExpandableMemoryFull() {
  const std::lock_guard<std::recursive_mutex> lock(mutex_);
  if (!expandable_memory_allocator_.IsSupportExpandableMemory()) {
    return false;
  }
  return support_extend_memory_full_;
}
// Requests physical memory for each (address, size) pair in `active_memorys`.
// Stops at the first failing range (GE_ASSERT_SUCCESS); returns SUCCESS when all ranges succeed.
Status ActiveMemoryAllocator::MallocPhysicalMemory(const std::string &purpose,
    const std::vector<std::pair<uint8_t *, size_t>> &active_memorys) {
  const std::lock_guard<std::recursive_mutex> lock(mutex_);
  size_t reuse_size = 0U;
  size_t malloced_size = 0U;
  for (size_t i = 0U; i < active_memorys.size(); ++i) {
    uint8_t *const range_addr = active_memorys[i].first;
    const size_t range_size = active_memorys[i].second;
    const Status ret = expandable_memory_allocator_.GetPyhsicalMemoryAllocator().MallocPhysicalMemory(
        purpose, range_addr, range_size, reuse_size);
    GE_ASSERT_SUCCESS(ret, "ret = %u.", ret);
    malloced_size += range_size;
  }
  GELOGI("ActiveMemoryAllocator::MallocPhysicalMemory memory_size:%zu device_id = %u memory_type = %u",
         malloced_size, device_id_, memory_type_);
  return SUCCESS;
}
// Hands the physical memory behind each (address, size) pair back to the physical allocator.
// Individual failures are ignored; the total size processed is logged.
void ActiveMemoryAllocator::Recycle(const std::vector<std::pair<uint8_t *, size_t>> &active_memorys) {
  const std::lock_guard<std::recursive_mutex> lock(mutex_);
  size_t freed_size = 0U;
  for (size_t i = 0U; i < active_memorys.size(); ++i) {
    const auto &range = active_memorys[i];
    (void) expandable_memory_allocator_.GetPyhsicalMemoryAllocator().FreePhysicalMemory(range.first, range.second);
    freed_size += range.second;
  }
  GELOGI("ActiveMemoryAllocator::Recycle memory_size:%zu device_id = %u memory_type = %u",
         freed_size, device_id_, memory_type_);
}
/// Hands out `memory_size` bytes from the reserved virtual range, mapping physical memory as needed.
///
/// @param purpose      tag forwarded to the physical allocator and used in logs
/// @param memory_size  requested size in bytes; must be >= 0
/// @param incremental  true: the request is appended after the bytes already in use and the returned
///                     address is virtual_active_addr_ + previous used size;
///                     false: the request starts at virtual_active_addr_, the use count is incremented
///                     and the used size becomes max(previous used size, memory_size)
/// @return the address of the request, or nullptr when reserving the virtual range fails (in which case
///         support_reserve_mem_address_ is cleared so later calls bail out at the first check) or when
///         one of the asserts fails.
uint8_t *ExpandableActiveMemoryAllocator::MallocMemory(const std::string &purpose, int64_t memory_size,
                                                       bool incremental) {
  const std::lock_guard<std::recursive_mutex> lock(mutex_);
  GE_WARN_ASSERT(support_reserve_mem_address_);
  GE_ASSERT_TRUE(memory_size >= 0);
  // Total number of bytes that must be backed by physical memory after this call.
  const size_t malloc_size =
      incremental ? (used_memory_size_ + static_cast<size_t>(memory_size)) : static_cast<size_t>(memory_size);
  if (MallocVirtualMemory(malloc_size) != SUCCESS) {
    support_reserve_mem_address_ = false;
    GELOGW("Maybe not support rtReserveMemAddress.");
    return nullptr;
  }
  GE_ASSERT_SUCCESS(MallocPhysicalMemoryAndMap(purpose, malloc_size));
  auto memory_addr = virtual_active_addr_;
  if (incremental) {
    memory_addr = virtual_active_addr_ + used_memory_size_;
    used_memory_size_ += static_cast<size_t>(memory_size);
  } else {
    used_count_++;
    used_memory_size_ = std::max(used_memory_size_, static_cast<size_t>(memory_size));
  }
  // memory_size is int64_t, so it is printed with PRId64 (was %lld, which expects long long).
  GELOGI("[%s] Total[virtual_memory_addr:%p virtual_memory_size:%zu, used_memory_size:%zu],"
         " current[memory_addr:%p memory_size:%" PRId64 " incremental:%d]", purpose.c_str(), virtual_active_addr_,
         virtual_memory_size_, used_memory_size_, memory_addr, memory_size,
         static_cast<int32_t>(incremental));
  return memory_addr;
}
// Drops one use; when the count reaches zero every mapped range is freed, the used size is reset
// and the reserved virtual range (if any) is released. Always returns SUCCESS.
Status ExpandableActiveMemoryAllocator::FreeMemory() {
  const std::lock_guard<std::recursive_mutex> lock(mutex_);
  if (used_count_ > 0U) {
    --used_count_;
  }
  const bool last_user_gone = (used_count_ == 0U);
  if (last_user_gone) {
    for (const auto &mapped_range : mapped_memory_addrs_) {
      (void) active_memory_allocator_.FreePhysicalMemory(mapped_range.first, mapped_range.second);
    }
    mapped_memory_addrs_.clear();
    used_memory_size_ = 0U;
    const bool has_virtual_range = (virtual_active_addr_ != nullptr);
    if (has_virtual_range) {
      active_memory_allocator_.ReleaseVirtualMemory();
      virtual_active_addr_ = nullptr;
    }
  }
  GELOGI("ActiveMemoryAllocator::FreeMemory used_count = %zu device_id = %u memory_type = %u",
         used_count_, device_id_, memory_type_);
  return SUCCESS;
}
// Reserves the virtual address range on first use; later calls return SUCCESS without doing anything.
//
// The range is sized to the total memory reported by aclrtGetMemInfo (ACL_HBM_MEM first; if that
// reports 0, a second query chosen from memory_type_), and memory_size must not exceed that total.
Status ExpandableActiveMemoryAllocator::MallocVirtualMemory(const size_t memory_size) {
  const std::lock_guard<std::recursive_mutex> lock(mutex_);
  if (virtual_active_addr_ != nullptr) {
    // Already reserved.
    return SUCCESS;
  }

  size_t free_mem = 0U;
  size_t total_mem_size = 0U;
  GE_ASSERT_RT_OK(aclrtGetMemInfo(ACL_HBM_MEM, &free_mem, &total_mem_size));
  if (total_mem_size == 0U) {
    aclrtMemAttr mem_info_type = memory_type_ == RT_MEMORY_HBM ? ACL_DDR_MEM : ACL_HBM_MEM_P2P_HUGE;
    GE_ASSERT_RT_OK(aclrtGetMemInfo(mem_info_type, &free_mem, &total_mem_size));
  }
  (void) free_mem;
  GE_ASSERT_TRUE(memory_size <= total_mem_size, "memory_size: %zu, total_mem_size: %zu", memory_size, total_mem_size);

  virtual_memory_size_ = total_mem_size;
  virtual_active_addr_ = active_memory_allocator_.ReserveVirtualMemory(virtual_memory_size_, device_id_,
                                                                       memory_type_, share_phy_allocator_);
  GE_WARN_ASSERT(virtual_active_addr_ != nullptr);
  active_memory_allocator_.SetReuse(reuse_);
  return SUCCESS;
}
// Extends the physically backed part of the virtual range up to `memory_size` bytes.
// Nothing happens when memory_size does not exceed used_memory_size_; otherwise the bytes
// [used_memory_size_, memory_size) are requested and remembered in mapped_memory_addrs_.
Status ExpandableActiveMemoryAllocator::MallocPhysicalMemoryAndMap(const std::string &purpose,
                                                                   const size_t memory_size) {
  if (memory_size <= used_memory_size_) {
    return SUCCESS;
  }
  size_t reuse_size = 0U;
  auto virtual_active_addr = virtual_active_addr_ + used_memory_size_;
  GE_ASSERT_SUCCESS(active_memory_allocator_.MallocPhysicalMemory(purpose, virtual_active_addr,
                                                                  memory_size - used_memory_size_, reuse_size));
  mapped_memory_addrs_.emplace_back(virtual_active_addr, memory_size - used_memory_size_);
  return SUCCESS;
}
/// Allocates `size` bytes from the expandable allocator (non-incremental mode) and wraps the
/// address in a heap-allocated MemBlock owned by the caller; release it with Free().
///
/// @param size  number of bytes requested
/// @return the new block, or nullptr when either the memory or the MemBlock cannot be allocated.
MemBlock *FixedBaseExpandableAllocator::Malloc(size_t size) {
  const auto ptr = ex_active_allocator_.MallocMemory(kFixedBasePurpose, static_cast<int64_t>(size),
                                                     false);
  GE_ASSERT_NOTNULL(ptr, "malloc failed, [%s], size: %zu", kFixedBasePurpose.c_str(), size);
  auto block = new (std::nothrow) MemBlock(*this, ptr, size);
  if (block == nullptr) {
    // The MallocMemory call above succeeded and took a use count. Without a MemBlock nobody can
    // call Free() for it later, so release it here, the same way Free() would.
    (void)ex_active_allocator_.FreeMemory();
  }
  GE_ASSERT_NOTNULL(block);
  GELOGI("[%s] malloc, block: %p, addr: %p, size: %zu", kFixedBasePurpose.c_str(), block, ptr, size);
  return block;
}
// Releases one use of the expandable allocator and deletes the block. A null block is ignored.
void FixedBaseExpandableAllocator::Free(MemBlock *mem_block) {
  if (mem_block == nullptr) {
    return;
  }
  (void)ex_active_allocator_.FreeMemory();
  // Log before deleting: the message reads the block's address and size.
  GELOGI("[%s] free, block: %p, addr: %p, size: %zu", kFixedBasePurpose.c_str(), mem_block,
         mem_block->GetAddr(), mem_block->GetSize());
  delete mem_block;
}
// Releases the reserved virtual range (and the physical memory behind it) on destruction.
ExpandableActiveMemoryAllocatorImp::~ExpandableActiveMemoryAllocatorImp() {
try {
ReleaseVirtualMemory();
} catch (const std::exception &) {
// Nothing to do: a destructor must not propagate exceptions.
// NOTE(review): ReleaseVirtualMemory() is defined noexcept in this file, so this handler looks unreachable.
}
}
// Reserves the virtual address range that later gets backed by physical pages.
//
// virtual_memory_size  in/out: requested size; rounded up to a multiple of page_size_ and written back
// device_id            stored in device_id_ and passed to the physical allocator
// memory_type          passed to the physical allocator
// share_phy_allocator  true: obtain the allocator from PhysicalMemoryAllocatorMgr; false: create a private one
//
// Returns the base address of the range. If a range is already reserved its base is returned unchanged
// and the arguments are ignored. The GE_*ASSERT macros return early on failure.
uint8_t *ExpandableActiveMemoryAllocatorImp::ReserveVirtualMemory(size_t &virtual_memory_size,
const uint32_t device_id,
const rtMemType_t memory_type,
const bool share_phy_allocator) {
if (virtual_memory_addr_base_ != nullptr) {
return virtual_memory_addr_base_;
}
// Unless 1G pages are configured, switch to the large page size when extend-size memory is enabled.
if ((page_size_!= kDrv1GPageSize) && ge::ModelUtils::IsGeUseExtendSizeMemory(true)) {
page_size_bits_ = kLargePageSizeBits;
page_size_ = (1U << page_size_bits_);
page_size_mask_ = (page_size_ - 1U);
}
virtual_memory_size = AlignSize(virtual_memory_size, page_size_);
virtual_memory_size_ = virtual_memory_size;
GE_ASSERT_EQ((virtual_memory_size_ & page_size_mask_), 0U);
// One bookkeeping entry per page of the reserved range.
const size_t memory_block_count = virtual_memory_size_ >> page_size_bits_;
physical_memorys_.resize(memory_block_count);
device_id_ = device_id;
free_physical_memorys_.reserve(memory_block_count);
pa_list_.reserve(memory_block_count);
map_count_ = 0U;
if (share_phy_allocator) {
physical_memory_allocator_ =
PhysicalMemoryAllocatorMgr::Instance().CreateAllocator(device_id, memory_type, page_size_);
} else {
physical_memory_allocator_ = ge::MakeShared<PhysicalMemoryAllocator>(device_id, memory_type);
}
GE_ASSERT_TRUE(physical_memory_allocator_ != nullptr);
void *virtual_addr = nullptr;
// A failed reservation is reported at warning level only (GE_WARN_ASSERT).
GE_WARN_ASSERT(physical_memory_allocator_->ReserveMemAddress(&virtual_addr, virtual_memory_size_) == SUCCESS);
virtual_memory_addr_base_ = reinterpret_cast<uint8_t *>(virtual_addr);
GE_WARN_ASSERT(virtual_memory_addr_base_ != nullptr);
GE_ASSERT_SUCCESS(physical_memory_allocator_->Initialize(page_size_, memory_block_count));
// Cache the log level; ProcPageRecord / ProcPageRecordByPaList compare it against DLOG_INFO.
log_level_ = dlog_getlevel(GE_MODULE_NAME, nullptr);
GEEVENT("virtual_active_addr_base:%p virtual_memory_size:%s device_id:%u, memory_type:%u, share_phy_allocator:%d.",
virtual_memory_addr_base_, ActiveMemoryUtil::SizeToString(virtual_memory_size_).c_str(), device_id_,
memory_type, share_phy_allocator);
return virtual_memory_addr_base_;
}
/// Frees the physical memory behind the reserved range, finalizes the physical allocator for that
/// range and resets the bookkeeping. Does nothing when no range is reserved.
///
/// After the release every page entry is expected to be idle; entries that are still marked in use,
/// referenced, mapped or holding a handle are reported with GELOGE before the table is cleared.
void ExpandableActiveMemoryAllocatorImp::ReleaseVirtualMemory() noexcept {
  if (virtual_memory_addr_base_ == nullptr) {
    return;
  }
  (void) FreePhysicalMemory(virtual_memory_addr_base_, virtual_memory_size_);
  (void) physical_memory_allocator_->Finalize(virtual_memory_addr_base_, virtual_memory_size_);
  GEEVENT("virtual_active_addr_base:%p virtual_memory_size:%zu device_id:%u.",
          virtual_memory_addr_base_, virtual_memory_size_, device_id_);
  virtual_memory_size_ = 0U;
  virtual_memory_addr_base_ = nullptr;
  // Read-only scan: iterate by const reference instead of copying every entry.
  for (const auto &page : physical_memorys_) {
    const bool leaked = page.is_using || (page.ref_count != 0U) || (page.physical_map_count != 0U) ||
                        (page.handle != nullptr);
    if (leaked) {
      GELOGE(ge::FAILED, "index:%zu is_using:%d ref_count:%zu physical_map_count:%zu handle:%p",
             page.index, page.is_using, page.ref_count, page.physical_map_count, page.handle);
    }
  }
  physical_memorys_.clear();
}
// Translates a virtual range into the indices of the first and last page it touches, relative to
// virtual_memory_addr_base_.
//
// virtual_memory_addr  start of the range
// virtual_memory_size  length of the range in bytes
// index_begin          out: index of the page containing the first byte
// index_end            out: index of the page containing the last byte
// end_remain_size      out: written ONLY when index_begin < index_end; 0 if the last page has
//                      ref_count == 0, otherwise the number of bytes the range occupies in the last page
//
// Returns FAILED when the range ends beyond the page table, SUCCESS otherwise (the GE_ASSERT_TRUE
// checks return early when an index is out of range).
Status ExpandableActiveMemoryAllocatorImp::GetIndex(const uint8_t *const virtual_memory_addr,
const size_t virtual_memory_size,
size_t &index_begin,
size_t &index_end,
size_t &end_remain_size) const {
const size_t
offset_begin = static_cast<size_t>(PtrToValue(virtual_memory_addr) - PtrToValue(virtual_memory_addr_base_));
index_begin = offset_begin >> page_size_bits_;
index_end = (offset_begin + virtual_memory_size) >> page_size_bits_;
// When the range ends exactly on a page boundary, the last byte belongs to the previous page.
if ((index_end >= 1U) && ((offset_begin + virtual_memory_size) & page_size_mask_) == 0U) {
index_end -= 1U;
}
const size_t physical_memorys_counts = physical_memorys_.size();
if ((index_end >= physical_memorys_counts) && (physical_memorys_counts > 0U)) {
// The range really extends past the last page: reject it.
if ((offset_begin + virtual_memory_size) > (physical_memorys_counts << page_size_bits_)) {
GELOGE(FAILED, "virtual_memory_addr:%p virtual_memory_size:%zu is invalid.",
virtual_memory_addr, virtual_memory_size);
return FAILED;
}
// Otherwise clamp to the last valid page.
index_end = physical_memorys_counts - 1U;
}
GE_ASSERT_TRUE(index_begin < physical_memorys_counts);
GE_ASSERT_TRUE(index_end < physical_memorys_counts);
if (index_begin < index_end) {
if (physical_memorys_[index_end].ref_count == 0U) {
end_remain_size = 0U;
} else {
end_remain_size = (offset_begin + virtual_memory_size) & page_size_mask_;
}
GELOGI("index_begin:%zu index_end:%zu begin_remain_size:%zu end_remain_size:%zu ref_count:%zu", index_begin,
index_end, offset_begin & page_size_mask_, end_remain_size, physical_memorys_[index_end].ref_count);
}
return SUCCESS;
}
// Queues a page for reuse. Only pages that are not queued yet, are unreferenced, are not in use
// and still own a handle are accepted; anything else is silently ignored.
void ExpandableActiveMemoryAllocatorImp::PutToFreeList(PhysicalMemoryInfo &physical_memory) {
  if (physical_memory.in_free_list) {
    return;
  }
  const bool idle = (physical_memory.ref_count == 0U) && (!physical_memory.is_using);
  if ((!idle) || (physical_memory.handle == nullptr)) {
    return;
  }
  free_physical_memorys_.emplace_back(physical_memory.index);
  physical_memory.in_free_list = true;
}
bool ExpandableActiveMemoryAllocatorImp::PopFromFreeList(size_t &index) {
while(!free_physical_memorys_.empty()) {
auto &physical_memory = physical_memorys_[free_physical_memorys_.back()];
free_physical_memorys_.pop_back();
physical_memory.in_free_list = false;
if ((physical_memory.ref_count == 0U) && (!physical_memory.is_using) && (physical_memory.handle != nullptr)) {
index = physical_memory.index;
return true;
}
}
return false;
}
/// Claims the pages listed in `pa_list` for the virtual range starting at `virtual_memory_addr`;
/// entry i of the list backs the i-th page of that range.
///
/// @param virtual_memory_addr  start of the virtual range
/// @param pa_list              indices into physical_memorys_, in range order (may be empty)
/// @param need_map             true: map each claimed page at its virtual address; false: only mark it in use
/// @return SUCCESS             every page was claimed (and the page records were added);
///         PHYSICAL_MEM_USING  a page was already in use; the pages visited so far are unmarked and
///                             offered to the free list;
///         FAILED              mapping or the page-record check failed; pages handled so far are unmapped.
///                             A failed page-record check also sets vapa_check_failed_, which makes
///                             later calls fail at the assert below.
Status ExpandableActiveMemoryAllocatorImp::MallocPhysicalMemory(uint8_t *const virtual_memory_addr,
    const std::vector<size_t> &pa_list, bool need_map) {
  const size_t pa_list_size = pa_list.size();
  // front()/back() on an empty vector is undefined behaviour, so report 0 for both in that case.
  const size_t first_pa_index = pa_list.empty() ? 0U : pa_list.front();
  const size_t last_pa_index = pa_list.empty() ? 0U : pa_list.back();
  GELOGI("pa_list_size:%zu index_begin:%zu index_end:%zu", pa_list_size, first_pa_index, last_pa_index);
  GE_ASSERT_TRUE(!vapa_check_failed_, "ProcPageRecord failed during the last call, return failed immediately");
  // Undoes the first `index` pages of the list: drops one mapping count, clears is_using and unmaps.
  auto unmap_func = [&pa_list, virtual_memory_addr, this](const size_t index) {
    for (size_t i = 0U; i < index; ++i) {
      auto &physical_memory = physical_memorys_[pa_list[i]];
      if (physical_memory.physical_map_count > 0U) {
        physical_memory.physical_map_count--;
      }
      physical_memory.is_using = false;
      void *map_addr = reinterpret_cast<void *>(virtual_memory_addr + (i << page_size_bits_));
      (void) physical_memory_allocator_->UnmapMem(map_addr, HanleToIndex(physical_memory.handle));
    }
  };
  for (size_t index = 0U; index < pa_list_size; ++index) {
    auto &phys_mem = physical_memorys_[pa_list[index]];
    if (!phys_mem.is_using) {
      phys_mem.is_using = true;
      if (!need_map) {
        continue;
      }
      void *map_addr = reinterpret_cast<void *>(virtual_memory_addr + (index << page_size_bits_));
      if (physical_memory_allocator_->MapMem(map_addr, HanleToIndex(phys_mem.handle)) != SUCCESS) {
        phys_mem.is_using = false;
        unmap_func(index);
        return FAILED;
      }
      HP_LOGD("MapMem addr:%p index:%zu size:%zu success, virtual_memory_addr:%p.",
              map_addr, phys_mem.index, page_size_, virtual_memory_addr);
      map_count_++;
      phys_mem.physical_map_count++;
      continue;
    }
    // The page is already taken: release the claim on everything visited so far.
    GELOGI("PHYSICAL_MEM_USING index:%zu ref_count:%zu is_using:%d", pa_list[index],
           phys_mem.ref_count, phys_mem.is_using);
    for (size_t i = 0U; i < index; ++i) {
      auto &physical_memory = physical_memorys_[pa_list[i]];
      if (physical_memory.is_using) {
        physical_memory.is_using = false;
      }
      PutToFreeList(physical_memory);
    }
    return PHYSICAL_MEM_USING;
  }
  if (ProcPageRecordByPaList(virtual_memory_addr, pa_list, PageRecordAction::kAdd) != SUCCESS) {
    unmap_func(pa_list_size);
    (void)ProcPageRecordByPaList(virtual_memory_addr, pa_list, PageRecordAction::kDel);
    vapa_check_failed_ = true;
    GELOGE(FAILED, "malloc failed. virtual_memory_addr: %p, pa_list_size: %zu", virtual_memory_addr, pa_list_size);
    return FAILED;
  }
  return SUCCESS;
}
// Rolls back the page bookkeeping done for [index_begin, index_end] after the page at index_using
// turned out to be in use. Always returns PHYSICAL_MEM_USING.
//
// index_begin      first page of the requested range
// index_end        last page of the requested range
// index_using      page that was found in use; the rollback loop stops above it
// end_remain_size  bytes used in the last page (0 if none)
// reuse_size       in/out: bounds how much is rolled back; recomputed here when NeedAllocPa(index_begin)
Status ExpandableActiveMemoryAllocatorImp::ProcessPhysicalMemoryUsing(size_t index_begin, size_t index_end,
size_t index_using, size_t end_remain_size, size_t &reuse_size) {
size_t roll_back_end = index_end;
// A partially used last page only had its reference taken: drop that reference and
// keep the page out of the rollback loop below.
if ((roll_back_end > 0U) && (end_remain_size > 0U)) {
roll_back_end--;
auto &physical_memory = physical_memorys_[index_end];
if (physical_memory.ref_count > 0U) {
physical_memory.ref_count--;
}
}
if (NeedAllocPa(index_begin)) {
// Recount: one page per consecutive entry with a handle, walking down from roll_back_end,
// plus the partial tail.
reuse_size = 0U;
for (int64_t index = static_cast<int64_t>(roll_back_end);
(index >= 0) && (physical_memorys_[index].handle != nullptr); --index) {
reuse_size += page_size_;
}
reuse_size += end_remain_size;
}
size_t roll_back_size = end_remain_size;
// Walk down towards (but not including) index_using until reuse_size bytes have been covered.
for (size_t index = roll_back_end; ((index > index_using) && (roll_back_size < reuse_size)); --index) {
auto &free_physical_memory = physical_memorys_[index];
// Only pages that are both referenced and in use are released and offered to the free list.
if ((free_physical_memory.ref_count > 0U) && (free_physical_memory.is_using)) {
free_physical_memory.ref_count--;
free_physical_memory.is_using = false;
if (!free_physical_memory.in_free_list) {
PutToFreeList(free_physical_memory);
}
}
roll_back_size += page_size_;
}
return PHYSICAL_MEM_USING;
}
// Releases the claim on every page collected in pa_list_ (one reference dropped, is_using cleared),
// drops the reference on the partially used last page when end_remain_size > 0, adds end_remain_size
// to reuse_size and returns NEW_VA. The assert requires pa_list_ to hold exactly new_va_size pages.
Status ExpandableActiveMemoryAllocatorImp::ProcessNewVa(size_t index_end, size_t end_remain_size, size_t new_va_size,
                                                        size_t &reuse_size) {
  const auto drop_one_ref = [](PhysicalMemoryInfo &page) {
    if (page.ref_count > 0U) {
      --page.ref_count;
    }
  };
  for (const auto pa_index : pa_list_) {
    auto &page = physical_memorys_[pa_index];
    drop_one_ref(page);
    page.is_using = false;
  }
  if (end_remain_size > 0U) {
    drop_one_ref(physical_memorys_[index_end]);
  }
  reuse_size += end_remain_size;
  GE_ASSERT_EQ(new_va_size, pa_list_.size());
  GELOGI("pa_list_size:%zu", pa_list_.size());
  return NEW_VA;
}
// Decides whether free pages should be recycled instead of allocating new ones and, if so, moves
// pages from the free list into pa_list_ until it holds new_va_count entries or the list runs empty.
//
// new_va_count     number of pages the current request needs
// end_remain_size  bytes used in the last page of the request
// index_end        last page index of the request
// index_begin      in/out: adjusted when at least one page was recycled
// recycle          out: whether recycling was enabled by the ratio check
//
// Returns the number of pages taken from the free list.
size_t ExpandableActiveMemoryAllocatorImp::RecyclePhysicalMemory(size_t new_va_count, size_t end_remain_size,
size_t index_end, size_t &index_begin, bool &recycle) {
size_t index = 0U;
size_t recycle_count = 0U;
// Recycle only while theory_min_size_ relative to (physical_memory_size_ + one page) is below kTheoryRatio.
recycle = (((theory_min_size_ * kRatioBase) / (physical_memory_size_ + page_size_)) < kTheoryRatio);
while(recycle && (pa_list_.size() < new_va_count) && PopFromFreeList(index)) {
auto &free_physical_memory = physical_memorys_[index];
free_physical_memory.is_using = true;
pa_list_.emplace_back(free_physical_memory.index);
recycle_count++;
}
if (recycle_count > 0U) {
// Move index_begin up so that only the pages not covered by recycling remain in the range.
index_begin = index_end - (new_va_count - recycle_count);
if ((end_remain_size == 0U) && (index_begin < index_end)) {
index_begin++;
}
}
// NOTE(review): the log prints theory_size_ while the ratio above uses theory_min_size_ - confirm intended.
GELOGI("Current physical_memory_size:%zu theory_size:%zu page_size:%zu reach theory rate:%.2f%s recycle:%d "
"new_va_count:%zu recycle_count:%zu", physical_memory_size_, theory_size_, page_size_,
(kRatioBase * static_cast<float>(theory_min_size_)) / static_cast<float>(physical_memory_size_ + page_size_),
"%", recycle, new_va_count, recycle_count);
return recycle_count;
}
// Drops one reference from the page. When that was the last reference the page is released via
// ReleasePhysicalMemoryByIndex. A page whose reference count is zero afterwards is marked not in use.
void ExpandableActiveMemoryAllocatorImp::ReleasePhysicalMemory(PhysicalMemoryInfo &physical_memory) {
  const bool had_reference = (physical_memory.ref_count > 0U);
  if (had_reference) {
    --physical_memory.ref_count;
    const bool last_reference_gone = (physical_memory.ref_count == 0U);
    if (last_reference_gone) {
      ReleasePhysicalMemoryByIndex(physical_memory.index);
    }
  }
  if (physical_memory.ref_count != 0U) {
    return;
  }
  physical_memory.is_using = false;
}
// Allocates one physical page for page slot `index` and maps it at the slot's virtual address.
//
// purpose        tag forwarded to the physical allocator
// index          page slot to back
// index_end      last page of the request; on failure the slots in (index, index_end] are released
// need_alloc_pa  when true the slot index is appended to pa_list_
// reuse_size     in/out: grows by page_size_ on success
//
// Returns SUCCESS, or FAILED when the allocation or the mapping fails (after rolling back).
Status ExpandableActiveMemoryAllocatorImp::MallocPhysicalMemoryByIndex(const std::string &purpose, size_t index,
size_t index_end, bool need_alloc_pa, size_t &reuse_size) {
// Rollback helper: releases the slots above index_begin up to index_end, then everything in pa_list_.
auto release_func = [index_end, this](const size_t index_begin) {
for (int64_t i = static_cast<int64_t>(index_end); i > static_cast<int64_t>(index_begin); --i) {
ReleasePhysicalMemory(physical_memorys_[i]);
}
for (const auto idx : pa_list_) {
ReleasePhysicalMemory(physical_memorys_[idx]);
}
};
auto &malloc_physical_memory = physical_memorys_[index];
void *map_addr = reinterpret_cast<void *>(virtual_memory_addr_base_ + (index << page_size_bits_));
if (physical_memory_allocator_->MallocPhysical(purpose, malloc_physical_memory.last_pa_index, map_addr, reuse_)
!= SUCCESS) {
release_func(index);
return FAILED;
}
if (physical_memory_allocator_->MapMem(map_addr, malloc_physical_memory.last_pa_index) != SUCCESS) {
// The page was allocated but cannot be mapped: give it back before rolling back.
(void) physical_memory_allocator_->FreePhysical(malloc_physical_memory.last_pa_index);
malloc_physical_memory.last_pa_index = kInvalidIndex;
release_func(index);
return FAILED;
}
map_count_++;
// Initialise the slot as a freshly mapped page with a single reference.
malloc_physical_memory.index = index;
malloc_physical_memory.handle = IndexToHanle(malloc_physical_memory.last_pa_index);
malloc_physical_memory.ref_count = 1UL;
malloc_physical_memory.is_using = true;
malloc_physical_memory.physical_map_count = 1U;
malloc_physical_memory.is_physical_recycle = false;
// Note: the total logged here is the value before this page is added.
HP_LOGD("MapMem addr:%p index:%zu last_pa_index:%zu size:%zu success, total physical_memory_size:%zu.",
map_addr, index, malloc_physical_memory.last_pa_index, page_size_, physical_memory_size_);
physical_memory_size_ += page_size_;
reuse_size += page_size_;
if (need_alloc_pa) {
pa_list_.emplace_back(index);
}
return SUCCESS;
}
/// Backs the virtual range [virtual_memory_addr, virtual_memory_addr + virtual_memory_size) with
/// physical pages, walking the affected page slots from the last one down to the first.
///
/// Per slot:
///   - no handle yet: optionally try to recycle free pages first (RecyclePhysicalMemory), otherwise
///     allocate and map a new page (MallocPhysicalMemoryByIndex);
///   - handle present and referenced on a boundary slot: just take another reference;
///   - handle present and in use: give up and roll back (ProcessPhysicalMemoryUsing);
///   - handle present and idle: claim it.
///
/// @param purpose              tag forwarded to the physical allocator
/// @param virtual_memory_addr  start of the range; addresses rejected by IsValidVirtualAddr are
///                             ignored and SUCCESS is returned
/// @param virtual_memory_size  length of the range in bytes
/// @param reuse_size           in/out: accumulates the bytes that are backed by this call
/// @return SUCCESS on success; the result of ProcessPhysicalMemoryUsing when a page is in use;
///         the result of ProcessNewVa when pages were recycled; a failure status when an assert,
///         an allocation or the page-record check fails.
Status ExpandableActiveMemoryAllocatorImp::MallocPhysicalMemory(const std::string &purpose,
    const uint8_t *const virtual_memory_addr, const size_t virtual_memory_size, size_t &reuse_size) {
  if (!IsValidVirtualAddr(virtual_memory_addr)) {
    return SUCCESS;
  }
  GE_ASSERT_TRUE(!vapa_check_failed_, "ProcPageRecord failed during the last call, return failed immediately");
  size_t index_begin = 0U;
  size_t index_end = 0U;
  size_t end_remain_size = 0U;
  GE_ASSERT_SUCCESS(GetIndex(virtual_memory_addr, virtual_memory_size, index_begin, index_end, end_remain_size));
  pa_list_.clear();
  size_t recycle_count = 0U;
  bool recycle = false;
  // Number of whole pages needed to cover the request.
  const size_t new_va_count = AlignSize(virtual_memory_size, page_size_) >> page_size_bits_;
  // Signed loop index so that the countdown can pass below index_begin == 0 and terminate.
  // Note that RecyclePhysicalMemory may raise index_begin while the loop is running.
  for (int64_t index = static_cast<int64_t>(index_end); index >= static_cast<int64_t>(index_begin); --index) {
    auto &physical_memory = physical_memorys_[index];
    if (physical_memory.handle == nullptr) {
      if ((!physical_memory.is_physical_recycle) && (!recycle)) {
        recycle_count = RecyclePhysicalMemory(new_va_count, end_remain_size, index_end, index_begin, recycle);
      }
      if (((recycle_count > 0U) && (pa_list_.size() == new_va_count)) || (index < static_cast<int64_t>(index_begin))) {
        // Format string fixed: the original read "new_va_count%:zu", which is not a valid conversion.
        HP_LOGD("recycle_count:%zu pa_list_size:%zu new_va_count:%zu index:%" PRId64 " index_begin:%zu", recycle_count,
                pa_list_.size(), new_va_count, index, index_begin);
        break;
      }
      GE_ASSERT_SUCCESS(MallocPhysicalMemoryByIndex(purpose, index, index_end, NeedAllocPa(index_begin), reuse_size));
    } else {
      // index is non-negative inside the loop body, so the cast to size_t matches the %zu conversion.
      HP_LOGD("index:%zu ref_count:%zu, is_using:%d, total physical_memory_size:%zu.",
              static_cast<size_t>(index), physical_memory.ref_count, physical_memory.is_using, physical_memory_size_);
      if ((physical_memory.ref_count > 0U) && IsBoundary(index, index_begin, index_end)) {
        physical_memory.ref_count++;
        continue;
      }
      if (physical_memory.is_using) {
        HP_LOGD("PHYSICAL_MEM_USING index:%zu ref_count:%zu.", static_cast<size_t>(index), physical_memory.ref_count);
        reuse_size = virtual_memory_size;
        return ProcessPhysicalMemoryUsing(index_begin, index_end, index, end_remain_size, reuse_size);
      }
      physical_memory.is_using = true;
      physical_memory.ref_count++;
      reuse_size += page_size_;
      if (NeedAllocPa(index_begin)) {
        pa_list_.emplace_back(index);
      }
    }
  }
  GELOGI("virtual_memory_addr:%p virtual_memory_size:%zu success, total physical_memory_size:%zu.",
         virtual_memory_addr, virtual_memory_size, physical_memory_size_);
  if (recycle && (recycle_count > 0U)) {
    return ProcessNewVa(index_end, end_remain_size, new_va_count, reuse_size);
  }
  GE_ASSERT_SUCCESS(ProcPageRecord(virtual_memory_addr, virtual_memory_size, PageRecordAction::kAdd), "malloc failed,"
                    " virtual_memory_addr: %p, virtual_memory_size: %zu", virtual_memory_addr, virtual_memory_size);
  return SUCCESS;
}
/// Gives up the pages listed in `pa_list` for the virtual range starting at `virtual_memory_addr`;
/// entry i of the list backs the i-th page of that range.
///
/// @param virtual_memory_addr  start of the virtual range
/// @param pa_list              indices into physical_memorys_, in range order (may be empty)
/// @param reduce_ref           true: clear is_using on every listed page
/// @param release              true: unmap every page and release the virtual range;
///                             false: offer every page to the free list instead
/// @return SUCCESS, or a failure status when removing the page records fails.
///         Pages without a handle are skipped with a warning.
Status ExpandableActiveMemoryAllocatorImp::FreePhysicalMemory(uint8_t *const virtual_memory_addr,
                                                              const std::vector<size_t> &pa_list,
                                                              const bool reduce_ref,
                                                              const bool release) {
  size_t pa_list_size = pa_list.size();
  // front()/back() on an empty vector is undefined behaviour, so report 0 for both in that case.
  const size_t first_pa_index = pa_list.empty() ? 0U : pa_list.front();
  const size_t last_pa_index = pa_list.empty() ? 0U : pa_list.back();
  GELOGI("pa_list_size:%zu index_begin:%zu index_end:%zu reduce_ref:%d release:%d",
         pa_list_size, first_pa_index, last_pa_index, reduce_ref, release);
  GE_ASSERT_SUCCESS(ProcPageRecordByPaList(virtual_memory_addr, pa_list, PageRecordAction::kDel));
  for (size_t index = 0U; index < pa_list_size; ++index) {
    const auto malloc_index = pa_list[index];
    auto &physical_memory = physical_memorys_[malloc_index];
    if (physical_memory.handle == nullptr) {
      GELOGW("index:%zu physical_memory.handle is nullptr", malloc_index);
      continue;
    }
    if (reduce_ref && physical_memory.is_using) {
      physical_memory.is_using = false;
    }
    if (release) {
      void *map_addr = reinterpret_cast<void *>(virtual_memory_addr + (index << page_size_bits_));
      (void) physical_memory_allocator_->UnmapMem(map_addr, HanleToIndex(physical_memory.handle));
      map_count_--;
      if (physical_memory.physical_map_count > 0U) {
        physical_memory.physical_map_count--;
      }
    } else {
      PutToFreeList(physical_memory);
    }
  }
  if (release) {
    (void) physical_memory_allocator_->ReleaseMemAddress(virtual_memory_addr, pa_list_size * page_size_);
    GEEVENT("virtual_memory_addr:%p virtual_memory_size:%zu device_id:%u", virtual_memory_addr,
            pa_list.size() * page_size_, device_id_);
  }
  return SUCCESS;
}
// Adds or removes the page records that describe how [malloc_addr, malloc_addr + malloc_size)
// occupies each page it touches.
//
// malloc_addr  start of the range
// malloc_size  length of the range in bytes
// action       kAdd registers a record per page (and fails if the allocator rejects one);
//              any other action removes the records
//
// The whole function is a no-op returning SUCCESS when log_level_ is above DLOG_INFO.
Status ExpandableActiveMemoryAllocatorImp::ProcPageRecord(const uint8_t *const malloc_addr,
const size_t malloc_size,
const PageRecordAction &action) {
if (log_level_ > DLOG_INFO) {
return SUCCESS;
}
size_t index_begin;
size_t index_end;
size_t end_remain_size;
GE_ASSERT_SUCCESS(GetIndex(malloc_addr, malloc_size, index_begin, index_end, end_remain_size));
// GetIndex may leave end_remain_size unwritten; it is not used here.
(void)end_remain_size;
// Failure path (runs unless Dismiss() is reached): release the pages of the range and of pa_list_,
// remove the records again and latch vapa_check_failed_.
ge::ScopeGuard guarder([this, malloc_addr, malloc_size, index_end, index_begin]() {
for (auto i = static_cast<int64_t>(index_end); i > static_cast<int64_t>(index_begin); --i) {
ReleasePhysicalMemory(physical_memorys_[i]);
}
for (const auto idx : pa_list_) {
ReleasePhysicalMemory(physical_memorys_[idx]);
}
(void)ProcPageRecord(malloc_addr, malloc_size, PageRecordAction::kDel);
vapa_check_failed_ = true;
});
for (auto index = index_begin; index <= index_end; ++index) {
const uint8_t *const map_addr = virtual_memory_addr_base_ + (index << page_size_bits_);
// Only the first page can start in the middle; only the last page can end before the page end.
const auto head_offset = (index == index_begin) ? (PtrToValue(malloc_addr) - PtrToValue(map_addr)) : 0U;
const auto using_size = (index == index_end) ?
(PtrToValue(malloc_addr) + malloc_size - PtrToValue(map_addr) - head_offset) : (page_size_ - head_offset);
PageRecord page_record{map_addr, static_cast<size_t>(head_offset), static_cast<size_t>(using_size),
malloc_addr, malloc_size};
if (action == PageRecordAction::kAdd) {
GE_ASSERT_SUCCESS(physical_memory_allocator_->AddPageRecord(HanleToIndex(physical_memorys_[index].handle),
page_record), "virtual and physical page mapping check failed, base: %p, page_size_: %zu, index_begin: %zu,"
" index_end: %zu, index: %zu", virtual_memory_addr_base_, page_size_, index_begin, index_end, index);
} else {
physical_memory_allocator_->DelPageRecord(HanleToIndex(physical_memorys_[index].handle), page_record);
}
}
guarder.Dismiss();
return SUCCESS;
}
// Adds (action == kAdd) or deletes (any other action) one whole-page record for each entry of pa_list.
// Entry i of pa_list selects the physical_memorys_ slot whose page is recorded at virtual address
// malloc_addr + (i << page_size_bits_); every record covers the full page (offset 0, size page_size_).
// Returns SUCCESS without doing anything when log_level_ > DLOG_INFO.
Status ExpandableActiveMemoryAllocatorImp::ProcPageRecordByPaList(const uint8_t *const malloc_addr,
                                                                  const std::vector<size_t> &pa_list,
                                                                  const PageRecordAction &action) {
  if (log_level_ > DLOG_INFO) {
    return SUCCESS;
  }
  const size_t pa_list_size = pa_list.size();
  // Total size of the virtual range described by pa_list.
  const auto malloc_size = pa_list_size << page_size_bits_;
  const bool is_add = (action == PageRecordAction::kAdd);
  size_t index = 0U;
  for (const size_t pa_slot : pa_list) {
    const auto &phys_mem = physical_memorys_[pa_slot];
    const uint8_t *const map_addr = malloc_addr + (index << page_size_bits_);
    PageRecord page_record{map_addr, 0U, page_size_, malloc_addr, malloc_size};
    if (is_add) {
      GE_ASSERT_SUCCESS(physical_memory_allocator_->AddPageRecord(HanleToIndex(phys_mem.handle), page_record),
                        "virtual and physical page mapping check failed, page_size_: %zu, pa_list_size: %zu, index: %zu",
                        page_size_, pa_list_size, index);
    } else {
      physical_memory_allocator_->DelPageRecord(HanleToIndex(phys_mem.handle), page_record);
    }
    ++index;
  }
  return SUCCESS;
}
// Drops the page records of [virtual_memory_addr, virtual_memory_addr + virtual_memory_size) and then,
// for every page index of that range that still has a handle:
//   - reduce_ref: decrements a non-zero ref_count; when it reaches zero the page is marked unused and
//     put on the free list (unless it is already there);
//   - release: pages with ref_count == 0 that are not in use are handed to ReleasePhysicalMemoryByIndex.
// Addresses rejected by IsValidVirtualAddr are ignored and SUCCESS is returned.
Status ExpandableActiveMemoryAllocatorImp::FreePhysicalMemory(const uint8_t *const virtual_memory_addr,
                                                              const size_t virtual_memory_size,
                                                              const bool reduce_ref,
                                                              const bool release) {
  if (!IsValidVirtualAddr(virtual_memory_addr)) {
    return SUCCESS;
  }
  GE_ASSERT_SUCCESS(ProcPageRecord(virtual_memory_addr, virtual_memory_size, PageRecordAction::kDel));
  size_t index_begin = 0U;
  size_t index_end = 0U;
  size_t end_remain_size = 0U;
  GE_ASSERT_SUCCESS(GetIndex(virtual_memory_addr, virtual_memory_size, index_begin, index_end, end_remain_size));
  for (size_t index = index_begin; index <= index_end; ++index) {
    auto &page = physical_memorys_[index];
    if (page.handle == nullptr) {
      continue;
    }
    if (reduce_ref && (page.ref_count > 0U)) {
      --page.ref_count;
      const bool still_referenced = (page.ref_count > 0U);
      HP_LOGD("Reduce reference count index:%zu ref_count:%zu, is_using:%d, total physical_memory_size:%zu.",
              index, page.ref_count, still_referenced ? page.is_using : false, physical_memory_size_);
      if (still_referenced) {
        // Another user still holds this page; it can be neither pooled nor released.
        continue;
      }
      page.is_using = false;
      if (!page.in_free_list) {
        PutToFreeList(page);
      }
    }
    const bool releasable = release && (page.ref_count == 0U) && (!page.is_using);
    if (releasable) {
      ReleasePhysicalMemoryByIndex(index);
    }
  }
  if (release || reduce_ref) {
    GELOGI("virtual_memory_addr:%p virtual_memory_size:%zu success, total physical_memory_size:%zu.",
           virtual_memory_addr, virtual_memory_size, physical_memory_size_);
  }
  return SUCCESS;
}
void ExpandableActiveMemoryAllocatorImp::ReleasePhysicalMemoryByIndex(const size_t index) {
auto &physical_memory = physical_memorys_[index];
GELOGD("index:%zu malloc index:%zu.", index, physical_memory.index);
if (physical_memory.physical_map_count > 0U) {
physical_memory.physical_map_count--;
}
if (physical_memory.handle == nullptr) {
return;
}
void *map_addr = nullptr;
if (physical_memory.physical_map_count == 0U) {
map_addr = reinterpret_cast<void *>(virtual_memory_addr_base_ + (index << page_size_bits_));
(void) physical_memory_allocator_->UnmapMem(map_addr, HanleToIndex(physical_memory.handle));
map_count_--;
(void) physical_memory_allocator_->FreePhysical(HanleToIndex(physical_memory.handle));
if (physical_memory_size_ >= page_size_) {
physical_memory_size_ -= page_size_;
}
physical_memory.handle = nullptr;
physical_memory.is_physical_recycle = true;
GELOGI("Unmap and free addr:%p size:%zu success, total physical_memory_size:%zu index:%zu physical_map_count:%zu.",
map_addr, page_size_, physical_memory_size_, index, physical_memory.physical_map_count);
}
}
// Formats a byte count for logs.
// Sizes strictly greater than 1 KiB / 1 MiB / 1 GiB are rendered as "<bytes>(<value><unit>)" where
// <value> is the size in the largest such unit with two decimals and <unit> is K, M or G.
// Any other size (including exactly 1024) is rendered as the plain decimal byte count.
// A size exactly equal to a unit boundary is expressed in the next smaller unit
// (e.g. exactly 1 GiB is shown in M).
std::string ActiveMemoryUtil::SizeToString(size_t size) {
  const size_t kG = 1024UL * 1024UL * 1024UL;
  const size_t kM = 1024UL * 1024UL;
  const size_t kK = 1024UL;
  const size_t kPrecision = 2U;

  // Select the divisor and closing suffix; unit == 0 means "no human-readable part".
  size_t unit = 0U;
  const char *suffix = "";
  if (size > kG) {
    unit = kG;
    suffix = "G)";
  } else if (size > kM) {
    unit = kM;
    suffix = "M)";
  } else if (size > kK) {
    unit = kK;
    suffix = "K)";
  }

  std::string return_value = std::to_string(size);
  if (unit == 0U) {
    return return_value;
  }
  std::stringstream ss;
  ss << std::fixed << std::setprecision(kPrecision);
  ss << (static_cast<double>(size) / static_cast<double>(unit));
  return_value.append("(");
  return_value.append(ss.str());
  return_value.append(suffix);
  return return_value;
}
// Returns how many times page_size can be shifted right by one before it becomes <= 1,
// i.e. floor(log2(page_size)) for page_size >= 1, and 0 for page_size == 0.
size_t ActiveMemoryUtil::IntegerBitsNum(size_t page_size) {
  size_t bits_num = 0U;
  for (size_t remaining = page_size >> 1U; remaining != 0U; remaining >>= 1U) {
    ++bits_num;
  }
  return bits_num;
}
// Registers one more user of this allocator.
// The first user (ref_count_ becomes 1) fixes page_size_, fills the allocation properties in prop_
// and reserves room for max_page_count entries in the page table and in the free list.
// Every later user only has its page_size checked against the stored page_size_.
Status PhysicalMemoryAllocator::Initialize(size_t page_size, size_t max_page_count) {
  const std::lock_guard<std::recursive_mutex> lock(mutex_);
  log_level_ = dlog_getlevel(GE_MODULE_NAME, nullptr);
  GELOGI("Current ref_count:%zu.", ref_count_);
  ref_count_++;
  if (ref_count_ != 1U) {
    // Already configured by an earlier user; the requested page size has to match.
    GE_ASSERT_EQ(page_size_, page_size);
    return SUCCESS;
  }

  page_size_ = page_size;
  prop_.side = 1U;
  prop_.devid = device_id_;
  prop_.module_id = GE_MODULE_NAME_U16;
  // 1G pages are requested only for the 1G page size; everything else uses 2M pages.
  prop_.pg_type = (page_size == kDrv1GPageSize) ? kDrvMemPropPgType1G : kDrvMemPropPgType2M;
  prop_.mem_type = TransMemType(memory_type_);
  prop_.reserve = 0U;

  if (max_page_count > physical_memorys_.capacity()) {
    physical_memorys_.reserve(max_page_count);
    free_physical_memorys_.reserve(max_page_count);
  }
  return SUCCESS;
}
// Unregisters one user of this allocator.
// First unmaps and releases the virtual range [va, va + size) through ReleaseMemAddress (its result is
// deliberately ignored), then decrements ref_count_. When the last user is gone (ref_count_ == 0) every
// page that is neither referenced (ref_count == 0) nor mapped (map_addrs empty) is freed; pages that are
// still referenced or mapped are reported with GELOGE and kept.
// Once no physical memory is held any more the page table is dropped together with the free list.
// Always returns SUCCESS.
Status PhysicalMemoryAllocator::Finalize(uint8_t *const va, size_t size) {
  (void) ReleaseMemAddress(va, size);
  const std::lock_guard<std::recursive_mutex> lock(mutex_);
  if (ref_count_ > 0U) {
    ref_count_--;
  }
  HP_LOGI("Current ref_count:%zu.", ref_count_);
  // Pages are only freed when the last user has finalized; the check does not depend on the page,
  // so it is evaluated once instead of once per page.
  if (ref_count_ == 0U) {
    for (auto &physical_memory : physical_memorys_) {
      if ((physical_memory.ref_count != 0U) || (!physical_memory.map_addrs.empty())) {
        GELOGE(FAILED, "PhysicalMemoryAllocator pa_index:%zu ref_count:%zu.",
               physical_memory.index, physical_memory.ref_count);
        continue;
      }
      FreePhysicalPage(physical_memory);
    }
  }
  if (physical_memory_size_ == 0U) {
    physical_memorys_.clear();
    // free_physical_memorys_ stores indices into physical_memorys_. Leaving them behind after the
    // table is cleared would make a later MallocPhysical(reuse = true) index the empty table.
    free_physical_memorys_.clear();
  }
  return SUCCESS;
}
// Returns the physical page behind physical_memory to the runtime via aclrtFreePhysical (result ignored),
// clears its handle and subtracts page_size_ from physical_memory_size_.
// Pages without a handle are left untouched.
void PhysicalMemoryAllocator::FreePhysicalPage(PhysicalMemoryInfo &physical_memory) {
  if (physical_memory.handle == nullptr) {
    return;
  }
  (void) aclrtFreePhysical(physical_memory.handle);
  physical_memory.handle = nullptr;
  physical_memory_size_ -= page_size_;
  HP_LOGI("aclrtFreePhysical success pa_index:%zu, current physical memory size:%zu, pg_type:%s",
          physical_memory.index, physical_memory_size_, GetPgType(prop_.pg_type).c_str());
}
// Allocates one new physical page of page_size_ bytes with rtMallocPhysical and appends it to
// physical_memorys_ with ref_count 1.
// pa_index is set to the slot the new page will occupy before the allocation is attempted, so it is
// modified even when the function fails.
// If the allocation fails while prop_.pg_type is 1G, the failure is reported; in "1G only" mode the
// function fails, otherwise prop_.pg_type is switched to 2M (the change stays in prop_) and the
// allocation is retried once. Returns FAILED when no page could be allocated.
// va is only used for the dynamic-memory print and reuse only for logging.
Status PhysicalMemoryAllocator::MallocPhysicalPage(const std::string &purpose, size_t &pa_index, const void *const va,
                                                   bool reuse) {
  pa_index = physical_memorys_.size();
  rtDrvMemHandle handle = nullptr;
  auto ret = rtMallocPhysical(&handle, page_size_, &prop_, 0U);
  if ((ret != RT_ERROR_NONE) && (prop_.pg_type == ge::kDrvMemPropPgType1G)) {
    const bool use_1g_only = VarManager::IsVariableUse1gHugePageOnly();
    const std::string &failed_log = use_1g_only ? k1gHugePageOnlyMallocFail : k1gHugePageFirstMallocFail;
    REPORT_INNER_ERR_MSG("E19999", "call rtMallocPhysical failed, size:%zu, pg_type: %s, %s, ret=%d", page_size_,
                         GetPgType(prop_.pg_type).c_str(), failed_log.c_str(), static_cast<int>(ret));
    GELOGE(FAILED, "call rtMallocPhysical failed, size:%zu, pg_type: %s, %s", page_size_,
           GetPgType(prop_.pg_type).c_str(), failed_log.c_str());
    if (use_1g_only) {
      return FAILED;
    }
    // Fall back to 2M pages for this and all later allocations.
    prop_.pg_type = kDrvMemPropPgType2M;
    ret = rtMallocPhysical(&handle, page_size_, &prop_, 0U);
  }
  if (ret != RT_ERROR_NONE) {
    REPORT_INNER_ERR_MSG("E19999", "call rtMallocPhysical failed, size:%zu, pg_type: %s, ret=%d", page_size_,
                         GetPgType(prop_.pg_type).c_str(), static_cast<int>(ret));
    GELOGE(FAILED, "call rtMallocPhysical failed, size:%zu, pg_type: %s", page_size_,
           GetPgType(prop_.pg_type).c_str());
    return FAILED;
  }

  // Allocation succeeded: publish the page in the table.
  PhysicalMemoryInfo malloc_physical_memory;
  malloc_physical_memory.index = pa_index;
  malloc_physical_memory.ref_count = 1UL;
  malloc_physical_memory.handle = handle;
  physical_memorys_.emplace_back(std::move(malloc_physical_memory));
  physical_memory_size_ += page_size_;
  GE_PRINT_DYNAMIC_MEMORY(aclrtMalloc, ToMallocMemInfo(purpose, va, device_id_, GE_MODULE_NAME_U16).c_str(), page_size_);
  HP_LOGI("rtMallocPhysical success pa_index:%zu, current physical memory size:%zu reuse:%d, memory_type:%u,"
          " pg_type:%s.", pa_index, physical_memory_size_, reuse, memory_type_,
          GetPgType(prop_.pg_type).c_str());
  return SUCCESS;
}
// Provides a physical page and returns its slot through pa_index. In order of preference:
//   1. pa_index already names a page with a handle and ref_count == 0: that page is reused.
//      If it names a page with a handle that is still referenced, va must be one of its mapped
//      addresses; the mapping record is removed and va is unmapped before another page is chosen.
//   2. reuse is set: entries are popped from the free list until one with ref_count == 0 and a
//      handle is found.
//   3. Otherwise a new page is allocated with MallocPhysicalPage.
Status PhysicalMemoryAllocator::MallocPhysical(const std::string &purpose, size_t &pa_index, void *const va,
                                               bool reuse) {
  const std::lock_guard<std::recursive_mutex> lock(mutex_);
  if (pa_index < physical_memorys_.size()) {
    auto &physical_memory = physical_memorys_[pa_index];
    const bool has_handle = (physical_memory.handle != nullptr);
    if (has_handle && (physical_memory.ref_count == 0U)) {
      pa_index = physical_memory.index;
      physical_memory.ref_count++;
      HP_LOGI("reuse self pa_index:%zu.", pa_index);
      return SUCCESS;
    }
    if (has_handle) {
      auto it = physical_memory.map_addrs.find(va);
      GE_ASSERT_TRUE(it != physical_memory.map_addrs.end());
      physical_memory.map_addrs.erase(it);
      (void) aclrtUnmapMem(va);
      HP_LOGI("aclrtUnmapMem success pa_index:%zu va:%p.", pa_index, va);
    }
  }

  if (reuse) {
    while (!free_physical_memorys_.empty()) {
      auto &candidate = physical_memorys_[free_physical_memorys_.back()];
      free_physical_memorys_.pop_back();
      candidate.in_free_list = false;
      // Entries that were re-referenced or freed while queued are simply dropped from the list.
      if ((candidate.ref_count != 0U) || (candidate.handle == nullptr)) {
        continue;
      }
      candidate.ref_count++;
      HP_LOGI("get from pool pa_index:%zu.", candidate.index);
      pa_index = candidate.index;
      return SUCCESS;
    }
  }

  GE_ASSERT_SUCCESS(MallocPhysicalPage(purpose, pa_index, va, reuse));
  return SUCCESS;
}
// Drops one reference from page pa_index (never below zero).
// A page that ends up unreferenced, still has a handle and is not yet queued is appended to the free
// list for later reuse; the physical memory itself is not freed here.
Status PhysicalMemoryAllocator::FreePhysical(size_t pa_index) {
  const std::lock_guard<std::recursive_mutex> lock(mutex_);
  GE_ASSERT_TRUE(pa_index < physical_memorys_.size());
  auto &page = physical_memorys_[pa_index];
  if (page.ref_count > 0U) {
    --page.ref_count;
  }
  const bool can_pool = (!page.in_free_list) && (page.ref_count == 0U) && (page.handle != nullptr);
  if (!can_pool) {
    HP_LOGI("pa_index:%zu in_free_list:%d ref_count:%zu.", pa_index,
            page.in_free_list, page.ref_count);
    return SUCCESS;
  }
  free_physical_memorys_.emplace_back(page.index);
  page.in_free_list = true;
  HP_LOGI("put to pool success pa_index:%zu.", pa_index);
  return SUCCESS;
}
// Appends page_record to the usage table of page pa_index after checking it against the existing
// records of that page:
//   - the new record must refer to the same map_va as the first existing record;
//   - it must not intersect any existing record that has a different head_offset
//     (records with an identical head_offset are not compared).
// A page without records accepts the new record unconditionally.
Status PhysicalMemoryAllocator::AddPageRecord(const size_t pa_index, const PageRecord &page_record) {
  const std::lock_guard<std::recursive_mutex> lock(mutex_);
  GE_ASSERT_TRUE(pa_index < physical_memorys_.size(), "pa_index: %zu, physical_memorys_ size: %zu",
                 pa_index, physical_memorys_.size());
  auto &va_record_table = physical_memorys_[pa_index].va_record_table;
  if (va_record_table.empty()) {
    va_record_table.emplace_back(page_record);
    return SUCCESS;
  }

  const auto &first_record = *va_record_table.begin();
  GE_ASSERT_TRUE(first_record.map_va == page_record.map_va, "pa_index[%zu] is using. using va info: %s, new va info: "
                 "%s", pa_index, first_record.ToString().c_str(), page_record.ToString().c_str());
  for (const auto &using_va : va_record_table) {
    if (using_va.head_offset == page_record.head_offset) {
      continue;
    }
    GE_ASSERT_TRUE(!HasIntersection(using_va, page_record), "va intersection, pa_index[%zu]. using va info: %s, "
                   "new va info: %s", pa_index, using_va.ToString().c_str(), page_record.ToString().c_str());
  }
  va_record_table.emplace_back(page_record);
  return SUCCESS;
}
// Removes the first record of page pa_index whose head_offset and using_size both equal those of
// page_record. Out-of-range pa_index values and records that are not found are silently ignored.
void PhysicalMemoryAllocator::DelPageRecord(const size_t pa_index, const PageRecord &page_record) {
  const std::lock_guard<std::recursive_mutex> lock(mutex_);
  if (pa_index >= physical_memorys_.size()) {
    return;
  }
  auto &va_record_table = physical_memorys_[pa_index].va_record_table;
  auto iter = va_record_table.begin();
  while (iter != va_record_table.end()) {
    const bool same_span = (iter->head_offset == page_record.head_offset) &&
                           (iter->using_size == page_record.using_size);
    if (same_span) {
      va_record_table.erase(iter);
      return;
    }
    ++iter;
  }
}
// Maps page pa_index at virtual address va (page_size_ bytes) with aclrtMapMem and remembers va in the
// page's map_addrs. An address that is already recorded for this page is not mapped again.
// Returns FAILED when aclrtMapMem does not succeed.
Status PhysicalMemoryAllocator::MapMem(void *const va, size_t pa_index) {
  const std::lock_guard<std::recursive_mutex> lock(mutex_);
  GE_ASSERT_TRUE(pa_index < physical_memorys_.size());
  auto &page = physical_memorys_[pa_index];
  const auto mapped = page.map_addrs.find(va);
  if (mapped != page.map_addrs.end()) {
    HP_LOGI("pa_index:%zu addr:%p has been mapped.", pa_index, *mapped);
    return SUCCESS;
  }
  const auto map_ret = aclrtMapMem(va, page_size_, 0U, page.handle, 0U);
  if (map_ret != ACL_SUCCESS) {
    REPORT_INNER_ERR_MSG("E19999", "call aclrtMapMem failed, map_addr:%p.", va);
    GELOGE(FAILED, "call aclrtMapMem failed, map_addr:%p.", va);
    return FAILED;
  }
  page.map_addrs.insert(va);
  HP_LOGI("aclrtMapMem success pa_index:%zu addr:%p.", pa_index, va);
  return SUCCESS;
}
// Checks that va is recorded as a mapped address of page pa_index and logs the request.
// No unmapping happens here and va stays in map_addrs: within this file the recorded addresses are
// unmapped by ReleaseMemAddress, or by MallocPhysical for a page that is still referenced.
// GE_ASSERT_TRUE is defined elsewhere; it presumably returns a failure status when pa_index is out of
// range or va is not recorded for the page - confirm against the macro definition.
Status PhysicalMemoryAllocator::UnmapMem(void *const va, size_t pa_index) {
const std::lock_guard<std::recursive_mutex> lock(mutex_);
GE_ASSERT_TRUE(pa_index < physical_memorys_.size());
auto &physical_memory = physical_memorys_[pa_index];
auto it = physical_memory.map_addrs.find(va);
GE_ASSERT_TRUE(it != physical_memory.map_addrs.end());
// Deliberately only logged: the mapping record is kept for the delayed unmap.
HP_LOGI("pa_index:%zu addr:%p will be delay unmapped.", pa_index, *it);
return SUCCESS;
}
// Reserves a virtual address range of `size` bytes with aclrtReserveMemAddress; the reserved address is
// written to *va, which is then logged.
// NOTE(review): the remaining arguments (0U, nullptr, 1U) are presumably alignment, requested address
// and flags of the ACL API - confirm against the aclrtReserveMemAddress documentation.
// GE_WARN_ASSERT is defined elsewhere; it presumably logs a warning and returns a failure status when
// the reservation does not return ACL_SUCCESS - confirm against the macro definition.
Status PhysicalMemoryAllocator::ReserveMemAddress(void **va, size_t size) const {
GE_WARN_ASSERT(aclrtReserveMemAddress(va, size, 0U, nullptr, 1U) == ACL_SUCCESS);
HP_LOGI("aclrtReserveMemAddress success va:%p size:%zu, device:%u, memory_type:%u.", *va, size, device_id_, memory_type_);
return SUCCESS;
}
// Unmaps every recorded mapping that lies inside [va, va + size) - across all pages of this
// allocator - removes it from the page's map_addrs, and finally releases the reserved virtual range
// with aclrtReleaseMemAddress.
Status PhysicalMemoryAllocator::ReleaseMemAddress(void *const va, size_t size) {
  const std::lock_guard<std::recursive_mutex> lock(mutex_);
  const auto base_addr_value = PtrToValue(va);
  for (auto &physical_memory : physical_memorys_) {
    auto &mapped_set = physical_memory.map_addrs;
    auto it = mapped_set.begin();
    while (it != mapped_set.end()) {
      const auto map_addr = *it;
      const auto map_addr_value = PtrToValue(map_addr);
      const bool in_range =
          (map_addr_value >= base_addr_value) && ((map_addr_value - base_addr_value) < size);
      if (in_range) {
        (void) aclrtUnmapMem(map_addr);
        // erase() hands back the next valid iterator, so the walk continues safely.
        it = mapped_set.erase(it);
        HP_LOGI("aclrtUnmapMem pa_index:%zu addr:%p.", physical_memory.index, map_addr);
      } else {
        ++it;
      }
      HP_LOGD("pa_index:%zu map_addrs:%zu addr:%p.", physical_memory.index, mapped_set.size(), map_addr);
    }
  }
  GE_WARN_ASSERT(aclrtReleaseMemAddress(va) == ACL_SUCCESS);
  HP_LOGI("aclrtReleaseMemAddress success va:%p size:%zu, device:%u, memory_type:%u.", va, size, device_id_, memory_type_);
  return SUCCESS;
}
// Returns the manager singleton, a function-local static constructed on first use.
PhysicalMemoryAllocatorMgr &PhysicalMemoryAllocatorMgr::Instance() {
  static PhysicalMemoryAllocatorMgr mgr;
  return mgr;
}
// Returns the allocator registered for (device_id, memory_type, page_size).
// An existing allocator for that triple is shared; otherwise a new one is created with
// ge::MakeShared, stored in physical_memory_allocators_ and returned.
std::shared_ptr<PhysicalMemoryAllocator> PhysicalMemoryAllocatorMgr::CreateAllocator(uint32_t device_id,
                                                                                     rtMemType_t memory_type,
                                                                                     size_t page_size) {
  const std::lock_guard<std::recursive_mutex> lock(mutex_);
  // Walk the three lookup levels: device -> memory type -> page size.
  const auto dev_it = physical_memory_allocators_.find(device_id);
  if (dev_it != physical_memory_allocators_.end()) {
    auto &by_type = dev_it->second;
    const auto type_it = by_type.find(memory_type);
    if (type_it != by_type.end()) {
      auto &by_page_size = type_it->second;
      const auto it = by_page_size.find(page_size);
      if (it != by_page_size.end()) {
        GELOGI("Reuse PhysicalMemoryAllocator success device id:%u, memory_type: %u, page_size: %zu.",
               device_id, memory_type, page_size);
        return it->second;
      }
    }
  }

  auto allocator = ge::MakeShared<PhysicalMemoryAllocator>(device_id, memory_type);
  GE_ASSERT_TRUE(allocator != nullptr);
  physical_memory_allocators_[device_id][memory_type][page_size] = allocator;
  GELOGI("Create PhysicalMemoryAllocator success device id:%u, memory_type: %u, page_size: %zu.",
         device_id, memory_type, page_size);
  return allocator;
}
// Singleton accessor of the session allocator specialised for FixedBaseExpandableAllocator;
// the instance is a function-local static constructed on first use.
template <>
SessionMemAllocator<FixedBaseExpandableAllocator> &SessionMemAllocator<FixedBaseExpandableAllocator>::Instance() {
  static SessionMemAllocator<FixedBaseExpandableAllocator> singleton;
  return singleton;
}
// Singleton accessor of the session allocator specialised for ActiveMemoryAllocator;
// the instance is a function-local static constructed on first use.
template <>
SessionMemAllocator<ActiveMemoryAllocator> &SessionMemAllocator<ActiveMemoryAllocator>::Instance() {
  static SessionMemAllocator<ActiveMemoryAllocator> singleton;
  return singleton;
}
// Singleton accessor of the session allocator specialised for ExpandableActiveMemoryAllocator;
// the instance is a function-local static constructed on first use.
template <>
SessionMemAllocator<ExpandableActiveMemoryAllocator> &SessionMemAllocator<ExpandableActiveMemoryAllocator>::Instance() {
  static SessionMemAllocator<ExpandableActiveMemoryAllocator> singleton;
  return singleton;
}
}