* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#include "graph/load/model_manager/model_utils.h"
#include "graph/load/model_manager/memory_app_type_classifier.h"
#include <string>
#include "mmpa/mmpa_api.h"
#include "graph_metadef/common/plugin/plugin_manager.h"
#include "framework/common/op/ge_op_utils.h"
#include "framework/common/framework_types_internal.h"
#include "graph_metadef/graph/utils/file_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/manager/mem_manager.h"
#include "graph/ge_context.h"
#include "graph/utils/graph_utils.h"
#include "framework/common/runtime_tensor_desc.h"
#include "base/err_msg.h"
#include "acl/acl_rt.h"
namespace ge {
namespace {
constexpr int32_t kSessionNoReuse = 1;
constexpr uint64_t kSessionScopeMemoryMask = 0x100000000UL;
constexpr uint64_t kMemoryTypeMask = 0xFFFFFFFFUL;
constexpr uint32_t kInvalidDeviceId = UINT32_MAX;
constexpr char_t const *kUsedStreamNum = "used_stream_num";
constexpr char_t const *kWorkSpace = "workspace";
constexpr char_t const *kInner = "built-in";
constexpr char_t const *kVendors = "vendors";
constexpr char_t const *kOpImpl = "op_impl";
constexpr char_t const *kLegacySoSuffix = "_legacy.so";
static thread_local uint32_t load_so_count = 0;
uint64_t GetWorkspaceMemTypeByPriority(const bool is_p2p_memory, const bool is_l1_memory, const bool is_ub_memory,
const bool session_scope_memory) {
if (is_p2p_memory) {
return RT_MEMORY_P2P_DDR;
}
if (is_l1_memory) {
return RT_MEMORY_L1;
}
if (is_ub_memory) {
return kRtMemoryUB;
}
if (session_scope_memory) {
return kSessionScopeMemoryMask | RT_MEMORY_HBM;
}
return RT_MEMORY_HBM;
}
Status GetMaxVarMemSize(const RuntimeParam &runtime_param, uint64_t &max_var_mem_size) {
const auto var_manager = VarManager::Instance(runtime_param.session_id);
GE_CHECK_NOTNULL(var_manager);
max_var_mem_size = static_cast<uint64_t>(var_manager->GetVarMemSize(RT_MEMORY_HBM)) + \
static_cast<uint64_t>(var_manager->GetVarConstPlaceHolderMemSize(RT_MEMORY_HBM));
max_var_mem_size = (max_var_mem_size == 0LU) ? kMemoryVarAddressSize : max_var_mem_size;
GELOGI("GetMaxVarMemSize max_var_mem_size = %" PRIu64 ".", max_var_mem_size);
return SUCCESS;
}
size_t GetVarConstPlaceHolderMemSize(const RuntimeParam &runtime_param) {
size_t var_cph_mem_size = 0L;
const auto var_manager = VarManager::Instance(runtime_param.session_id);
if (var_manager != nullptr) {
var_cph_mem_size = static_cast<size_t>(var_manager->GetVarConstPlaceHolderMemSize(RT_MEMORY_HBM));
}
GELOGI("GetVarConstPlaceHolderMemSize var_cph_mem_size = %zu.", var_cph_mem_size);
return var_cph_mem_size;
}
void FreeP2pMem(const uint32_t device_id, RuntimeParam &runtime_param,
std::pair<const uint64_t, MemInfo> &mem_type_info) {
const auto memory_type = static_cast<rtMemType_t>(mem_type_info.first & kMemoryTypeMask);
if ((mem_type_info.second.memory_base != nullptr)
&& (mem_type_info.second.memory_base != PtrToPtr<void, uint8_t>(ValueToPtr(runtime_param.p2p_fixed_mem_base)))) {
auto &mem_instance = MemManager::Instance().MemInstance(memory_type);
GE_CHK_STATUS(mem_instance.FreeMemory(mem_type_info.second.memory_base, device_id), "failed to free memory");
mem_type_info.second.memory_base = nullptr;
}
}
const std::map<OppImplVersion, std::string> kVersion2VendorPath{{OppImplVersion::kOpp, "/opp/"},
{OppImplVersion::kOppKernel, "/opp_latest/"}};
void GetSpecificSoBins(const ge::GeRootModelPtr &root_model, const SoBinType so_bin_type,
std::vector<OpSoBinPtr> &so_list) {
const auto &all_so_list = root_model->GetAllSoBin();
std::for_each(all_so_list.begin(), all_so_list.end(), [&so_list, so_bin_type](const OpSoBinPtr &so_bin) -> void {
if (so_bin->GetSoBinType() == so_bin_type) {
so_list.emplace_back(so_bin);
}
});
std::stable_partition(so_list.begin(), so_list.end(), [](const OpSoBinPtr &so_bin) {
const std::string so_bin_name = so_bin->GetSoName();
return so_bin_name.size() < std::strlen(kLegacySoSuffix) ||
so_bin_name.compare((so_bin_name.size() - std::strlen(kLegacySoSuffix)), std::strlen(kLegacySoSuffix), kLegacySoSuffix) != 0;
});
}
}
bool ModelUtils::ValidateMemRange(const ConstOpDescPtr &op_desc, const uint64_t total_size, const int64_t offset,
const int64_t size) {
if (CheckInt64AddOverflow(offset, size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Int64 %" PRId64 " and %" PRId64 " addition can result in overflow!", offset, size);
return false;
}
const int64_t mem_range = offset + size;
if (total_size < static_cast<uint64_t>(mem_range)) {
REPORT_INNER_ERR_MSG("E19999", "Node:%s(%s) memory out of range, offset:%" PRId64 ", size:"
"%" PRId64 ", exceed total size:%" PRIu64 ".", op_desc->GetName().c_str(),
op_desc->GetType().c_str(), offset, size, total_size);
GELOGE(OUT_OF_MEMORY, "[Check][Param]Node:%s(%s) memory out of range, offset:%" PRId64
", size:%" PRId64 ", exceed total size:%" PRIu64 ".",
op_desc->GetName().c_str(), op_desc->GetType().c_str(), offset, size, total_size);
return false;
}
return true;
}
Status ModelUtils::CreateOmOppDir(std::string &opp_dir) {
opp_dir.clear();
GE_ASSERT_SUCCESS(ge::GetAscendWorkPath(opp_dir));
if (opp_dir.empty()) {
const ge::char_t *path_env = nullptr;
MM_SYS_GET_ENV(MM_ENV_HOME, path_env);
GE_ASSERT_NOTNULL(path_env);
GE_ASSERT_TRUE(strnlen(path_env, static_cast<size_t>(MMPA_MAX_PATH)) > 0U);
const std::string file_path = ge::RealPath(path_env);
GE_ASSERT_TRUE(!file_path.empty());
opp_dir = file_path;
}
if (opp_dir.back() != '/') {
opp_dir += '/';
}
opp_dir += ".ascend_temp/.om_exe_data/"
+ std::to_string(mmGetPid())
+ "_" + std::to_string(mmGetTid())
+ "_" + std::to_string(load_so_count++)
+ "/";
GELOGD("opp_dir is %s", opp_dir.c_str());
GE_ASSERT_TRUE(mmAccess2(opp_dir.c_str(), M_F_OK) != EN_OK);
GE_ASSERT_TRUE(ge::CreateDir(opp_dir) == 0);
return ge::GRAPH_SUCCESS;
}
Status ModelUtils::RmOmOppDir(const std::string &opp_dir) {
GELOGD("Start to rm opp dir %s", opp_dir.c_str());
if (!opp_dir.empty()) {
GE_ASSERT_TRUE(mmRmdir(opp_dir.c_str()) == 0, "remove dir [%s] failed!", opp_dir.c_str());
}
GELOGI("Remove dir success, opp_dir is %s", opp_dir.c_str());
return ge::GRAPH_SUCCESS;
}
ge::graphStatus ModelUtils::SaveToFile(const std::shared_ptr<ge::OpSoBin> &so_bin, const std::string &opp_path) {
constexpr mmMode_t kAccess = static_cast<mmMode_t>(static_cast<uint32_t>(M_IRUSR) | static_cast<uint32_t>(M_IWUSR) |
static_cast<uint32_t>(M_UMASK_USREXEC));
const int32_t fd = mmOpen2(opp_path.c_str(),
static_cast<int32_t>(static_cast<uint32_t>(M_WRONLY) | static_cast<uint32_t>(M_CREAT) |
static_cast<uint32_t>(O_TRUNC)),
kAccess);
GELOGD("Prepare to save so: [%s], the save path: [%s]", so_bin->GetSoName().c_str(), opp_path.c_str());
GE_MAKE_GUARD(close_opp_path, [&fd]() -> void { mmClose(fd); });
GE_ASSERT(fd >= 0, "open file [%s] failed!", opp_path.c_str());
const int32_t write_count =
mmWrite(fd, const_cast<uint8_t *>(so_bin->GetBinData()), static_cast<uint32_t>(so_bin->GetBinDataSize()));
GE_ASSERT_TRUE((write_count != EN_INVALID_PARAM) && (write_count != EN_ERROR), "write file [%s] failed!",
opp_path.c_str());
return ge::GRAPH_SUCCESS;
}
std::vector<int64_t> ModelUtils::GetInputSize(const ConstOpDescPtr &op_desc) {
std::vector<int64_t> v_input_size;
GE_CHECK_NOTNULL_EXEC(op_desc, return v_input_size);
const size_t inputs_size = op_desc->GetAllInputsSize();
for (size_t i = 0U; i < inputs_size; ++i) {
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
if (tensor_desc == nullptr) {
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
continue;
}
int64_t tensor_size = 0;
GE_IF_BOOL_EXEC(
TensorUtils::GetSize(*tensor_desc, tensor_size) != GRAPH_SUCCESS,
GELOGI("Tensor has no size, op: %s, input index: %zu", op_desc->GetName().c_str(), i);
continue);
GELOGI("GetInputSize op:[%s], index:[%zu], size:[%" PRId64 "]", op_desc->GetName().c_str(), i, tensor_size);
v_input_size.push_back(tensor_size);
}
return v_input_size;
}
std::vector<int64_t> ModelUtils::GetOutputSize(const ConstOpDescPtr &op_desc) {
std::vector<int64_t> v_output_size;
GE_CHECK_NOTNULL_EXEC(op_desc, return v_output_size);
const size_t outputs_size = op_desc->GetOutputsSize();
const std::vector<int64_t> v_output_offset = op_desc->GetOutputOffset();
GE_IF_BOOL_EXEC(v_output_offset.size() != outputs_size,
GELOGW("Output param invalid: output_offset=%zu, outputs=%zu.", v_output_offset.size(), outputs_size);
return v_output_size);
for (size_t i = 0U; i < outputs_size; ++i) {
const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(static_cast<uint32_t>(i));
if (tensor_desc == nullptr) {
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
continue;
}
int64_t string_max_size = 0;
if ((tensor_desc->GetDataType() == DT_STRING) && AttrUtils::GetInt(op_desc, "_op_max_size", string_max_size)) {
GELOGI("Get op max size value = %" PRId64, string_max_size);
v_output_size.push_back(string_max_size);
continue;
}
int64_t tensor_size = 0;
GE_IF_BOOL_EXEC(
TensorUtils::GetSize(*tensor_desc, tensor_size) != GRAPH_SUCCESS,
GELOGI("Tensor has no size, op: %s, output index: %zu", op_desc->GetName().c_str(), i);
continue);
GELOGD("GetOutputSize op:[%s], index:[%zu], size:[%" PRId64 "]", op_desc->GetName().c_str(), i, tensor_size);
v_output_size.push_back(tensor_size);
}
return v_output_size;
}
std::vector<int64_t> ModelUtils::GetWorkspaceSize(const ConstOpDescPtr &op_desc) {
std::vector<int64_t> v_workspace_size;
GE_CHECK_NOTNULL_EXEC(op_desc, return v_workspace_size);
const std::vector<int64_t> v_workspace_num = op_desc->GetWorkspace();
const std::vector<int64_t> v_workspace_bytes = op_desc->GetWorkspaceBytes();
if (v_workspace_num.size() != v_workspace_bytes.size()) {
GELOGW("workspace_num[%zu]!= workspace_bytes[%zu]", v_workspace_num.size(), v_workspace_bytes.size());
return v_workspace_size;
}
return v_workspace_bytes;
}
std::vector<int64_t> ModelUtils::GetWeightSize(const ConstOpDescPtr &op_desc) {
std::vector<int64_t> v_weight_size;
GE_CHECK_NOTNULL_EXEC(op_desc, return v_weight_size);
const std::string type_name = op_desc->GetType();
if ((type_name == CONSTANT) || (type_name == CONSTANTOP)) {
ConstGeTensorPtr weight = nullptr;
if (AttrUtils::GetTensor(*op_desc, ATTR_NAME_WEIGHTS, weight)) {
v_weight_size.push_back(static_cast<int64_t>(TensorUtils::GetWeightSize(weight)));
}
return v_weight_size;
}
const size_t inputs_size = op_desc->GetAllInputsSize();
const vector_bit_t &v_is_input_const = op_desc->GetIsInputConst();
for (size_t i = 0U; i < inputs_size; ++i) {
if ((i < v_is_input_const.size()) && v_is_input_const[i]) {
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
if (tensor_desc == nullptr) {
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
continue;
}
int64_t tensor_size = 0;
(void)TensorUtils::GetSize(*tensor_desc, tensor_size);
v_weight_size.push_back(tensor_size);
}
}
return v_weight_size;
}
std::vector<ConstGeTensorPtr> ModelUtils::GetWeights(const ConstOpDescPtr &op_desc) {
std::vector<ConstGeTensorPtr> v_weights;
GE_CHECK_NOTNULL_EXEC(op_desc, return v_weights);
const std::string op_type = op_desc->GetType();
if ((op_type == CONSTANT) || (op_type == CONSTANTOP)) {
ConstGeTensorPtr weight = nullptr;
if (AttrUtils::GetTensor(*op_desc, ATTR_NAME_WEIGHTS, weight)) {
v_weights.push_back(weight);
}
return v_weights;
}
const size_t inputs_size = op_desc->GetAllInputsSize();
const vector_bit_t &v_is_input_const = op_desc->GetIsInputConst();
for (size_t i = 0U; i < inputs_size; ++i) {
if ((i < v_is_input_const.size()) && v_is_input_const[i]) {
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
if (tensor_desc == nullptr) {
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
continue;
}
ConstGeTensorPtr weight = nullptr;
if (AttrUtils::GetTensor(*tensor_desc, ATTR_NAME_WEIGHTS, weight)) {
v_weights.push_back(weight);
}
}
}
return v_weights;
}
std::vector<ccAICPUTensor> ModelUtils::GetInputDescs(const ConstOpDescPtr &op_desc) {
std::vector<ccAICPUTensor> v_input_descs;
GE_CHECK_NOTNULL_EXEC(op_desc, return v_input_descs);
const size_t inputs_size = op_desc->GetAllInputsSize();
const vector_bit_t &v_is_input_const = op_desc->GetIsInputConst();
for (size_t i = 0U; i < inputs_size; ++i) {
if ((i < v_is_input_const.size()) && v_is_input_const[i]) {
continue;
}
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
if (tensor_desc == nullptr) {
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
continue;
}
uint32_t dim_cnt = 0U;
if (TensorUtils::GetRealDimCnt(*tensor_desc, dim_cnt) != GRAPH_SUCCESS) {
GELOGW("Get dim_cnt unsuccessful");
continue;
}
ccAICPUTensor tmp{};
tmp.format = static_cast<tagOpTensorFormat>(tensor_desc->GetFormat());
tmp.dim_cnt = static_cast<int32_t>(dim_cnt);
tmp.data_type = static_cast<tagOpDataType>(tensor_desc->GetDataType());
for (int32_t j = 0; j < 4; j++) {
const int64_t tensor_dim = tensor_desc->GetShape().GetDim(static_cast<size_t>(j));
if (tensor_dim > INT32_MAX) {
GELOGW("Op[%s], input tensor[%zu], dim[%d]: tensor_dim[%" PRId64 "] is greater than INT32_MAX[%d]",
op_desc->GetName().c_str(), i, j, tensor_dim, INT32_MAX);
}
tmp.dim[j] = (j < tmp.dim_cnt) ? static_cast<int32_t>(tensor_dim) : 1;
}
v_input_descs.push_back(tmp);
}
return v_input_descs;
}
std::vector<ccAICPUTensor> ModelUtils::GetOutputDescs(const ConstOpDescPtr &op_desc) {
std::vector<ccAICPUTensor> v_output_descs;
GE_CHECK_NOTNULL_EXEC(op_desc, return v_output_descs);
const size_t output_num = op_desc->GetOutputsSize();
for (size_t i = 0UL; i < output_num; ++i) {
const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(static_cast<uint32_t>(i));
if (tensor_desc == nullptr) {
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
continue;
}
uint32_t dim_cnt = 0U;
if (TensorUtils::GetRealDimCnt(*tensor_desc, dim_cnt) != GRAPH_SUCCESS) {
GELOGW("Get dim_cnt failed");
continue;
}
ccAICPUTensor tmp{};
tmp.format = static_cast<tagOpTensorFormat>(tensor_desc->GetFormat());
tmp.dim_cnt = static_cast<int32_t>(dim_cnt);
tmp.data_type = static_cast<tagOpDataType>(tensor_desc->GetDataType());
for (int32_t j = 0; j < 4; j++) {
const int64_t tensor_dim = tensor_desc->GetShape().GetDim(static_cast<size_t>(j));
if (tensor_dim > INT32_MAX) {
GELOGW("Op[%s], output tensor[%zu], dim[%d]: tensor_dim[%" PRId64 "] is greater than INT32_MAX[%d]",
op_desc->GetName().c_str(), i, j, tensor_dim, INT32_MAX);
}
tmp.dim[j] = (j < tmp.dim_cnt) ? static_cast<int32_t>(tensor_dim) : 1;
}
v_output_descs.push_back(tmp);
}
return v_output_descs;
}
std::vector<void *> ModelUtils::GetInputAddrs(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc) {
std::vector<uint64_t> mem_type;
return GetInputAddrs(model_param, op_desc, mem_type);
}
std::vector<void *> ModelUtils::GetInputAddrs(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc,
std::vector<uint64_t> &mem_type,
const bool has_optional_addr) {
GELOGD("Start GetInputAddrs: op_name[%s].", op_desc->GetName().c_str());
auto v_input_addr = GetInputDataAddrs(model_param, op_desc, mem_type, has_optional_addr);
if (GetInputOutputDescAddrs(
model_param, op_desc, op_desc->GetAllInputsDescPtr(), mem_type, v_input_addr) != SUCCESS) {
GELOGE(PARAM_INVALID, "[Check] GetInputOutputDescAddrs failed: op_name[%s]", op_desc->GetName().c_str());
return {};
}
return v_input_addr;
}
std::vector<uint64_t> ModelUtils::GetInputAddrsValue(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc) {
std::vector<uint64_t> mem_type;
return GetInputAddrsValue(model_param, op_desc, mem_type);
}
std::vector<uint64_t> ModelUtils::GetInputAddrsValue(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc,
std::vector<uint64_t> &mem_type,
const bool has_optional_addr) {
GELOGD("Start GetInputAddrsValue: op_name[%s]", op_desc->GetName().c_str());
return VPtrToValue(GetInputAddrs(model_param, op_desc, mem_type, has_optional_addr));
}
Status ModelUtils::RefreshAddressByMemType(const RuntimeParam &model_param, const NodeMemInfo &node_mem_info,
void *&mem_addr) {
switch (node_mem_info.mem_type_) {
case RT_MEMORY_L1:
case kRtMemoryUB:
mem_addr = ValueToPtr(static_cast<uint64_t>(node_mem_info.logical_offset_));
break;
case RT_MEMORY_TS:
if (!ValidateMemRange(node_mem_info.op_desc_, model_param.mem_size, node_mem_info.logical_offset_, 0)) {
return FAILED;
}
mem_addr =
model_param.ts_mem_mall->Acquire(node_mem_info.logical_offset_, static_cast<uint64_t>(node_mem_info.size_));
break;
case kSessionScopeMemoryMask | RT_MEMORY_HBM:
case RT_MEMORY_HOST:
case RT_MEMORY_HOST_SVM:
case RT_MEMORY_P2P_DDR: {
const auto &mem_info =
model_param.memory_infos.at(node_mem_info.mem_type_);
mem_addr = mem_info.GetMemory(node_mem_info.logical_offset_, node_mem_info.size_);
break;
}
case RT_MEMORY_HBM:
case RT_MEMORY_L2:
case RT_MEMORY_DEFAULT:
if ((node_mem_info.size_ <= 0) && (node_mem_info.io_type_ == kWorkSpace)) {
return SUCCESS;
}
if (!ValidateMemRange(node_mem_info.op_desc_, model_param.mem_size, node_mem_info.logical_offset_, 0)) {
return FAILED;
}
mem_addr = model_param.GetMemAddr(node_mem_info.logical_offset_);
break;
default:
GELOGE(FAILED, "mem_type %" PRIu64 " is not supported for now.", node_mem_info.mem_type_);
return FAILED;
}
return SUCCESS;
}
std::vector<uint64_t> ModelUtils::GetInputDataAddrsValue(const RuntimeParam &model_param,
const ConstOpDescPtr &op_desc) {
std::vector<uint64_t> mem_type;
return GetInputDataAddrsValue(model_param, op_desc, mem_type);
}
std::vector<uint64_t> ModelUtils::GetInputDataAddrsValue(const RuntimeParam &model_param,
const ConstOpDescPtr &op_desc,
std::vector<uint64_t> &mem_type,
const bool has_optional_addr) {
return VPtrToValue(GetInputDataAddrs(model_param, op_desc, mem_type, has_optional_addr));
}
std::vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc) {
std::vector<uint64_t> mem_type;
return GetInputDataAddrs(model_param, op_desc, mem_type);
}
std::vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc,
std::vector<uint64_t> &mem_type,
const bool has_optional_addr) {
std::vector<void *> v_input_data_addr;
GE_CHECK_NOTNULL_EXEC(op_desc, return v_input_data_addr);
const uint64_t session_id = model_param.session_id;
GELOGD("Print Session Id:%" PRIu64 ", op name[%s]", session_id, op_desc->GetName().c_str());
GE_CHECK_NOTNULL_EXEC(VarManager::Instance(session_id), return v_input_data_addr);
const size_t inputs_size = op_desc->GetInputsSize();
const std::vector<int64_t> v_input_offset = op_desc->GetInputOffset();
const vector_bit_t &v_is_input_const = op_desc->GetIsInputConst();
size_t non_const_index = 0UL;
size_t valid_input_count = 0UL;
std::vector<int64_t> v_memory_type;
const bool has_mem_type_attr = AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, v_memory_type);
const bool check_failed = has_mem_type_attr && (v_memory_type.size() != inputs_size);
if (check_failed) {
REPORT_INNER_ERR_MSG("E19999", "Attr:%s, memory_type.size:%zu != input_desc.size:%zu, op:%s(%s), check invalid",
ATTR_NAME_INPUT_MEM_TYPE_LIST.c_str(), v_memory_type.size(), inputs_size,
op_desc->GetName().c_str(), op_desc->GetType().c_str());
GELOGE(PARAM_INVALID, "[Check][Param] Attr:%s, memory_type.size:%zu != input_desc.size:%zu, op:%s(%s)",
ATTR_NAME_INPUT_MEM_TYPE_LIST.c_str(), v_memory_type.size(), inputs_size,
op_desc->GetName().c_str(), op_desc->GetType().c_str());
return v_input_data_addr;
}
v_input_data_addr.reserve(inputs_size);
for (size_t i = 0U; i < op_desc->GetAllInputsSize(); ++i) {
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
if (tensor_desc == nullptr) {
if (has_optional_addr) {
v_input_data_addr.push_back(nullptr);
mem_type.push_back(kFixMemType);
}
GELOGI(
"Op: %s, Index: %zu, has no input, is optional holder: %d", op_desc->GetName().c_str(), i, has_optional_addr);
continue;
}
valid_input_count++;
int64_t tensor_size = 0;
GE_CHK_STATUS_EXEC(TensorUtils::GetSize(*tensor_desc, tensor_size), return {});
if ((i < v_is_input_const.size()) && v_is_input_const[i]) {
int64_t data_offset = 0;
GE_CHK_STATUS(TensorUtils::GetDataOffset(*tensor_desc, data_offset));
int64_t weight_size = 0;
GE_CHK_STATUS(TensorUtils::GetTensorSizeInBytes(*tensor_desc, weight_size));
GE_IF_BOOL_EXEC(!ValidateMemRange(op_desc, model_param.weight_size, data_offset, weight_size), return {});
void *const weight_addr = ValueToPtr(model_param.weight_base + static_cast<uint64_t>(data_offset));
v_input_data_addr.push_back(weight_addr);
mem_type.push_back(kWeightMemType);
GELOGI("[IMAS]GetInputDataAddrs graph_%u type[C] name[%s] input[%zu] size[%" PRId64 "] memaddr[%p]",
model_param.graph_id, op_desc->GetName().c_str(), i, weight_size, weight_addr);
non_const_index++;
continue;
}
GE_IF_BOOL_EXEC(non_const_index >= v_input_offset.size(), break);
const int64_t input_offset = v_input_offset[non_const_index];
const auto iter = model_param.fileconstant_addr_mapping.find(input_offset);
if (iter != model_param.fileconstant_addr_mapping.end()) {
v_input_data_addr.push_back(reinterpret_cast<void *>(iter->second));
mem_type.push_back(kConstantMemType);
non_const_index++;
continue;
}
non_const_index++;
int64_t inner_offset = 0;
(void)AttrUtils::GetInt(op_desc->MutableInputDesc(static_cast<uint32_t>(i)), ATTR_NAME_INNER_OFFSET, inner_offset);
const bool is_check_var_manager = (GetVarConstPlaceHolderMemSize(model_param) > 0U || model_param.var_size > 0U);
if (is_check_var_manager && (VarManager::Instance(session_id)->IsVarAddr(input_offset - inner_offset))) {
void *variable_addr = nullptr;
if (CheckInt64AddOverflow(tensor_size, inner_offset) != SUCCESS) {
return {};
}
if (GetVarAddr(model_param, input_offset - inner_offset, variable_addr) != SUCCESS) {
return {};
}
variable_addr = ValueToPtr(PtrToValue(variable_addr) + static_cast<uint64_t>(inner_offset));
v_input_data_addr.push_back(variable_addr);
mem_type.push_back(kVarMemType);
GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%zu] memaddr[%p]",
model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr);
continue;
}
int64_t tensor_mem_type = -1;
const bool tensor_has_mem_type = AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, tensor_mem_type);
uint64_t memory_type(RT_MEMORY_DEFAULT);
if (tensor_has_mem_type) {
memory_type = static_cast<uint64_t>(tensor_mem_type);
} else if (v_memory_type.size() >= valid_input_count) {
memory_type = static_cast<uint64_t>(v_memory_type[valid_input_count - 1UL]);
} else {
}
const NodeMemInfo node_mem_info{memory_type, op_desc, i, "input", tensor_size, input_offset};
void *mem_addr = nullptr;
if (RefreshAddressByMemType(model_param, node_mem_info, mem_addr) != SUCCESS) {
GELOGE(FAILED, "[IMAS]get failed for graph_%u %s", model_param.graph_id,
node_mem_info.ToString().c_str());
return {};
}
GELOGI("[IMAS]graph_%u %s memaddr[%p]", model_param.graph_id, node_mem_info.ToString().c_str(), mem_addr);
v_input_data_addr.push_back(mem_addr);
mem_type.push_back(memory_type);
}
return v_input_data_addr;
}
Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const int64_t offset, void *&var_addr) {
Status ret = SUCCESS;
GE_CHECK_NOTNULL(VarManager::Instance(model_param.session_id));
const rtMemType_t mem_type = VarManager::Instance(model_param.session_id)->GetVarMemType(offset);
switch (mem_type) {
case RT_MEMORY_RDMA_HBM: {
if (offset < 0) {
REPORT_INNER_ERR_MSG("E19999", "Param offset:%" PRId64 " < 0, check invalid", offset);
GELOGE(PARAM_INVALID, "[Check][Param] Param offset:%" PRId64 " cannot be negative", offset);
ret = PARAM_INVALID;
break;
}
var_addr = ValueToPtr(static_cast<uint64_t>(offset));
break;
}
case RT_MEMORY_HBM: {
const auto &var_manager = VarManager::Instance(model_param.session_id);
GE_CHECK_NOTNULL(var_manager);
uint8_t *var_logic_addr = nullptr;
var_logic_addr = PtrToPtr<void, uint8_t>(ValueToPtr(static_cast<uint64_t>(offset)));
var_addr = var_manager->GetVarMemoryAddr(model_param.graph_name, var_logic_addr,
RT_MEMORY_HBM, model_param.device_id);
GE_CHK_BOOL_RET_STATUS(var_addr != nullptr, INTERNAL_ERROR, "[Get][VarAddr] failed.");
break;
}
default: {
REPORT_INNER_ERR_MSG("E19999", "Get mem_type:%u for offset:%" PRId64 " is unsupported, "
"check invalid", mem_type, offset);
GELOGE(PARAM_INVALID, "[Check][Param] Get mem_type:%d for offset:%" PRId64 " is unsupported, check invalid",
mem_type, offset);
ret = PARAM_INVALID;
break;
}
}
GE_CHECK_NOTNULL(var_addr);
return ret;
}
std::vector<void *> ModelUtils::GetOutputAddrs(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc) {
std::vector<uint64_t> mem_type;
return GetOutputAddrs(model_param, op_desc, mem_type);
}
std::vector<void *> ModelUtils::GetOutputAddrs(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc,
std::vector<uint64_t> &mem_type,
const bool has_optional_addr) {
GELOGD("Start GetOutputAddrs: op_name[%s].", op_desc->GetName().c_str());
auto v_output_addr = GetOutputDataAddrs(model_param, op_desc, mem_type, has_optional_addr);
if (GetInputOutputDescAddrs(
model_param, op_desc, op_desc->GetAllOutputsDescPtr(), mem_type, v_output_addr) != SUCCESS) {
GELOGE(PARAM_INVALID, "[Check] GetInputOutputDescAddrs failed: op_name[%s]", op_desc->GetName().c_str());
return {};
}
return v_output_addr;
}
std::vector<uint64_t> ModelUtils::GetOutputAddrsValue(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc) {
std::vector<uint64_t> mem_type;
return GetOutputAddrsValue(model_param, op_desc, mem_type);
}
std::vector<uint64_t> ModelUtils::GetOutputAddrsValue(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc,
std::vector<uint64_t> &mem_type,
const bool has_optional_addr) {
GELOGD("Start GetOutputAddrsValue: op_name[%s].", op_desc->GetName().c_str());
return VPtrToValue(GetOutputAddrs(model_param, op_desc, mem_type, has_optional_addr));
}
std::vector<uint64_t> ModelUtils::GetOutputDataAddrsValue(const RuntimeParam &model_param,
const ConstOpDescPtr &op_desc) {
std::vector<uint64_t> mem_type;
return GetOutputDataAddrsValue(model_param, op_desc, mem_type);
}
std::vector<uint64_t> ModelUtils::GetOutputDataAddrsValue(const RuntimeParam &model_param,
const ConstOpDescPtr &op_desc,
std::vector<uint64_t> &mem_type) {
return VPtrToValue(GetOutputDataAddrs(model_param, op_desc, mem_type));
}
std::vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc) {
std::vector<uint64_t> mem_type;
return GetOutputDataAddrs(model_param, op_desc, mem_type);
}
std::vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc,
std::vector<uint64_t> &mem_type,
const bool has_optional_addr) {
std::vector<void *> v_output_data_addr;
GE_CHECK_NOTNULL_EXEC(op_desc, return v_output_data_addr);
GELOGD("Start GetOutputDataAddrs: op_name[%s]", op_desc->GetName().c_str());
const uint64_t session_id = model_param.session_id;
GE_CHECK_NOTNULL_EXEC(VarManager::Instance(session_id), return v_output_data_addr);
const size_t outputs_size = op_desc->GetOutputsSize();
const std::vector<int64_t> v_output_offset = op_desc->GetOutputOffset();
GE_IF_BOOL_EXEC(v_output_offset.size() != outputs_size,
GELOGW("Output param invalid: output_offset=%zu, outputs=%zu.", v_output_offset.size(), outputs_size);
return v_output_data_addr);
std::vector<int64_t> v_memory_type;
const bool has_mem_type_attr = AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, v_memory_type);
if (has_mem_type_attr && (v_memory_type.size() != outputs_size)) {
REPORT_INNER_ERR_MSG("E19999", "Attr:%s, memory_type.size:%zu != output_desc.size:%zu, op:%s(%s), check invalid",
ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), v_memory_type.size(), outputs_size,
op_desc->GetName().c_str(), op_desc->GetType().c_str());
GELOGE(PARAM_INVALID, "[Check][Param] Attr:%s, memory_type.size:%zu != output_desc.size:%zu, op:%s(%s)",
ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), v_memory_type.size(), outputs_size,
op_desc->GetName().c_str(), op_desc->GetType().c_str());
return v_output_data_addr;
}
v_output_data_addr.reserve(outputs_size);
for (size_t i = 0U; i < outputs_size; ++i) {
const auto iter = model_param.fileconstant_addr_mapping.find(v_output_offset[i]);
if (iter != model_param.fileconstant_addr_mapping.end()) {
v_output_data_addr.push_back(reinterpret_cast<void *>(iter->second));
mem_type.push_back(kConstantMemType);
GELOGI("Find mapping existed. index:%zu key offset:%" PRId64 ", dev addr:%" PRIx64,
i, v_output_offset[i], iter->second);
continue;
}
const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(static_cast<uint32_t>(i));
if (tensor_desc == nullptr) {
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
continue;
}
if (TensorUtils::IsMemorySizeCalcTypeAlwaysEmpty(*tensor_desc)) {
if (has_optional_addr) {
v_output_data_addr.push_back(nullptr);
mem_type.push_back(kFixMemType);
}
GELOGD("%s is an optional output, has option addr:%d.",
op_desc->GetName().c_str(), static_cast<int32_t>(has_optional_addr));
continue;
}
int64_t inner_offset = 0;
(void)AttrUtils::GetInt(op_desc->MutableOutputDesc(static_cast<uint32_t>(i)), ATTR_NAME_INNER_OFFSET, inner_offset);
int64_t tensor_size = 0;
GE_CHK_STATUS_EXEC(TensorUtils::GetSize(*tensor_desc, tensor_size), return {});
const bool is_check_var_manager = (GetVarConstPlaceHolderMemSize(model_param) > 0U || model_param.var_size > 0U);
if (is_check_var_manager && (VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i] - inner_offset))) {
void *variable_addr = nullptr;
if (CheckInt64AddOverflow(tensor_size, inner_offset) != SUCCESS) {
return {};
}
if (GetVarAddr(model_param, v_output_offset[i] - inner_offset, variable_addr) != SUCCESS) {
return {};
}
variable_addr = ValueToPtr(PtrToValue(variable_addr) + static_cast<uint64_t>(inner_offset));
v_output_data_addr.push_back(variable_addr);
mem_type.push_back(kVarMemType);
GELOGI("[IMAS]graph_%u type[V] name[%s] output[%zu] memaddr[%p]",
model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr);
continue;
}
int64_t tensor_mem_type = -1;
const bool tensor_has_mem_type = AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, tensor_mem_type);
uint64_t memory_type(RT_MEMORY_DEFAULT);
if (tensor_has_mem_type) {
memory_type = static_cast<uint64_t>(tensor_mem_type);
} else if (has_mem_type_attr) {
memory_type = static_cast<uint64_t>(v_memory_type[i]);
} else {
}
const NodeMemInfo node_mem_info{memory_type, op_desc, i, "output", tensor_size, v_output_offset[i]};
void *mem_addr = nullptr;
if (RefreshAddressByMemType(model_param, node_mem_info, mem_addr) != SUCCESS) {
GELOGE(FAILED, "[IMAS]get failed for graph_%u %s", model_param.graph_id,
node_mem_info.ToString().c_str());
return {};
}
GELOGI("[IMAS]graph_%u %s memaddr[%p]", model_param.graph_id, node_mem_info.ToString().c_str(), mem_addr);
v_output_data_addr.push_back(mem_addr);
mem_type.push_back(memory_type);
}
return v_output_data_addr;
}
static Status FillSinkTensorDesc(RuntimeTensorDesc &sink_tensor_desc, const GeTensorDescPtr &tensor_desc,
const uint64_t data_addr) {
sink_tensor_desc.data_addr = data_addr;
sink_tensor_desc.dtype = static_cast<int64_t>(tensor_desc->GetDataType());
sink_tensor_desc.format = static_cast<int64_t>(tensor_desc->GetFormat());
const auto shape = tensor_desc->GetShape();
const int64_t dim_num = static_cast<int64_t>(shape.GetDimNum());
sink_tensor_desc.shape[0] = dim_num;
if (dim_num > kMaxDimSize) {
GELOGE(PARAM_INVALID, "shape dim size[%" PRId64 "] out of range[%zu]", dim_num, kMaxDimSize);
return FAILED;
}
for (int64_t i = 0; i < dim_num; i++) {
sink_tensor_desc.shape[i + 1] = shape.GetDim(static_cast<size_t>(i));
}
const auto ori_shape = tensor_desc->GetOriginShape();
const int64_t ori_dim_num = static_cast<int64_t>(ori_shape.GetDimNum());
sink_tensor_desc.original_shape[0] = ori_dim_num;
if (ori_dim_num > kMaxDimSize) {
GELOGE(PARAM_INVALID, "original shape dim size[%" PRId64 "] out of range[%zu]", ori_dim_num, kMaxDimSize);
return FAILED;
}
for (int64_t i = 0; i < ori_dim_num; i++) {
sink_tensor_desc.original_shape[i + 1] = ori_shape.GetDim(static_cast<size_t>(i));
}
return SUCCESS;
}
Status ModelUtils::GetInputOutputDescAddrs(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc,
const OpDesc::Vistor<GeTensorDescPtr> &tensor_desc_visitor,
const std::vector<uint64_t> &mem_type,
std::vector<void *> &v_addrs) {
std::vector<int64_t> v_data_mem_type;
(void) AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, v_data_mem_type);
size_t tensor_cnt = 0UL;
size_t desc_idx = 0UL;
for (const auto &tensor_desc : tensor_desc_visitor) {
size_t cur_desc_idx = desc_idx++;
while ((tensor_cnt < v_addrs.size()) && (tensor_cnt < mem_type.size()) &&
(mem_type[tensor_cnt] == kFixMemType) && (v_addrs[tensor_cnt] == nullptr)) {
tensor_cnt++;
}
if (tensor_desc == nullptr) {
continue;
}
if (TensorUtils::IsMemorySizeCalcTypeAlwaysEmpty(*tensor_desc)) {
GELOGD("%s is an optional output.", op_desc->GetName().c_str());
continue;
}
int64_t mem_offset;
const bool has_offset_attr = AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_DESC_MEM_OFFSET, mem_offset);
if (!has_offset_attr) {
tensor_cnt++;
continue;
}
constexpr size_t size = sizeof(struct RuntimeTensorDesc);
GE_IF_BOOL_EXEC(!ValidateMemRange(op_desc, model_param.mem_size, mem_offset, static_cast<int64_t>(size)),
return FAILED);
void *mem_addr = nullptr;
if ((v_data_mem_type.size() > cur_desc_idx) &&
(v_data_mem_type[cur_desc_idx] == static_cast<int64_t>(RT_MEMORY_TS))) {
mem_addr = model_param.ts_mem_mall->Acquire(mem_offset, size);
} else {
mem_addr = model_param.GetMemAddr(mem_offset);
}
if (tensor_cnt >= v_addrs.size()) {
GELOGE(FAILED, "[Check] update tensor desc addr failed, tensor_cnt:%zu, size:%zu", tensor_cnt, v_addrs.size());
return FAILED;
}
RuntimeTensorDesc sink_tensor_desc;
GE_CHK_STATUS_RET_NOLOG(FillSinkTensorDesc(sink_tensor_desc, tensor_desc, PtrToValue(v_addrs[tensor_cnt])));
const aclError rt_ret = aclrtMemcpy(mem_addr, size, &sink_tensor_desc, size, ACL_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != ACL_SUCCESS) {
REPORT_INNER_ERR_MSG("E19999", "Call aclrtMemcpy failed, size:%zu, ret:%d", size, rt_ret);
GELOGE(RT_FAILED, "[Call][aclrtMemcpy] copy data_addr failed, size:%zu, ret:%d", size, rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
v_addrs[tensor_cnt] = mem_addr;
GELOGD("Calc op[%s] tenser[%zu] desc addr[%p] ok", op_desc->GetName().c_str(), tensor_cnt, mem_addr);
tensor_cnt++;
}
return SUCCESS;
}
std::vector<uint64_t> ModelUtils::GetWorkspaceDataAddrsValue(const RuntimeParam &model_param,
const ConstOpDescPtr &op_desc) {
std::vector<uint64_t> mem_type;
return GetWorkspaceDataAddrsValue(model_param, op_desc, mem_type);
}
std::vector<uint64_t> ModelUtils::GetWorkspaceDataAddrsValue(const RuntimeParam &model_param,
const ConstOpDescPtr &op_desc,
std::vector<uint64_t> &mem_type) {
return VPtrToValue(GetWorkspaceDataAddrs(model_param, op_desc, mem_type));
}
std::vector<void *> ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc) {
std::vector<uint64_t> mem_type;
return GetWorkspaceDataAddrs(model_param, op_desc, mem_type);
}
std::vector<void *> ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc,
std::vector<uint64_t> &mem_type) {
std::vector<void *> v_workspace_data_addr;
GE_CHECK_NOTNULL_EXEC(op_desc, return v_workspace_data_addr);
GELOGD("Start GetWorkspaceDataAddrs: op_name[%s].", op_desc->GetName().c_str());
const std::vector<int64_t> v_workspace_offset = op_desc->GetWorkspace();
const std::vector<int64_t> v_workspace_bytes = op_desc->GetWorkspaceBytes();
if (v_workspace_offset.size() != v_workspace_bytes.size()) {
GELOGW("v_workspace_offset.size()[%zu] != v_workspace_bytes.size()[%zu]", v_workspace_offset.size(),
v_workspace_bytes.size());
return v_workspace_data_addr;
}
vector_bit_t workspace_reuse_flag;
const bool has_workspace_reuse = AttrUtils::GetListBool(op_desc, "workspace_reuse_flag", workspace_reuse_flag);
std::vector<int64_t> v_memory_type;
std::vector<int64_t> workspace_memory_type;
const bool has_mem_type_attr = AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, v_memory_type);
const bool has_mem_type_workspace =
AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_TYPE_LIST, workspace_memory_type);
if ((has_mem_type_attr && (v_memory_type.size() != v_workspace_offset.size())) ||
(has_mem_type_workspace && (workspace_memory_type.size() != v_workspace_offset.size()))) {
REPORT_INNER_ERR_MSG("E19999",
"Attr:%s, memory_type.size:%zu and %s, memory_type.size:%zu and workspaces num:%zu should be "
"same, op:%s(%s), check invalid",
TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), v_memory_type.size(),
ATTR_NAME_WORKSPACE_TYPE_LIST.c_str(), workspace_memory_type.size(), v_workspace_offset.size(),
op_desc->GetName().c_str(), op_desc->GetType().c_str());
GELOGE(PARAM_INVALID,
"[Check][Param] Attr:%s, memory_type.size:%zu and %s, memory_type.size:%zu and workspaces num:%zu should be "
"same, op:%s(%s), check invalid",
TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), v_memory_type.size(), ATTR_NAME_WORKSPACE_TYPE_LIST.c_str(),
workspace_memory_type.size(), v_workspace_offset.size(), op_desc->GetName().c_str(),
op_desc->GetType().c_str());
return v_workspace_data_addr;
}
std::vector<int32_t> workspace_no_reuse_scope;
const bool has_workspace_no_reuse_scope = AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE,
workspace_no_reuse_scope);
v_workspace_data_addr.reserve(v_workspace_bytes.size());
for (size_t i = 0U; i < v_workspace_bytes.size(); ++i) {
const bool aicpu_work_space = (has_workspace_reuse && (i < workspace_reuse_flag.size()) &&
(!workspace_reuse_flag[i]) && (!model_param.is_single_op));
if (aicpu_work_space) {
void *const mem_addr = model_param.aicpu_mem_mall->Acquire(v_workspace_offset[i],
static_cast<uint64_t>(v_workspace_bytes[i]));
v_workspace_data_addr.push_back(mem_addr);
mem_type.push_back(kAicpuMemMallMemType);
GELOGI("[IMAS]graph_%u type[F] name[%s] aicpu workspace[%zu] offset[%" PRId64 "] bytes[%" PRId64 "] memaddr[%p]",
model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i],
mem_addr);
continue;
}
const bool session_scope_memory = (has_workspace_no_reuse_scope) && (i < workspace_no_reuse_scope.size()) &&
(workspace_no_reuse_scope[i] == kSessionNoReuse);
const bool is_p2p_memory =
has_mem_type_workspace && (static_cast<uint64_t>(workspace_memory_type[i]) == RT_MEMORY_P2P_DDR);
const bool is_l1_memory = has_mem_type_attr && (static_cast<uint64_t>(v_memory_type[i]) == RT_MEMORY_L1);
const bool is_ub_memory = has_mem_type_attr && (static_cast<uint64_t>(v_memory_type[i]) == kRtMemoryUB);
const uint64_t memory_type = GetWorkspaceMemTypeByPriority(is_p2p_memory, is_l1_memory, is_ub_memory,
session_scope_memory);
const NodeMemInfo node_mem_info{memory_type, op_desc, i, kWorkSpace, v_workspace_bytes[i], v_workspace_offset[i]};
void *mem_addr = nullptr;
if (RefreshAddressByMemType(model_param, node_mem_info, mem_addr) != SUCCESS) {
GELOGE(FAILED, "[IMAS]get failed for graph_%u %s", model_param.graph_id, node_mem_info.ToString().c_str());
return {};
}
GELOGI("[IMAS]graph_%u %s memaddr[%p]", model_param.graph_id, node_mem_info.ToString().c_str(), mem_addr);
v_workspace_data_addr.push_back(mem_addr);
mem_type.push_back(memory_type);
}
return v_workspace_data_addr;
}
Status ModelUtils::GetRtAddress(const RuntimeParam ¶m, const uintptr_t logic_addr, uint8_t *&mem_addr) {
uint64_t mem_type = kFixMemType;
return GetRtAddress(param, logic_addr, mem_addr, mem_type);
}
Status ModelUtils::GetRtAddress(const RuntimeParam ¶m, const uintptr_t logic_addr, uint8_t *&mem_addr,
uint64_t &mem_type) {
if (logic_addr == std::numeric_limits<uintptr_t>::max()) {
GELOGI("Got placeholder logic addr.");
mem_addr = nullptr;
return SUCCESS;
}
void *runtime_base_addr = nullptr;
uint64_t max_logic_offset = 0U;
uint64_t max_var_mem_size = 0U;
GE_CHK_STATUS_RET(GetMaxVarMemSize(param, max_var_mem_size), "Failed to get MaxVarMemSize");
bool is_check_var_manager = (GetVarConstPlaceHolderMemSize(param) > 0U || param.var_size > 0U);
if ((param.logic_mem_base <= logic_addr) && (logic_addr < (param.logic_mem_base + param.mem_size))) {
mem_type = kFmMemType;
const size_t logical_offset = logic_addr - param.logic_mem_base;
mem_addr = reinterpret_cast<uint8_t *>(param.GetMemAddr(static_cast<int64_t>(logical_offset)));
return SUCCESS;
} else if ((param.logic_weight_base <= logic_addr) && (logic_addr < (param.logic_weight_base + param.weight_size))) {
mem_type = kWeightMemType;
runtime_base_addr = ValueToPtr(param.weight_base - param.logic_weight_base);
max_logic_offset = param.logic_weight_base + param.weight_size;
GELOGI("The logic addr:0x%" PRIx64 " is weight address, base:0x%" PRIx64 ", size:%" PRIu64 ", mem_type:%" PRIu64 ".",
logic_addr, param.logic_weight_base, param.weight_size, mem_type);
} else if (is_check_var_manager && (param.logic_var_base <= logic_addr)
&& (logic_addr < (param.logic_var_base + max_var_mem_size))) {
const auto &iter = param.fileconstant_addr_mapping.find(static_cast<int64_t>(logic_addr));
if (iter != param.fileconstant_addr_mapping.cend()) {
GELOGI("Find mapping existed, logic_addr:%" PRId64 ", dev_addr:0x%" PRIx64,
static_cast<int64_t>(logic_addr), iter->second);
mem_addr = PtrToPtr<void, uint8_t>(ValueToPtr(iter->second));
GE_CHECK_NOTNULL(mem_addr);
mem_type = kConstantMemType;
return SUCCESS;
}
const auto &var_manager = VarManager::Instance(param.session_id);
GE_CHECK_NOTNULL(var_manager);
uint8_t *var_logic_addr = nullptr;
var_logic_addr = PtrToPtr<void, uint8_t>(ValueToPtr(logic_addr));
mem_addr = var_manager->GetVarMemoryAddr(var_logic_addr, RT_MEMORY_HBM, param.device_id);
GE_CHECK_NOTNULL(mem_addr);
mem_type = kVarAutoMemType;
return SUCCESS;
} else if (logic_addr != 0U) {
for (const auto &iter : param.memory_infos) {
const auto &mem_info = iter.second;
GE_ASSERT_TRUE(mem_info.logic_memory_base >= 0);
const uint64_t logic_begin = mem_info.memory_type == RT_MEMORY_P2P_DDR
? param.logic_mem_base + param.mem_size
: static_cast<uint64_t>(mem_info.logic_memory_base);
GE_ASSERT_TRUE(mem_info.memory_size >= 0);
if ((logic_begin <= logic_addr) && (logic_addr < logic_begin + static_cast<uint64_t>(mem_info.memory_size))) {
mem_addr = mem_info.memory_base + (logic_addr - logic_begin);
mem_type = mem_info.memory_type;
GELOGI("The logic addr:0x%" PRIx64 " matches type [%" PRIu64 "] address, logic base:0x%" PRIx64 ", size:%" PRIu64
", mem_addr:%p", logic_addr, mem_type, logic_begin, mem_info.memory_size, mem_addr);
return SUCCESS;
}
}
mem_addr = nullptr;
REPORT_INNER_ERR_MSG("E19999", "Check param logic addr:0x%" PRIx64 " abnormal", static_cast<uint64_t>(logic_addr));
GELOGE(PARAM_INVALID, "[Check][Param] The logic addr:0x%" PRIx64 " is abnormal", logic_addr);
return PARAM_INVALID;
} else {
GELOGW("The logic addr is:0x%" PRIx64 ", base:0x%" PRIx64 ", size:%" PRIu64,
logic_addr, param.logic_var_base, param.var_size);
}
mem_addr = PtrAdd<uint8_t>(static_cast<uint8_t *>(runtime_base_addr), static_cast<size_t>(max_logic_offset),
static_cast<size_t>(logic_addr));
GELOGI("The logic addr:0x%" PRIx64 " matches type [%" PRIu64 "] address, mem_addr:%p", logic_addr, mem_type, mem_addr);
return SUCCESS;
}
Status ModelUtils::SetDevice(const uint32_t device_id) {
GE_ASSERT_TRUE(device_id != kInvalidDeviceId);
GE_ASSERT_RT_OK(aclrtSetDevice(static_cast<int32_t>(device_id)),
"Call aclrtSetDevice failed, device_id:%u", device_id);
return SUCCESS;
}
Status ModelUtils::ResetDevice(const uint32_t device_id) {
const rtError_t rt_ret = aclrtResetDevice(static_cast<int32_t>(device_id));
if (rt_ret != ACL_SUCCESS) {
REPORT_INNER_ERR_MSG("E19999", "Call aclrtResetDevice failed, device_id:%u, ret:%d", device_id, rt_ret);
GELOGE(RT_FAILED, "[Call][aclrtResetDevice] failed, device_id:%u, ret:%d", device_id, rt_ret);
return RT_FAILED;
}
return SUCCESS;
}
Status ModelUtils::CalculateAicpuBlockingEventNum(const GeModelPtr &ge_model, uint32_t &aicpu_blocking_event_num) {
const auto compute_graph = ge_model->GetGraph().get();
GE_CHECK_NOTNULL(compute_graph);
std::unordered_set<int64_t> stream_bloking;
for (const auto &node : compute_graph->GetAllNodes()) {
bool is_blocking_aicpu_op = false;
const OpDescPtr op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
(void)AttrUtils::GetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op);
if (!is_blocking_aicpu_op) {
continue;
}
GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc->GetName().c_str(),
static_cast<int32_t>(is_blocking_aicpu_op));
(void)stream_bloking.insert(op_desc->GetStreamId());
}
aicpu_blocking_event_num = static_cast<uint32_t>(stream_bloking.size());
return SUCCESS;
}
Status ModelUtils::CalculateHcclGroupOrderedEventNum(const GeModelPtr &ge_model,
uint32_t &hccl_group_ordered_event_num) {
const auto &model_def = ge_model->GetModelTaskDefPtr();
GE_CHECK_NOTNULL(model_def);
const auto compute_graph = ge_model->GetGraph().get();
GE_CHECK_NOTNULL(compute_graph);
std::unordered_set<std::string > hccl_group_id_set;
for (const auto &node : compute_graph->GetAllNodes()) {
OpDescPtr op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
std::vector<std::string> hccl_group_id_list;
const bool has_gropu_id_list = (AttrUtils::GetListStr(op_desc, ATTR_NAME_HCCL_GROUP_ID_LIST, hccl_group_id_list) &&
!hccl_group_id_list.empty());
if (has_gropu_id_list) {
for (const auto &group_id : hccl_group_id_list) {
GELOGI("Get op:%s attribute(hccl_group_id_list) group id:%s", op_desc->GetName().c_str(), group_id.c_str());
hccl_group_id_set.insert(group_id);
}
}
}
hccl_group_ordered_event_num = static_cast<uint32_t>(hccl_group_id_set.size());
return SUCCESS;
}
Status ModelUtils::CalculateFollowStream(const GeModelPtr &ge_model, uint64_t &hccl_fellow_stream_num) {
const auto &model_def = ge_model->GetModelTaskDefPtr();
GE_CHECK_NOTNULL(model_def);
const auto compute_graph = ge_model->GetGraph().get();
GE_CHECK_NOTNULL(compute_graph);
std::map<uint32_t, OpDescPtr> op_list;
for (const auto &node : compute_graph->GetAllNodes()) {
OpDescPtr op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
(void)op_list.emplace(op_desc->GetId(), op_desc);
}
std::multimap<int64_t, uint64_t> main_follow_num;
for (int32_t i = 0; i < model_def->task_size(); i++) {
const domi::TaskDef &task = model_def->task(i);
if (static_cast<ModelTaskType>(task.type()) == ModelTaskType::MODEL_TASK_HCCL) {
auto const &hccl_def = task.kernel_hccl();
const auto it = op_list.find(hccl_def.op_index());
GE_CHK_BOOL_RET_STATUS(it != op_list.end(), FAILED, "Failed to find op index %u in op list",
hccl_def.op_index());
const OpDescPtr hccl_op_desc = it->second;
int64_t main_stream_id = hccl_op_desc->GetStreamId();
int64_t follow_stream_num = 0;
if (!AttrUtils::GetInt(hccl_op_desc, kUsedStreamNum, follow_stream_num)) {
GELOGW("Get used stream num failed, op is %s", hccl_op_desc->GetName().c_str());
}
(void)main_follow_num.emplace(main_stream_id, follow_stream_num);
}
}
hccl_fellow_stream_num = CalFollowStreamSum(main_follow_num);
return SUCCESS;
}
uint64_t ModelUtils::CalFollowStreamSum(const std::multimap<int64_t, uint64_t> &hccl_stream_map) {
std::map<int64_t, uint64_t> max_follow_stream_map;
for (const auto &it : hccl_stream_map) {
const std::map<int64_t, uint64_t>::const_iterator max_it =
static_cast<std::map<int64_t, uint64_t>::const_iterator>(max_follow_stream_map.find(it.first));
if (max_it == max_follow_stream_map.cend()) {
(void)max_follow_stream_map.emplace(it.first, it.second);
continue;
}
if (it.second > max_it->second) {
max_follow_stream_map.at(max_it->first) = it.second;
}
}
uint64_t need_follow_stream_num = 0U;
for (const auto &follow_it : max_follow_stream_map) {
need_follow_stream_num = need_follow_stream_num + follow_it.second;
}
GELOGD("Need follow num is %" PRIu64, need_follow_stream_num);
return need_follow_stream_num;
}
bool ModelUtils::IsReuseZeroCopyMemory() {
static const std::string kEnabled = "1";
std::string reuse_zero_copy_memory;
(void)ge::GetContext().GetOption(OPTION_EXEC_REUSE_ZERO_COPY_MEMORY, reuse_zero_copy_memory);
return (reuse_zero_copy_memory == kEnabled);
}
bool ModelUtils::IsGeUseExtendSizeMemory(bool dynamic_graph) {
return VarManager::IsGeUseExtendSizeMemory(dynamic_graph);
}
vector_bit_t ModelUtils::GetInputTensorNeedRawData(const OpDescPtr &op_desc) {
vector_bit_t need_raw_data_list;
for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) {
bool is_no_tiling = false;
(void)ge::AttrUtils::GetBool(input_desc, ATTR_NAME_TENSOR_NO_TILING_MEM_TYPE, is_no_tiling);
need_raw_data_list.push_back(!is_no_tiling);
}
return need_raw_data_list;
}
Status ModelUtils::InitRuntimeParams(const GeModelPtr &ge_model, RuntimeParam &runtime_param,
const uint32_t device_id) {
(void)AttrUtils::GetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, runtime_param.mem_size);
(void)AttrUtils::GetInt(ge_model, ATTR_MODEL_WEIGHT_SIZE, runtime_param.weight_size);
(void)AttrUtils::GetInt(ge_model, ATTR_MODEL_STREAM_NUM, runtime_param.stream_num);
(void)AttrUtils::GetInt(ge_model, ATTR_MODEL_NOTIFY_NUM, runtime_param.notify_num);
(void)AttrUtils::GetListInt(ge_model, ATTR_MODEL_NOTIFY_TYPES, runtime_param.notify_types);
(void)AttrUtils::GetInt(ge_model, ATTR_MODEL_EVENT_NUM, runtime_param.event_num);
(void)AttrUtils::GetInt(ge_model, ATTR_MODEL_LABEL_NUM, runtime_param.label_num);
(void)AttrUtils::GetInt(ge_model, ATTR_MODEL_BATCH_NUM, runtime_param.batch_num);
(void)AttrUtils::GetInt(ge_model, MODEL_ATTR_TASK_GEN_BASE_ADDR, runtime_param.logic_mem_base);
(void)AttrUtils::GetInt(ge_model, MODEL_ATTR_TASK_GEN_WEIGHT_ADDR, runtime_param.logic_weight_base);
(void)AttrUtils::GetInt(ge_model, MODEL_ATTR_SESSION_ID, runtime_param.session_id);
(void)AttrUtils::GetInt(ge_model, ATTR_MODEL_TASK_GEN_VAR_ADDR, runtime_param.logic_var_base);
(void)AttrUtils::GetInt(ge_model, ATTR_MODEL_VAR_SIZE, runtime_param.var_size);
(void)AttrUtils::GetInt(ge_model, ATTR_MODEL_ZERO_COPY_MEMORY_SIZE, runtime_param.zero_copy_size);
(void)AttrUtils::GetInt(ge_model, MODEL_ATTR_HOST_MEMORY_SIZE, runtime_param.host_mem_size);
(void)AttrUtils::GetInt(ge_model, MODEL_ATTR_TASK_GEN_HOST_BASE_ADDR, runtime_param.host_logic_mem_base);
(void)AttrUtils::GetInt(ge_model, MODEL_ATTR_HOST_SVM_SIZE, runtime_param.host_svm_size);
(void)AttrUtils::GetInt(ge_model, MODEL_ATTR_TASK_GEN_HOST_SVM_BASE_ADDR, runtime_param.host_svm_logic_mem_base);
runtime_param.device_id = device_id;
runtime_param.fm_memory_infos.clear();
runtime_param.fixed_fm_memory_infos.clear();
runtime_param.memory_infos.clear();
bool is_fixed_prior_fm = (runtime_param.fixed_mem_base != 0U);
GELOGD("runtime_param.fixed_mem_base:0x%" PRIx64 ", is_fixed_prior_fm:%d",
runtime_param.fixed_mem_base, is_fixed_prior_fm);
int64_t total_hbm_size = 0;
const auto &memory_info_vec = GetAllMemoryTypeSize(ge_model);
for (auto &i : memory_info_vec) {
if (i.memory_type == RT_MEMORY_HBM) {
if (is_fixed_prior_fm && i.is_fixed_addr_prior) {
runtime_param.fixed_fm_memory_infos.push_back(i);
} else {
runtime_param.fm_memory_infos.push_back(i);
}
total_hbm_size += i.memory_size;
continue;
}
runtime_param.memory_infos[i.memory_type] = i;
}
GE_ASSERT_EQ(ge::IntegerChecker<int64_t>::Compat(runtime_param.mem_size), true);
GE_ASSERT_EQ(total_hbm_size, (static_cast<int64_t>(runtime_param.mem_size) - runtime_param.zero_copy_size));
runtime_param.fileconstant_addr_mapping.clear();
return SUCCESS;
}
Status ModelUtils::GetHbmFeatureMapMemInfo(const GeModelPtr &ge_model, std::vector<MemInfo> &all_mem_info,
bool get_zero_copy) {
std::vector<std::vector<int64_t>> sub_memory_infos;
(void)AttrUtils::GetListListInt(ge_model, ATTR_MODEL_SUB_MEMORY_INFO, sub_memory_infos);
if (sub_memory_infos.empty()) {
MemInfo default_mem_info{};
int64_t zero_copy_size = 0;
(void)AttrUtils::GetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, default_mem_info.memory_size);
(void)AttrUtils::GetInt(ge_model, ATTR_MODEL_ZERO_COPY_MEMORY_SIZE, zero_copy_size);
default_mem_info.memory_size -= zero_copy_size;
default_mem_info.memory_type = RT_MEMORY_HBM;
GELOGD("Get feature map memory info with details: [%s]", default_mem_info.ToString().c_str());
all_mem_info.emplace_back(std::move(default_mem_info));
return SUCCESS;
}
const size_t fm_memory_info_size = sub_memory_infos.size() - 1U;
for (size_t index = 0; index < sub_memory_infos.size(); ++index) {
if ((index == (fm_memory_info_size)) && (!get_zero_copy)) {
continue;
}
const auto &sub_memory_info = sub_memory_infos[index];
GE_ASSERT_TRUE(sub_memory_info.size() >= 3U);
GE_ASSERT_EQ(sub_memory_info[0U], static_cast<int64_t>(RT_MEMORY_HBM));
MemInfo one_fm_mem_info;
one_fm_mem_info.memory_type = RT_MEMORY_HBM;
one_fm_mem_info.logic_memory_base = sub_memory_info[1U];
one_fm_mem_info.memory_size = sub_memory_info[2U];
one_fm_mem_info.memory_base = reinterpret_cast<uint8_t *>(one_fm_mem_info.logic_memory_base);
one_fm_mem_info.is_fixed_addr_prior = ((sub_memory_info.size() > 3U) ? sub_memory_info[3U] : false);
GELOGD("Get one sub feature map memory info with details: [%s]", one_fm_mem_info.ToString().c_str());
all_mem_info.emplace_back(std::move(one_fm_mem_info));
}
std::sort(all_mem_info.begin(), all_mem_info.end());
return SUCCESS;
}
std::vector<MemInfo> ModelUtils::GetAllMemoryTypeSize(const GeModelPtr &ge_model) {
std::vector<MemInfo> all_mem_info;
GE_ASSERT_SUCCESS(GetHbmFeatureMapMemInfo(ge_model, all_mem_info));
MemInfo p2p_mem_info{};
(void)AttrUtils::GetInt(ge_model, ATTR_MODEL_P2P_MEMORY_SIZE, p2p_mem_info.memory_size);
p2p_mem_info.memory_type = RT_MEMORY_P2P_DDR;
p2p_mem_info.memory_key = "_p";
all_mem_info.emplace_back(std::move(p2p_mem_info));
MemInfo session_scope_mem_info{};
(void)AttrUtils::GetInt(ge_model, ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, session_scope_mem_info.memory_size);
session_scope_mem_info.memory_type = (kSessionScopeMemoryMask | RT_MEMORY_HBM);
all_mem_info.emplace_back(std::move(session_scope_mem_info));
MemInfo host_mem_info{};
(void)AttrUtils::GetInt(ge_model, MODEL_ATTR_HOST_MEMORY_SIZE, host_mem_info.memory_size);
(void)AttrUtils::GetInt(ge_model, MODEL_ATTR_TASK_GEN_HOST_BASE_ADDR, host_mem_info.logic_memory_base);
host_mem_info.memory_type = RT_MEMORY_HOST;
host_mem_info.memory_key = "_h";
all_mem_info.emplace_back(std::move(host_mem_info));
MemInfo host_svm_mem_info{};
(void)AttrUtils::GetInt(ge_model, MODEL_ATTR_HOST_SVM_SIZE, host_svm_mem_info.memory_size);
(void)AttrUtils::GetInt(ge_model, MODEL_ATTR_TASK_GEN_HOST_SVM_BASE_ADDR, host_svm_mem_info.logic_memory_base);
host_svm_mem_info.memory_type = RT_MEMORY_HOST_SVM;
host_svm_mem_info.memory_key = "_svm";
all_mem_info.emplace_back(std::move(host_svm_mem_info));
return all_mem_info;
}
Status ModelUtils::MallocExMem(const uint32_t device_id, RuntimeParam &runtime_param) {
for (auto &it : runtime_param.memory_infos) {
const size_t mem_size = static_cast<size_t>(it.second.memory_size);
if (mem_size == 0U) {
continue;
}
const bool sessoion_scope = ((kSessionScopeMemoryMask & it.first) == kSessionScopeMemoryMask);
rtMemType_t memory_type = static_cast<rtMemType_t>(it.first & kMemoryTypeMask);
const rtMemType_t mem_type_from_infos = static_cast<rtMemType_t>(it.second.memory_type);
if (mem_type_from_infos == RT_MEMORY_HOST) {
memory_type = mem_type_from_infos;
it.second.memory_base = PtrToPtr<void, uint8_t>(MemManager::Instance().HostMemInstance().Malloc(mem_size));
runtime_param.host_mem_base = PtrToValue(it.second.memory_base);
} else if (mem_type_from_infos == RT_MEMORY_HOST_SVM) {
memory_type = mem_type_from_infos;
it.second.memory_base =
PtrToPtr<void, uint8_t>(MemoryAllocator(RT_MEMORY_HOST_SVM).MallocMemory(it.second.memory_key, mem_size));
runtime_param.host_svm_mem_base = PtrToValue(it.second.memory_base);
} else if (sessoion_scope) {
auto &mem_instance = MemManager::Instance().SessionScopeMemInstance(memory_type);
it.second.memory_base = mem_instance.Malloc(mem_size, runtime_param.session_id);
} else if ((memory_type == RT_MEMORY_P2P_DDR) && (runtime_param.p2p_fixed_mem_base != 0U)) {
GE_ASSERT_TRUE(runtime_param.p2p_fixed_mem_size >= mem_size,
"runtime_param.p2p_fixed_mem_size: %zu, mem_size:%zu", runtime_param.p2p_fixed_mem_size, mem_size);
it.second.memory_base = PtrToPtr<void, uint8_t>(ValueToPtr(runtime_param.p2p_fixed_mem_base));
GELOGI(
"graph_%u use p2p_fixed_mem_base, mem_type[RT_MEMORY_P2P_DDR] "
"mem_type_origin[%" PRIu64 "] mem_addr[%p] size[%zu].",
runtime_param.graph_id, it.first, it.second.memory_base, mem_size);
continue;
} else {
const std::string purpose = MemTypeUtils::ToString(memory_type);
auto &mem_instance = MemManager::Instance().MemInstance(memory_type);
it.second.memory_base = mem_instance.MallocMemory(purpose, mem_size, device_id);
}
if (it.second.memory_base == nullptr) {
GELOGW("Alloc extra memory failed, type:%u size: %zu", memory_type, mem_size);
}
GEEVENT(
"[IMAS]InitFeatureMap graph_%u MallocMemory type[F] mem_type[%u] "
"mem_type_origin[%" PRIu64 "] mem_addr[%p] size[%zu].",
runtime_param.graph_id, memory_type, it.first, it.second.memory_base, mem_size);
}
return SUCCESS;
}
void ModelUtils::FreeExMem(const uint32_t device_id, RuntimeParam &runtime_param,
const uint64_t session_id, const bool is_online) {
for (auto &it : runtime_param.memory_infos) {
if ((kSessionScopeMemoryMask & it.first) == kSessionScopeMemoryMask) {
if ((!is_online) && (it.second.memory_base != nullptr)) {
MemManager::Instance().FreeSessionMemory(session_id);
it.second.memory_base = nullptr;
}
continue;
}
if (it.second.memory_type == RT_MEMORY_HOST) {
if (it.second.memory_base != nullptr) {
(void)MemManager::Instance().HostMemInstance().Free(PtrToPtr<uint8_t, void>(it.second.memory_base));
it.second.memory_base = nullptr;
continue;
}
}
if (it.second.memory_type == RT_MEMORY_HOST_SVM) {
if (it.second.memory_base != nullptr) {
(void)MemoryAllocator(RT_MEMORY_HOST_SVM).FreeMemory(it.second.memory_base);
it.second.memory_base = nullptr;
continue;
}
}
if (it.second.memory_type == RT_MEMORY_P2P_DDR) {
FreeP2pMem(device_id, runtime_param, it);
continue;
}
const rtMemType_t memory_type = static_cast<rtMemType_t>(it.first & kMemoryTypeMask);
auto &mem_instance = MemManager::Instance().MemInstance(memory_type);
if (it.second.memory_base != nullptr) {
GE_CHK_STATUS(mem_instance.FreeMemory(it.second.memory_base, device_id), "failed to free memory");
it.second.memory_base = nullptr;
}
}
}
bool ModelUtils::IsSuppoprtAddrRefreshable(const uint64_t mem_type) {
return (mem_type == static_cast<uint64_t>(MemoryAppType::kMemoryTypeFeatureMap)) ||
(mem_type == static_cast<uint64_t>(MemoryAppType::kMemoryTypeModelIo));
}
void ModelUtils::GetAddrRefreshableFlagsByMemTypes(const std::vector<uint64_t> &mem_types,
std::vector<uint8_t> &flags) {
for (const auto &mem_type : mem_types) {
const bool refresh = IsSuppoprtAddrRefreshable(mem_type);
flags.push_back(refresh ? 1U : 0U);
}
}
bool ModelUtils::IsFeatureMapOrModelIoType(const uint64_t mem_type) {
return ((mem_type == kFmMemType) ||
(mem_type == static_cast<uint64_t>(RT_MEMORY_HBM)) || (mem_type == static_cast<uint64_t>(RT_MEMORY_L2)) ||
(mem_type == static_cast<uint64_t>(RT_MEMORY_DEFAULT)));
}
Status ModelUtils::GetSpaceRegistries(const ge::GeRootModelPtr &root_model,
std::shared_ptr<gert::OpImplSpaceRegistryV2Array> &space_registries) {
GE_ASSERT_NOTNULL(root_model);
if (space_registries == nullptr) {
space_registries = ge::MakeShared<gert::OpImplSpaceRegistryV2Array>();
GE_ASSERT_NOTNULL(space_registries);
}
std::vector<OpSoBinPtr> so_list{};
GetSpecificSoBins(root_model, SoBinType::kSpaceRegistry, so_list);
if (!so_list.empty()) {
std::map<OppImplVersion, std::vector<ge::OpSoBinPtr>> version_2_so_lists;
for (const auto &so_within_om : so_list) {
const auto &vendor_name = so_within_om->GetVendorName();
GELOGD("find so in om vendor path %s", so_within_om->GetVendorName().c_str());
bool found = false;
for (const auto &vendor_path_2_version : kVersion2VendorPath) {
if (vendor_name.find(vendor_path_2_version.second) != std::string::npos) {
GELOGI("Add opp version:[%zu] so from root model!", vendor_path_2_version.first);
version_2_so_lists[vendor_path_2_version.first].emplace_back(so_within_om);
found = true;
break;
}
}
if (!found) {
GELOGI("vendor_name:[%s] without path identifier, and default is opp version.", vendor_name.c_str());
version_2_so_lists[OppImplVersion::kOpp].emplace_back(so_within_om);
}
}
for (const auto &version_2_so_list : version_2_so_lists) {
GELOGI("Load opp version:[%zu] so from root model, so size = %zu!", version_2_so_list.first,
version_2_so_list.second.size());
auto space_registry = ge::MakeShared<gert::OpImplSpaceRegistryV2>();
GE_ASSERT_NOTNULL(space_registry);
{
std::vector<gert::OppSoDesc> opp_so_desc_list;
std::vector<std::string> opp_dir_list;
GE_MAKE_GUARD(clear_tmp_opp_dir_list, [&opp_dir_list]() -> void {
for (const auto& opp_dir : opp_dir_list) {
GELOGD("clear_tmp_opp_dir_list opp dir: %s", opp_dir.c_str());
(void)RmOmOppDir(opp_dir);
}
});
for (const auto &so_bin : version_2_so_list.second) {
GE_ASSERT_NOTNULL(so_bin);
GE_ASSERT_TRUE(so_bin->GetBinDataSize() <= kGByteSize);
std::string opp_dir;
GE_ASSERT_SUCCESS(CreateOmOppDir(opp_dir));
opp_dir_list.emplace_back(opp_dir);
const auto &so_path = opp_dir + so_bin->GetSoName();
GE_ASSERT_GRAPH_SUCCESS(SaveToFile(so_bin, so_path));
opp_so_desc_list.emplace_back(gert::OppSoDesc(std::vector<ge::AscendString>{ge::AscendString(so_path.c_str())},
so_bin->GetSoName().c_str()));
}
for (const auto &opp_so_desc : opp_so_desc_list) {
GELOGD("Prepare to AddSoToRegistry, so name: %s", opp_so_desc.GetPackageName().GetString());
const auto is_opp_depend_common_so = opp_so_desc.GetPackageName().Find("libophost_comm_legacy.so");
if (is_opp_depend_common_so != std::string::npos) {
GELOGD("AddSoToRegistry skiped, the specify so: libophost_comm_legacy.so");
continue;
}
GE_ASSERT_GRAPH_SUCCESS(space_registry->AddSoToRegistry(opp_so_desc));
}
}
space_registries->at(static_cast<size_t>(version_2_so_list.first)) = space_registry;
}
} else {
GELOGI("No space registries in root model, use default so!");
GE_ASSERT_TRUE(space_registries->size() == static_cast<size_t>(gert::OppImplVersionTag::kVersionEnd));
for (size_t i = 0U; i < static_cast<size_t>(gert::OppImplVersionTag::kVersionEnd); i++) {
auto space_registry =
gert::DefaultOpImplSpaceRegistryV2::GetInstance().GetSpaceRegistry(static_cast<gert::OppImplVersionTag>(i));
if (space_registry == nullptr) {
continue;
}
space_registries->at(i) = space_registry;
}
}
return ge::SUCCESS;
}
bool ModelUtils::IsAICoreKernel(const ge::ccKernelType kernel_type) {
static std::set<ge::ccKernelType> aicore_kernel_type{ge::ccKernelType::TE, ge::ccKernelType::MIX_AICORE,
ge::ccKernelType::MIX_VECTOR_CORE};
return aicore_kernel_type.count(kernel_type) > 0UL;
}
std::string ModelUtils::GetOpMasterDeviceKey(const uint32_t &model_id, const std::string &so_path) {
if (so_path.find(kInner) != std::string::npos) {
const auto &pos = so_path.find_last_of("/");
GE_ASSERT_TRUE(pos != std::string::npos);
return so_path.substr(pos + 1UL);
} else {
return GetOpMasterDeviceCustKey(model_id, so_path);
}
}
std::string ModelUtils::GetOpMasterDeviceCustKey(const uint32_t &model_id, const std::string &so_path) {
const auto vendor_pos = so_path.find(kVendors);
GE_ASSERT_TRUE(vendor_pos != std::string::npos);
const auto op_impl_pos = so_path.find(kOpImpl);
GE_ASSERT_TRUE(op_impl_pos != std::string::npos);
const auto so_pos = so_path.find_last_of("/");
GE_ASSERT_TRUE(so_pos != std::string::npos);
auto cust_key = std::to_string(model_id)
.append("/")
.append(so_path.substr(vendor_pos, op_impl_pos - vendor_pos))
.append(so_path.substr(so_pos));
const auto &new_end = std::unique(cust_key.begin(), cust_key.end(), [](const char_t c1, const char_t c2) -> bool {
return (c1 == '/') && (c2 == '/');
});
cust_key.erase(new_end, cust_key.end());
std::replace(cust_key.begin(), cust_key.end(), '/', '_');
GELOGD("Get op master device cust key is %s.", cust_key.c_str());
return cust_key;
}
Status ModelUtils::GetOpMasterDevice(const uint32_t &model_id, const ge::GeRootModelPtr &root_model,
std::unordered_map<std::string, OpSoBinPtr> &built_in_so_bins,
std::unordered_map<std::string, OpSoBinPtr> &cust_so_bins) {
GE_ASSERT_NOTNULL(root_model);
std::vector<OpSoBinPtr> so_list{};
GetSpecificSoBins(root_model, SoBinType::kOpMasterDevice, so_list);
if (!so_list.empty()) {
for (const auto &op_so_bin : so_list) {
const auto &path = op_so_bin->GetVendorName();
const auto &name = op_so_bin->GetSoName();
GELOGD("find so in om vendor path %s, so name %s", path.c_str(), name.c_str());
if (path.find(kInner) != std::string::npos) {
built_in_so_bins.emplace(name, op_so_bin);
GELOGI("[OpMasterDevice][BuiltIn]Get so [%s] in model [%u].", name.c_str(), model_id);
}
const auto &cust_key = GetOpMasterDeviceKey(model_id, path + "/" + name);
cust_so_bins.emplace(cust_key, op_so_bin);
GELOGI("[OpMasterDevice][Custom]Get so [%s]->[%s] in model [%u].",
name.c_str(), cust_key.c_str(), model_id);
}
} else {
GE_ASSERT_SUCCESS(GetOpMasterDeviceFromOppPackage(model_id, built_in_so_bins, cust_so_bins));
}
GELOGI("[OpMasterDevice]Get so num BuiltIn:[%zu] and Custom:[%zu].", built_in_so_bins.size(), cust_so_bins.size());
return SUCCESS;
}
Status ModelUtils::GetOpMasterDeviceFromOppPackage(const uint32_t &model_id,
std::unordered_map<std::string, OpSoBinPtr> &built_in_so_bins,
std::unordered_map<std::string, OpSoBinPtr> &cust_so_bins) {
std::string op_master_device_path;
GE_ASSERT_SUCCESS(PluginManager::GetOpMasterDeviceSoPath(op_master_device_path));
std::vector<std::string> path_vec;
PluginManager::SplitPath(op_master_device_path, path_vec);
for (const auto &path : path_vec) {
const auto &is_built_in = path.find(kInner) != std::string ::npos;
std::vector<std::string> file_list;
PluginManager::GetFileListWithSuffix(path, ".so", file_list);
for (const auto &file : file_list) {
uint32_t bin_len = 0U;
auto op_so_bin = GetBinDataFromFile(file, bin_len);
GE_ASSERT_NOTNULL(op_so_bin, "open so fail, path=%s", file.c_str());
const auto &pos = file.find_last_of("/");
GE_ASSERT_TRUE(pos != std::string::npos);
const auto &so_name = file.substr(pos + 1UL);
const auto &vendor_name = file.substr(0, pos);
const auto proto_bin =
ge::MakeShared<OpSoBin>(so_name, vendor_name, std::move(op_so_bin), bin_len, SoBinType::kOpMasterDevice);
GE_ASSERT_NOTNULL(proto_bin);
if (is_built_in) {
built_in_so_bins.emplace(so_name, proto_bin);
GELOGI("[OpMasterDevice][BuiltIn]Get so [%s] from opp path.", so_name.c_str());
}
const auto &cust_key = GetOpMasterDeviceKey(model_id, file);
cust_so_bins.emplace(cust_key, proto_bin);
GELOGI("[OpMasterDevice][Custom] Get so [%s]->[%s] from opp path.",
so_name.c_str(), cust_key.c_str());
}
}
return SUCCESS;
}
}