* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#include "graph/build/memory/block_mem_assigner.h"
#include <cinttypes>
#include <algorithm>
#include <sstream>
#include <stack>
#include "graph/ge_context.h"
#include "graph/utils/graph_utils.h"
#include "graph/utils/node_utils.h"
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "graph/utils/op_type_utils.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/build/memory/var_mem_assign_util.h"
#include "graph/optimize/mem_layout_conflict_optimize/mem_layout_conflict_util.h"
#include "common/context/local_context.h"
#include "common/math/math_util.h"
#include "common/checker.h"
#include "common/memory/mem_type_utils.h"
#include "framework/common/op/ge_op_utils.h"
#include "graph/custom_op_factory.h"
#include "graph/optimize/params.h"
#include "framework/common/runtime_tensor_desc.h"
#include "graph/build/memory/dynamic_batch_mem_assigner.h"
#include "runtime/subscriber/global_profiler.h"
using std::unordered_map;
using std::unordered_set;
namespace {
const char *const kAttrNameWorkspaceReuseFlag = "workspace_reuse_flag";
const char *const kL2FusionDynamicConvergeOp = "l2fusion_dynamic_converge_op";
const char *const kOpNoReuseMem = "no_reuse_mem_flag";
const int32_t kReuseMaxOpNum = 10;
const int32_t kReuseMaxCharNum = 2000;
const uint32_t kAutoMode = 1U;
const std::set<ge::DataType> kNotPostReuseDataType = {ge::DT_RESOURCE, ge::DT_VARIANT};
constexpr int64_t kParentNodeDefaultStreamId = -2;
bool IsNodeSupportZeroCopy(const ge::Node *const node) {
const auto op_engine = node->GetOpDesc()->GetOpKernelLibName();
if (op_engine == ge::kEngineNameDsa) {
GELOGD("dsa engine op[%s] not support zero copy", node->GetName().c_str());
return false;
}
if (op_engine == ge::kCustomOpKernelLibName) {
ge::AscendString op_type(node->GetOpDesc()->GetType().c_str());
if (!ge::CustomOpFactory::IsAddressRefreshable(op_type)) {
GELOGD("custom engine op[%s] not support zero copy", node->GetName().c_str());
return false;
}
}
if (ge::OpUtils::IsHcomNodeNotSupportAddrRefresh(node->GetOpDesc())) {
GELOGD("hccl engine op[%s] not support zero copy", node->GetName().c_str());
return false;
}
return true;
}
bool CanReuseZeroCopyBlock(const ge::Node *const node) {
const static std::set<std::string> kConditionOps = {ge::STREAMSWITCH, ge::LABELSWITCHBYINDEX};
if (kConditionOps.count(node->GetTypePtr()) > 0U) {
GELOGD("Condition op[%s] cannot reuse zero copy block", node->GetName().c_str());
return false;
}
const auto op_engine = node->GetOpDesc()->GetOpKernelLibName();
if (op_engine == ge::kCustomOpKernelLibName) {
ge::AscendString op_type(node->GetOpDesc()->GetType().c_str());
if (!ge::CustomOpFactory::IsAddressRefreshable(op_type)) {
GELOGD("custom engine op[%s] not support zero copy", node->GetName().c_str());
return false;
}
}
if (op_engine == ge::kEngineNameDsa) {
GELOGD("dsa engine op[%s] cannot reuse zero copy block", node->GetName().c_str());
return false;
}
if (ge::OpUtils::IsHcomNodeNotSupportAddrRefresh(node->GetOpDesc())) {
GELOGD("hccl engine op[%s] cannot reuse zero copy block", node->GetName().c_str());
return false;
}
return true;
}
bool IsContinuousOutput(const ge::NodePtr &n) {
if (ge::MemLayoutConflictUtil::IsContinuousOutput(n)) {
if (n->GetOwnerComputeGraphBarePtr() != nullptr) {
GELOGI("%s name[%s] set continuous, output size[%u].", n->GetOwnerComputeGraphBarePtr()->GetName().c_str(),
n->GetNamePtr(), n->GetAllOutDataAnchorsSize());
return true;
}
}
return false;
}
bool IsOutNodeInCurComputeGraph(const ge::Node *const node, const ge::ComputeGraphPtr &graph) {
return (node->GetType() == ge::NETOUTPUT) && (node->GetOwnerComputeGraphBarePtr() == graph.get());
}
const std::list<ge::NodeIndexIO> &FindNodeOutputSameAnchors(const ge::NodeIndexIO &node_index_io,
const ge::AnchorToSymbol &anchor_to_symbol,
const ge::SymbolToAnchors &symbol_to_anchors) {
static const std::list<ge::NodeIndexIO> res = {};
const auto &symbol_iter = anchor_to_symbol.find(node_index_io.ToString());
if (symbol_iter != anchor_to_symbol.cend()) {
const auto &anchors_iter = symbol_to_anchors.find(symbol_iter->second);
if (anchors_iter != symbol_to_anchors.cend()) {
return anchors_iter->second;
}
}
return res;
}
size_t GetOutputFlowToNetoutputNum(const ge::NodePtr &node, uint32_t output_index, const ge::ComputeGraphPtr &graph,
const ge::SymbolToAnchors &symbol_to_anchors,
const ge::AnchorToSymbol &anchor_to_symbol) {
auto out_anchor = node->GetOutDataAnchor(static_cast<int32_t>(output_index));
if (out_anchor == nullptr) {
return 0U;
}
size_t num_anchors_to_netoutput = 0U;
ge::NodeIndexIO out_node_index_io(node, output_index, ge::kOut);
const auto &same_anchors = FindNodeOutputSameAnchors(out_node_index_io, anchor_to_symbol, symbol_to_anchors);
bool include_not_support_zero_copy_node = false;
if (!same_anchors.empty()) {
for (const auto &node_index_io : same_anchors) {
if ((node_index_io.io_type_ != ge::kIn) && (node_index_io.node_ptr_ != nullptr) &&
(node_index_io.node_ptr_->GetOpDescBarePtr() != nullptr)) {
if (!IsNodeSupportZeroCopy(node_index_io.node_ptr_)) {
GELOGI("op[%s] not support zero copy", node_index_io.node_ptr_->GetNamePtr());
include_not_support_zero_copy_node = true;
}
continue;
}
if (IsOutNodeInCurComputeGraph(node_index_io.node_.get(), graph)) {
num_anchors_to_netoutput++;
}
}
}
if (num_anchors_to_netoutput > 0U) {
GELOGI("Node %s output %u flow to %zu root graph output", node->GetNamePtr(), output_index,
num_anchors_to_netoutput);
if (include_not_support_zero_copy_node) {
GELOGI("Node %s output %u symbol is same as not support zero copy node", node->GetNamePtr(), output_index);
num_anchors_to_netoutput = 0U;
}
}
return num_anchors_to_netoutput;
}
bool IsStrictReuseZeroMemoryMode() {
const static std::string kEnabled = "1";
std::string reuse_zero_copy_memory;
(void)ge::GetContext().GetOption(ge::OPTION_EXEC_REUSE_ZERO_COPY_MEMORY, reuse_zero_copy_memory);
return (reuse_zero_copy_memory == kEnabled);
}
std::string ToString(const ge::NodeTypeIndex &x) {
std::stringstream ss;
if (x.node_ != nullptr) {
ss << "[" << x.node_->GetNamePtr() << "(" << x.node_->GetTypePtr() << "), ";
} else {
ss << "[ (Subgraph)";
}
switch (x.mem_type_) {
case ge::OpMemoryType::kOutput:ss << "Output, ";
break;
case ge::OpMemoryType::kWorkspace:ss << "Workspace, ";
break;
case ge::OpMemoryType::kOutputDesc:ss << "OutputDesc, ";
break;
default:break;
}
ss << x.index_ << ", ref_input:" << x.ref_input_ << ", begin:" << x.life_time_begin_ << ", end:" << x.life_time_end_
<< ", symbol end:" << x.symbol_max_life_time_end_;
return ss.str();
}
bool IsNoReleaseNodeOutBlock(const ge::Node *const node) {
for (const auto &input_desc : node->GetOpDesc()->GetAllInputsDescPtr()) {
if ((input_desc != nullptr)
&& (kNotPostReuseDataType.find(input_desc->GetDataType()) != kNotPostReuseDataType.cend())) {
return true;
}
}
for (const auto &output_desc : node->GetOpDesc()->GetAllOutputsDescPtr()) {
if ((output_desc != nullptr)
&& (kNotPostReuseDataType.find(output_desc->GetDataType()) != kNotPostReuseDataType.cend())) {
return true;
}
}
return false;
}
int64_t GetStreamId(const ge::OpDesc *const desc) {
return ge::MemReuseUtils::GetStreamId(desc);
}
std::string GetStreamIdDesc(const ge::OpDesc *const desc) {
std::string stream_id_str;
if (desc != nullptr) {
const int64_t stream_id = GetStreamId(desc);
stream_id_str = std::to_string(stream_id);
if (stream_id != desc->GetStreamId()) {
stream_id_str.append("--");
stream_id_str.append(std::to_string(desc->GetStreamId()));
}
}
return stream_id_str;
}
std::string GetName(const ge::MemoryBlock &block, bool last_node = false) {
if (!block.NodeTypeIndexList().empty()) {
if (last_node) {
return ToString(block.NodeTypeIndexList().back());
} else {
return ToString(block.NodeTypeIndexList().front());
}
}
return "";
}
const ge::Node *GeParentNode(const ge::Node *const node, const ge::ComputeGraphPtr &compute_graph, uint32_t depth) {
depth++;
const ge::Node *parent_node = nullptr;
if ((node != nullptr) && (depth < ge::kMaxDepthNum)) {
auto owner_graph = node->GetOwnerComputeGraphBarePtr();
if (owner_graph != nullptr) {
parent_node = owner_graph->GetParentNodeBarePtr();
const bool is_root_graph =
(parent_node != nullptr) && (parent_node->GetOwnerComputeGraphBarePtr() == compute_graph.get());
if (!is_root_graph) {
parent_node = GeParentNode(parent_node, compute_graph, depth);
}
}
}
if (parent_node == nullptr) {
return node;
}
return parent_node;
}
bool IsSubGraphNetOutNode(const ge::Node *const node, const ge::ComputeGraphPtr &compute_graph) {
if ((node != nullptr) && (node->GetType() == ge::NETOUTPUT) && (node->GetOpDescBarePtr() != nullptr)) {
auto owner_graph = node->GetOwnerComputeGraphBarePtr();
if (owner_graph == compute_graph.get()) {
return false;
}
for (uint32_t index = 0U; index < node->GetOpDescBarePtr()->GetInputsSize(); ++index) {
const auto input_desc = node->GetOpDescBarePtr()->GetInputDescPtr(index);
if ((input_desc != nullptr) && input_desc->HasAttr(ge::ATTR_NAME_PARENT_NODE_INDEX)) {
GELOGD("Node: %s is subgraph net out.", node->GetNamePtr());
return true;
}
}
}
return false;
}
bool PeerIsSubGraphNetOutNode(const ge::NodePtr &node, const ge::OutDataAnchorPtr &out_data_anchor,
const ge::ComputeGraphPtr &compute_graph) {
if (node != nullptr) {
for (const auto in_anchor : out_data_anchor->GetPeerInDataAnchorsPtr()) {
const auto peer_node = in_anchor->GetOwnerNodeBarePtr();
if (IsSubGraphNetOutNode(peer_node, compute_graph)) {
GELOGD("Node: %s peer node:%s is subgraph net out.", node->GetNamePtr(), peer_node->GetNamePtr());
return true;
}
}
}
return false;
}
bool CanNotLifeReuse(const ge::MemoryBlock &block, bool child_reuse = false) {
if ((!block.reuse_mem_) || ((!child_reuse) && block.child_block_)) {
return true;
}
return false;
}
bool CanReuseBlock(size_t life_begin, const ge::MemoryBlock &reusable_block, size_t block_size) {
bool can_reuse = false;
if (reusable_block.Size() == block_size) {
if (life_begin > 0) {
if (life_begin > reusable_block.GetLifeEnd(reusable_block.stream_id_)) {
can_reuse = true;
}
} else {
can_reuse = true;
}
}
return (can_reuse && (!CanNotLifeReuse(reusable_block)));
}
bool ReuseBlock(ge::MemoryBlock &block, const size_t block_size, const size_t life_begin,
const std::string &batch_label, const ge::NodeTypeIndex &node_type_index) {
if (block.IsNoAlignSizeReuseBlock() || block.IsRealSizeReuseBlock() || (block.batch_label_ != batch_label)) {
return false;
}
if (block.IsBlockTypeConflictWithNode(node_type_index)) {
return false;
}
if (block.diff_stream_prior_) {
return false;
}
if (CanReuseBlock(life_begin, block, block_size)) {
return true;
}
return false;
}
bool IsSubGraphInOrOutNode(const ge::Node *const node, const ge::ComputeGraphPtr &compute_graph) {
if (node != nullptr) {
auto owner_graph = node->GetOwnerComputeGraphBarePtr();
if (owner_graph != compute_graph.get()) {
std::string op_type(node->GetTypePtr());
if ((op_type == ge::DATA) || (op_type == ge::NETOUTPUT) || (op_type == ge::PARTITIONEDCALL)) {
return true;
}
}
}
return false;
}
bool IsDirectOutputNode(const ge::Node *const node, const ge::ComputeGraphPtr &compute_graph) {
if ((node == nullptr) || (node->GetOpDescBarePtr() == nullptr)
|| (node->GetOpDescBarePtr()->GetType() != ge::NETOUTPUT)
|| IsSubGraphNetOutNode(node, compute_graph)) {
return false;
}
GELOGD("This is model netoutput node:%s.", node->GetNamePtr());
return true;
}
bool IsDirectInputNode(const ge::Node *const node, const ge::ComputeGraphPtr &compute_graph) {
if (node != nullptr) {
auto owner_graph = node->GetOwnerComputeGraphBarePtr();
if ((owner_graph == compute_graph.get()) && ge::OpTypeUtils::IsDataNode(node->GetType())) {
GELOGD("This is model input node:%s.", node->GetNamePtr());
return true;
}
}
return false;
}
bool IsAtomicWorkSpace(const int64_t index, const std::map<std::string, std::map<int64_t, int64_t>> &atomic_workspace) {
for (auto const &it : atomic_workspace) {
if (it.second.empty()) {
continue;
}
for (const auto &workspae_info : it.second) {
if (workspae_info.first == index) {
GELOGD("Node:%s's workspace:%" PRId64 " is atomic.", it.first.c_str(), index);
return true;
}
}
}
return false;
}
static bool CompareNodeId(const ge::InDataAnchor *const left, const ge::InDataAnchor *const right) {
bool invalid_para = ((left == nullptr) || (left->GetPeerOutAnchor() == nullptr)
|| (left->GetPeerOutAnchor()->GetOwnerNodeBarePtr() == nullptr)
|| (left->GetPeerOutAnchor()->GetOwnerNodeBarePtr()->GetOpDescBarePtr() == nullptr)
|| (right == nullptr) || (right->GetPeerOutAnchor() == nullptr)
|| (right->GetPeerOutAnchor()->GetOwnerNodeBarePtr() == nullptr)
|| (right->GetPeerOutAnchor()->GetOwnerNodeBarePtr()->GetOpDescBarePtr() == nullptr));
if (invalid_para) {
return false;
}
return (left->GetPeerOutAnchor()->GetOwnerNodeBarePtr()->GetOpDescBarePtr()->GetId()
< right->GetPeerOutAnchor()->GetOwnerNodeBarePtr()->GetOpDescBarePtr()->GetId());
}
std::vector<ge::InDataAnchor *> GetSortAllInDataAnchors(const ge::NodePtr &node, const bool memory_priority_mode) {
std::vector<ge::InDataAnchor *> anchors;
for (const auto in_anchor : node->GetAllInDataAnchorsPtr()) {
anchors.emplace_back(in_anchor);
}
if (memory_priority_mode) {
std::sort(anchors.begin(), anchors.end(), CompareNodeId);
}
return anchors;
}
void HandleDependentStreamRedundantInfo(
std::pair<const int64_t, std::map<int64_t, std::set<ge::EdgeLife, ge::CompareEdgeLife>>> &in_stream_edge) {
for (auto &depend_stream_info : in_stream_edge.second) {
size_t in_node_id = 0U;
size_t out_node_id = 0U;
if (depend_stream_info.second.size() <= 1U) {
continue;
}
for (auto iter = depend_stream_info.second.begin(); iter != depend_stream_info.second.end();) {
if ((iter->node_id >= in_node_id) && (iter->peer_node_id <= out_node_id)) {
GELOGI("[StreamEdge]In depend Node: stream_id:[%" PRId64 "<-%" PRId64 "] life_time:[%zu<-%zu] delete", in_stream_edge.first,
depend_stream_info.first, iter->node_id, iter->peer_node_id);
iter = depend_stream_info.second.erase(iter);
continue;
}
in_node_id = iter->node_id;
out_node_id = iter->peer_node_id;
++iter;
}
}
}
void HandleInStreamRedundantDependence(ge::DiffStreamEdgeLife &in_stream_edges) {
for (auto &in_stream_edge : in_stream_edges) {
HandleDependentStreamRedundantInfo(in_stream_edge);
}
}
bool NotMatchNoReuseType(const std::set<std::string> &no_reuse_types, const std::string &type) {
const auto type_length = type.length();
const std::string::size_type version_length = 2U;
if ((type_length > version_length)
&& ((type.at(type_length - version_length) == 'V'))
&& (type.at(type_length - 1U) >= '1')
&& (type.at(type_length - 1U) <= '4')) {
return (no_reuse_types.count(type.substr(0, type_length - version_length)) == 0UL);
}
return (no_reuse_types.count(type) == 0UL);
}
}
namespace ge {
bool CrossLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) {
if ((left.node_ == nullptr) || (right.node_ == nullptr)) {
return true;
}
auto left_node_op_desc = left.node_->GetOpDescBarePtr();
auto right_node_op_desc = right.node_->GetOpDescBarePtr();
if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr)) {
if (left.GetLifeBegin() < right.GetLifeBegin()) {
if (left.life_time_end_ >= right.GetLifeBegin()) {
return true;
}
} else if (left.GetLifeBegin() == right.GetLifeBegin()) {
return true;
} else {
if (right.life_time_end_ >= left.GetLifeBegin()) {
return true;
}
}
}
return false;
}
static bool CanIntervalLifeReuse(const MemoryBlock &parent_block, MemoryBlock &child_block,
std::vector<MemoryBlock *> &clone_blocks) {
bool not_same_stream = ((parent_block.stream_id_ != child_block.stream_id_) || (!parent_block.same_stream_) ||
(!child_block.same_stream_) || parent_block.NodeTypeIndexList().empty() ||
child_block.NodeTypeIndexList().empty() ||
(parent_block.NodeTypeIndexList().back().diff_stream_life_time_.size() > 0U) ||
(child_block.NodeTypeIndexList().back().diff_stream_life_time_.size() > 0U));
if (not_same_stream) {
return false;
}
if (parent_block.IsBlockTypeConflict(child_block)) {
GELOGD("block type conflict, parent_block: %s(%s), child_block: %s(%s).", GetName(parent_block).c_str(),
parent_block.BlockTypeStr().c_str(), GetName(child_block).c_str(), child_block.BlockTypeStr().c_str());
return false;
}
bool can_interval_life_reuse = false;
auto clone_block = child_block.Clone();
if (clone_block == nullptr) {
return false;
}
bool same_size = ((child_block.NodeTypeIndexList().size() == child_block.RealSizeList().size())
&& (child_block.NodeTypeIndexList().size() == child_block.NoAlignSizeList().size()));
bool pre_node_cross = false;
if (same_size) {
for (auto it = child_block.NodeTypeIndexList().cbegin(); it != child_block.NodeTypeIndexList().cend();) {
bool cross_node = (((*it).ref_input_ && pre_node_cross)
|| ((!(*it).ref_input_) && parent_block.CrossLifeTimeNode(it, child_block)));
if (cross_node) {
size_t node_pos = it - child_block.NodeTypeIndexList().cbegin();
clone_block->AddNodeTypeIndex(*it, child_block.RealSizeList()[node_pos],
child_block.NoAlignSizeList()[node_pos], child_block.stream_id_);
it = child_block.DelNode(it);
pre_node_cross = true;
} else {
can_interval_life_reuse = true;
pre_node_cross = false;
++it;
}
}
}
child_block.UpdateContinuousFlag();
if (child_block.NodeTypeIndexList().empty()) {
child_block.Swap(*clone_block);
delete clone_block;
} else {
if (!clone_block->NodeTypeIndexList().empty()) {
clone_blocks.emplace_back(clone_block);
} else {
delete clone_block;
}
}
if (can_interval_life_reuse) {
GELOGD("Block size[%zu, %zu] life time are not cross.", parent_block.Size(), child_block.Size());
}
return can_interval_life_reuse;
}
bool SizeIndependentOfBatch(const std::string &node_type) {
static const std::unordered_set<std::string> kSizeIndependentOps = {
HCOMBROADCAST, HVDCALLBACKBROADCAST, HCOMALLREDUCE, HVDCALLBACKALLREDUCE, HCOMALLGATHER, HVDCALLBACKALLGATHER
};
return (kSizeIndependentOps.count(node_type) != 0UL);
}
static void GetDiffStreamMinLifeTime(const Node *const node, const int64_t src_stream,
const DiffStreamEdgeLife &in_stream_edge, int64_t &min_life_time) {
const auto node_op_desc = node->GetOpDescBarePtr();
const auto dst_stream = GetStreamId(node_op_desc);
if (dst_stream == src_stream) {
min_life_time = node_op_desc->GetId();
GELOGI("same stream, node[%s] id as min life[%" PRId64 "]", node_op_desc->GetNamePtr(), min_life_time);
return;
}
const auto it = in_stream_edge.find(dst_stream);
if (it != in_stream_edge.cend()) {
const auto edges_it = it->second.find(src_stream);
if (edges_it != it->second.cend()) {
auto edge_it = edges_it->second.lower_bound({static_cast<size_t>(node_op_desc->GetId()), 0UL});
if (edge_it != edges_it->second.end()) {
if ((edge_it->node_id > static_cast<size_t>(node_op_desc->GetId())) && (edge_it != edges_it->second.begin())) {
--edge_it;
}
if (edge_it->node_id <= static_cast<size_t>(node_op_desc->GetId())) {
min_life_time = (*edge_it).peer_node_id;
GELOGI("diff stream, get min life[%" PRId64 "], node[%s], id[%" PRId64 "], stream_id:[%" PRId64 "<-%" PRId64 "] life_time:[%zu<-%zu]",
min_life_time, node_op_desc->GetNamePtr(), node_op_desc->GetId(), dst_stream, src_stream,
(*edge_it).node_id, min_life_time);
return;
}
}
}
}
min_life_time = kMinLifeTime;
GELOGI("diff stream, get default min life[%" PRId64 "], node[%s], id[%" PRId64 "], stream_id[%" PRId64 "<-%" PRId64 "]",
min_life_time, node_op_desc->GetNamePtr(), node_op_desc->GetId(), dst_stream, src_stream);
}
void GetDiffStreamMaxLifeTime(const Node *const node, const int64_t stream_id,
const DiffStreamEdgeLife &diff_stream_edge_life, int64_t &max_life_time) {
max_life_time = kMaxLifeTime;
auto node_op_desc = node->GetOpDescBarePtr();
GE_CHECK_NOTNULL_JUST_RETURN(node_op_desc);
GELOGD("Out depend node:[%s] life begin:%" PRId64 " stream_id:[%" PRId64 "->%" PRId64 "]", node_op_desc->GetNamePtr(),
node_op_desc->GetId(), GetStreamId(node_op_desc), stream_id);
if (GetStreamId(node_op_desc) == stream_id) {
max_life_time = node_op_desc->GetId();
return;
}
const auto it = diff_stream_edge_life.find(GetStreamId(node_op_desc));
if (it == diff_stream_edge_life.cend()) {
return;
}
const auto edges_it = it->second.find(stream_id);
if (edges_it == it->second.cend()) {
return;
}
const auto edge_it =
edges_it->second.lower_bound({static_cast<size_t>(node_op_desc->GetId()), 0UL});
if (edge_it == edges_it->second.end()) {
return;
}
GELOGD("Node:[%s] life begin:%" PRId64 " stream_id:[%" PRId64 "->%" PRId64 "] life_time:[%" PRId64 "->%" PRId64 "]", node_op_desc->GetNamePtr(),
node_op_desc->GetId(), GetStreamId(node_op_desc), stream_id, (*edge_it).node_id, (*edge_it).peer_node_id);
max_life_time = (*edge_it).peer_node_id;
}
int64_t GetNodeMaxLifeBySymbol(const SymbolToAnchors &symbol_to_anchors, const Node *const n, uint32_t out_index,
int64_t &max_node_life_time_by_symbol, std::set<int64_t> &streams,
const DiffStreamEdgeLife &diff_stream_edge_life, int64_t stream_id = kInvalidStreamId) {
NodeIndexIO out_node_index_io(n, out_index, kOut);
const int64_t n_stream_id = (stream_id == kInvalidStreamId) ? GetStreamId(n->GetOpDescBarePtr()) : stream_id;
SymbolToAnchors::const_iterator iter =
symbol_to_anchors.find(out_node_index_io.ToString());
int64_t max_node_life_time = n->GetOpDescBarePtr()->GetId();
if (iter != symbol_to_anchors.cend()) {
for (const auto &node_index_io : iter->second) {
if ((node_index_io.io_type_ != kIn) || (node_index_io.node_ptr_ == nullptr) ||
(node_index_io.node_ptr_->GetOpDescBarePtr() == nullptr)) {
continue;
}
const int64_t in_anchor_stream_id = GetStreamId(node_index_io.node_ptr_->GetOpDescBarePtr());
if (node_index_io.node_ptr_->GetOpDescBarePtr()->GetOpKernelLibName() != kEngineNameGeLocal) {
streams.emplace(in_anchor_stream_id);
}
if (node_index_io.node_ptr_->GetOpDescBarePtr()->GetId() > max_node_life_time_by_symbol) {
max_node_life_time_by_symbol = node_index_io.node_ptr_->GetOpDescBarePtr()->GetId();
max_node_life_time = (max_node_life_time_by_symbol > max_node_life_time)
? max_node_life_time_by_symbol : max_node_life_time;
GELOGI("Node[%s] stream[%" PRId64 "] output[%u]'s life time by symbol [%" PRId64 "][%" PRId64 "], node_io[%s], stream_id[%" PRId64 "].",
n->GetNamePtr(), n_stream_id, out_index, max_node_life_time_by_symbol, max_node_life_time,
node_index_io.node_ptr_->GetNamePtr(), in_anchor_stream_id);
}
if (n_stream_id != in_anchor_stream_id) {
int64_t diff_stream_life_time_end = kMaxLifeTime;
GetDiffStreamMaxLifeTime(node_index_io.node_ptr_, n_stream_id, diff_stream_edge_life,
diff_stream_life_time_end);
GELOGI("Node[%s] stream[%" PRId64 "] output[%u]'s life time is max of [%" PRId64 "][%" PRId64 "][%" PRId64 "], node_io[%s], stream_id[%" PRId64 "].",
n->GetNamePtr(), n_stream_id, out_index, max_node_life_time_by_symbol, max_node_life_time,
diff_stream_life_time_end, node_index_io.node_ptr_->GetNamePtr(), in_anchor_stream_id);
max_node_life_time = std::max(max_node_life_time_by_symbol,
std::max(diff_stream_life_time_end, max_node_life_time));
}
}
}
GELOGI("Node[%s] output[%u]'s max life time[%" PRId64 "][%" PRId64 "].", n->GetNamePtr(), out_index, max_node_life_time_by_symbol,
max_node_life_time);
return max_node_life_time;
}
int64_t GetNodeMaxLife(const SymbolToAnchors &symbol_to_anchors,
const DiffStreamEdgeLife &diff_stream_edge_life, const Node *const n, uint32_t out_index,
int64_t &max_node_life_time_by_symbol, std::set<int64_t> &streams, int64_t stream_id = kInvalidStreamId) {
const int64_t max_node_life_time = GetNodeMaxLifeBySymbol(symbol_to_anchors, n, out_index,
max_node_life_time_by_symbol, streams, diff_stream_edge_life, stream_id);
GELOGD("Node[%s] output[%u]'s max life time[%" PRId64 "].", n->GetNamePtr(), out_index, max_node_life_time);
return max_node_life_time;
}
void GetContinuousOutputMaxLife(const NodePtr &node, const SymbolToAnchors &symbol_to_anchors,
const DiffStreamEdgeLife &out_stream_edges, int64_t &max_life_time,
std::set<int64_t> &streams) {
auto node_op_desc = node->GetOpDescBarePtr();
GE_CHECK_NOTNULL_JUST_RETURN(node_op_desc);
for (uint32_t index = 0U; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) {
const int64_t
life_time = GetNodeMaxLife(symbol_to_anchors, out_stream_edges, node.get(), index, max_life_time, streams);
if (life_time > max_life_time) {
max_life_time = life_time;
}
}
GELOGI("Continuous output node:%s max life time:%" PRId64 "", node->GetNamePtr(), max_life_time);
}
void GetContinuousOutputMaxLifeBySymbol(const Node *const node,
const SymbolToAnchors &symbol_to_anchors,
int64_t &max_life_time,
const DiffStreamEdgeLife &diff_stream_edge_life) {
std::set<int64_t> streams;
const auto node_op_desc = node->GetOpDescBarePtr();
GE_CHECK_NOTNULL_JUST_RETURN(node_op_desc);
for (uint32_t index = 0U; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) {
(void)GetNodeMaxLifeBySymbol(symbol_to_anchors, node, index, max_life_time, streams, diff_stream_edge_life);
}
GELOGI("Continuous output node:%s max life time:%" PRId64 " by symbol", node->GetNamePtr(), max_life_time);
}
Status SetChildHeadOffset(size_t offset, size_t max_offset, std::vector<MemoryBlock *> &blocks) {
for (auto block : blocks) {
if (block != nullptr) {
GE_ASSERT_SUCCESS(block->SetHeadOffset(offset), "set head offset failed, offset: %zu, block head offset: %zu,"
" max_offset: %zu", offset, block->HeadOffset(), max_offset);
offset += block->Size();
GE_ASSERT_TRUE(offset <= max_offset, "offset: %zu, max_offset: %zu", offset, max_offset);
}
}
return SUCCESS;
}
void SetChildTailOffset(size_t offset, std::vector<MemoryBlock *> &blocks) {
for (auto block : blocks) {
if (block != nullptr) {
offset += block->Size();
block->SetTailOffset(offset - 1UL);
}
}
}
Status MemoryBlock::SetHeadOffset(size_t offset) {
head_offset_ = offset;
GE_ASSERT_TRUE(head_offset_ < std::numeric_limits<size_t>::max() - block_size_,
"head_offset_: %zu, block_size_: %zu", head_offset_, block_size_);
const auto max_offset = head_offset_ + block_size_;
GE_ASSERT_SUCCESS(SetChildHeadOffset(head_offset_, max_offset, child_blocks_),
"set child block failed, head_offset: %zu, max_offset: %zu", head_offset_, max_offset);
GE_ASSERT_SUCCESS(SetChildHeadOffset(head_offset_, max_offset, sub_graph_blocks_),
"set subgraph block failed, head_offset: %zu, max_offset: %zu", head_offset_, max_offset);
for (auto &blocks : batch_to_blocks_) {
GE_ASSERT_SUCCESS(SetChildHeadOffset(head_offset_, max_offset, blocks.second),
"set batch block failed, head_offset: %zu, max_offset: %zu", head_offset_, max_offset);
}
return SUCCESS;
}
void MemoryBlock::SetTailOffset(size_t offset) {
tail_offset_ = offset;
SetChildTailOffset(head_offset_, child_blocks_);
SetChildTailOffset(head_offset_, sub_graph_blocks_);
for (auto &blocks : batch_to_blocks_) {
SetChildTailOffset(head_offset_, blocks.second);
}
}
std::vector<MemoryBlock *> MemoryBlock::AllChildBlockList() const {
std::vector<MemoryBlock *> return_child_blocks;
return_child_blocks.insert(return_child_blocks.end(), sub_graph_blocks_.cbegin(), sub_graph_blocks_.cend());
for (auto &batch_blocks : batch_to_blocks_) {
return_child_blocks.insert(return_child_blocks.end(), batch_blocks.second.cbegin(), batch_blocks.second.cend());
}
return_child_blocks.insert(return_child_blocks.end(), child_blocks_.cbegin(), child_blocks_.cend());
return return_child_blocks;
}
void MemoryBlock::Resize() {
size_t child_block_size = 0;
for (auto block : child_blocks_) {
if (block != nullptr) {
block->Resize();
child_block_size += block->Size();
}
}
auto iter = std::max_element(real_size_list_.begin(), real_size_list_.end());
if (iter == real_size_list_.end()) {
GELOGW("real_size_list_ is empty");
return;
} else {
size_t block_size = (child_block_size > *iter) ? child_block_size : *iter;
if ((block_size > 0UL) && (block_size % MEM_ALIGN_SIZE != 0UL)) {
MemReuseUtils::AlignMemOffset(block_size);
}
block_size_ = block_size;
}
}
size_t MemoryBlock::AlignSize() {
if (max_real_size_ == 0UL) {
auto iter = std::max_element(real_size_list_.begin(), real_size_list_.end());
if (iter == real_size_list_.end()) {
GELOGW("real_size_list_ is empty");
} else {
max_real_size_ = *iter;
if ((max_real_size_ > 0UL) && ((max_real_size_ % MEM_ALIGN_SIZE) != 0UL)) {
MemReuseUtils::AlignMemOffset(max_real_size_);
}
}
}
return max_real_size_;
}
bool MemoryBlock::IsSameBatchLabel() const {
if (batch_label_.empty() || node_type_index_list_.empty()) {
return false;
}
bool all_same_label = true;
for (size_t index = 1UL; index < node_type_index_list_.size(); ++index) {
if (node_type_index_list_[index].node_ == nullptr) {
continue;
}
std::string batch_label;
const auto index_op_desc = node_type_index_list_[index].node_->GetOpDescBarePtr();
GE_IF_BOOL_EXEC(index_op_desc == nullptr, continue);
(void)ge::AttrUtils::GetStr(index_op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
if (batch_label_ != batch_label) {
all_same_label = false;
break;
}
}
return all_same_label;
}
bool MemoryBlock::IsGraphInputAndGetSize(const ComputeGraphPtr &compute_graph, size_t &size) const {
for (const auto &node_type_index : node_type_index_list_) {
const auto node = node_type_index.node_;
if (IsDirectInputNode(node, compute_graph)) {
size = node_type_index.no_align_size_;
GELOGD("Node:%s is input of %s, size=%zu", node->GetNamePtr(), compute_graph->GetName().c_str(), size);
return true;
}
}
return false;
}
void MemoryBlock::AddContinuousLifeReuseBlock(MemoryBlock &block) {
auto it_block = std::max_element(std::begin(block.NoAlignSizeList()), std::end(block.NoAlignSizeList()));
auto it_this = std::max_element(std::begin(NoAlignSizeList()), std::end(NoAlignSizeList()));
if (it_block != std::end(block.NoAlignSizeList()) && it_this != std::end(NoAlignSizeList())) {
if ((IsNoAlignSizeReuseBlock() && block.IsNoAlignSizeReuseBlock()) ||
(IsNoAlignSizeReuseBlock() && (*it_this < *it_block)) ||
(block.IsNoAlignSizeReuseBlock() && (*it_this > *it_block))) {
GELOGD("Conflict current block size:%zu continuous:%d, reuse block max size:%zu continuous:%d.",
*it_this, GetContinuousFlag(), *it_block, block.GetContinuousFlag());
return;
}
}
if (IsBlockTypeConflict(block)) {
GELOGD("block type conflict, this: %s(%s), param block: %s(%s).", GetName(*this).c_str(), BlockTypeStr().c_str(),
GetName(block).c_str(), block.BlockTypeStr().c_str());
return;
}
MemoryBlock *parent = nullptr;
MemoryBlock *child = nullptr;
if (((child_offset_ + block.AlignSize()) <= *it_this) && (IsNoAlignSizeReuseBlock())) {
parent = this;
child = █
} else if (((block.child_offset_ + AlignSize()) <= *it_block) && (block.IsNoAlignSizeReuseBlock()) &&
(AlignSize() == block.AlignSize()) && child_blocks_.empty()) {
parent = █
child = this;
} else {
return;
}
parent->child_blocks_.emplace_back(child);
parent->child_offset_ += child->AlignSize();
child->child_block_ = true;
GELOGI("[no_align_size_block_reuse]"
"Add block[%s size:%zu, stream id:%" PRId64 ", life time[begin:%zu, end:%zu], continuous:%d]"
" to block[%s size:%zu, stream id:%" PRId64 ", life time[begin:%zu, end:%zu], continuous:%d]",
GetName(*child).c_str(), child->block_size_, child->stream_id_, child->GetLifeBegin(),
child->GetLifeEnd(child->stream_id_), child->GetContinuousFlag(),
GetName(*parent).c_str(), parent->block_size_, parent->stream_id_, parent->GetLifeBegin(),
parent->GetLifeEnd(parent->stream_id_), parent->GetContinuousFlag());
return;
}
void MemoryBlock::AddZeroCopyLifeReuseBlock(MemoryBlock &block) {
auto it_block = std::max_element(block.real_size_list_.begin(), block.real_size_list_.end());
auto it_this = std::max_element(real_size_list_.begin(), real_size_list_.end());
if ((it_block == block.real_size_list_.end()) || (it_this == real_size_list_.end())) {
return;
}
if ((is_zero_copy_ && block.is_zero_copy_) ||
(is_zero_copy_ && (*it_this < *it_block)) ||
(block.is_zero_copy_ && (*it_this > *it_block))) {
GELOGD("Conflict current block size:%zu is_reuse_zero_copy:%d is_zero_copy:%d, "
"reuse block max size:%zu is_reuse_zero_copy:%d is_zero_copy:%d.", *it_this, is_reuse_zero_copy_,
is_zero_copy_, *it_block, block.is_reuse_zero_copy_, block.is_zero_copy_);
return;
}
if (IsBlockTypeConflict(block)) {
GELOGD("block type conflict, this: %s(%s), param block: %s(%s).", GetName(*this).c_str(), BlockTypeStr().c_str(),
GetName(block).c_str(), block.BlockTypeStr().c_str());
return;
}
MemoryBlock *parent = nullptr;
MemoryBlock *child = nullptr;
if ((((child_offset_ + block.AlignSize()) <= *it_this) || ((child_offset_ == 0UL) && (block.child_offset_ == 0UL) &&
(*it_block == *it_this))) && is_zero_copy_) {
parent = this;
child = █
} else if ((((block.child_offset_ + AlignSize()) <= *it_block) || ((child_offset_ == 0UL) &&
(block.child_offset_ == 0UL) && (*it_block == *it_this))) && block.is_zero_copy_ &&
(AlignSize() == block.AlignSize()) && child_blocks_.empty()) {
parent = █
child = this;
} else {
return;
}
if ((parent->is_zero_copy_) && (!child->is_reuse_zero_copy_)) {
return;
}
parent->child_blocks_.emplace_back(child);
parent->child_offset_ += child->AlignSize();
child->child_block_ = true;
parent->is_reuse_zero_copy_ = (child->is_reuse_zero_copy_ && parent->is_reuse_zero_copy_);
GELOGI("[zero_copy_size_block_reuse]"
"Add block[%s size:%zu, stream id:%" PRId64 ", life time[begin:%zu, end:%zu], continuous:%d, is_zero_copy:%d]"
" to block[%s size:%zu, stream id:%" PRId64 ", life time[begin:%zu, end:%zu], continuous:%d, is_zero_copy:%d]",
GetName(*child).c_str(), child->block_size_, child->stream_id_, child->GetLifeBegin(),
child->GetLifeEnd(child->stream_id_), child->GetContinuousFlag(), child->is_zero_copy_,
GetName(*parent).c_str(), parent->block_size_, parent->stream_id_, parent->GetLifeBegin(),
parent->GetLifeEnd(parent->stream_id_), parent->GetContinuousFlag(), parent->is_zero_copy_);
return;
}
bool CanBlockLifeReuse(const BlockMemAssigner *const mem_assigner,
const MemoryBlock &in_block, const MemoryBlock &out_block,
DiffStreamEdgeLife &diff_stream_edge_life) {
const auto first_node = out_block.NodeTypeIndexList().front();
if ((first_node.mem_type_ == kOutput)
&& mem_assigner->HasSameOutAnchorWithDiffStream(first_node.node_, first_node.index_)) {
GELOGD("out_block first node %s(topoid: %lld) output %u use same memory with node on other stream, return false.",
first_node.node_->GetNamePtr(), first_node.node_->GetOpDescBarePtr()->GetId(), first_node.index_);
return false;
}
if (in_block.IsBlockTypeConflict(out_block)) {
GELOGD("block type conflict, in_block: %s(%s), out_block: %s.", GetName(in_block).c_str(),
in_block.BlockTypeStr().c_str(), GetName(out_block).c_str(), out_block.BlockTypeStr().c_str());
return false;
}
GELOGD("in_block[%s] out_block[%s]", GetName(in_block).c_str(), GetName(out_block).c_str());
if (in_block.stream_id_ == out_block.stream_id_) {
return (out_block.GetLifeBegin() > in_block.GetLifeEnd(out_block.stream_id_));
} else {
auto depend_node_id = out_block.GetDependLifeBegin(in_block.stream_id_, diff_stream_edge_life);
size_t tail_node_id = 0UL;
const auto &node_type_index = in_block.NodeTypeIndexList().back();
if (node_type_index.node_ != nullptr) {
auto node_op_desc = node_type_index.node_->GetOpDescBarePtr();
if (node_op_desc != nullptr) {
tail_node_id = static_cast<size_t>(node_op_desc->GetId());
}
}
if ((tail_node_id != 0UL) && (depend_node_id >= tail_node_id)) {
if (!in_block.GetReuseStrategy().memory_priority_mode_) {
return (depend_node_id > in_block.GetLifeEnd(out_block.stream_id_));
}
int64_t end_stream_id = kInvalidStreamId;
auto in_block_life_end = in_block.GetLifeEnd(out_block.stream_id_, end_stream_id);
if (end_stream_id == out_block.stream_id_) {
return (out_block.GetLifeBegin() > in_block_life_end);
}
if ((end_stream_id != in_block.stream_id_) && (end_stream_id != kInvalidStreamId)) {
return (out_block.GetDependLifeBegin(end_stream_id, diff_stream_edge_life) >= in_block_life_end);
}
return (depend_node_id > in_block_life_end);
}
}
return false;
}
bool MemoryBlock::AddLifeReuseBlock(const BlockMemAssigner *const mem_assigner,
MemoryBlock *block, std::vector<MemoryBlock *> &clone_blocks, uint32_t depth,
DiffStreamEdgeLife &diff_stream_edge_life, bool child_reuse) {
GELOGD("this[%s size:%zu, stream id:%" PRId64 " life time[begin:%zu, end:%zu] childs:%zu] "
"block[%s size:%zu, stream id:%" PRId64 ", life time[begin:%zu, end:%zu] childs:%zu]", GetName(*this).c_str(),
block_size_, stream_id_, GetLifeBegin(), GetLifeEnd(block->stream_id_), child_blocks_.size(),
GetName(*block).c_str(), block->block_size_, block->stream_id_, block->GetLifeBegin(),
block->GetLifeEnd(stream_id_), block->child_blocks_.size());
++depth;
const bool can_not_life_reuse = (CanNotLifeReuse(*this, child_reuse) || CanNotLifeReuse(*block) ||
(batch_label_ != block->batch_label_) || (memory_type_ != block->memory_type_) || (depth > kMaxDepthNum));
if (can_not_life_reuse || (!block->child_blocks_.empty())) {
return false;
}
bool can_block_life_reuse = CanBlockLifeReuse(mem_assigner, *this, *block, diff_stream_edge_life) ||
CanBlockLifeReuse(mem_assigner, *block, *this, diff_stream_edge_life);
const bool is_continue_reuse_zero_copy = (is_zero_copy_ && (block->GetFirstContinuousFlag() ||
block->GetLastContinuousFlag() || block->GetContinuousFlag())) || (block->is_zero_copy_ &&
(GetFirstContinuousFlag() || GetLastContinuousFlag() || GetContinuousFlag()));
GELOGD("continuous cannot reuse zero copy, is_continue_not_reuse_zero_copy:%d", is_continue_reuse_zero_copy);
if (is_continue_reuse_zero_copy) {
return false;
}
const bool is_no_align_size_reuse_block = IsNoAlignSizeReuseBlock() || block->IsNoAlignSizeReuseBlock();
if (is_no_align_size_reuse_block) {
if (can_block_life_reuse) {
AddContinuousLifeReuseBlock(*block);
}
return true;
}
const bool is_real_size_reuse_block = IsRealSizeReuseBlock() || block->IsRealSizeReuseBlock();
if (is_real_size_reuse_block) {
if (can_block_life_reuse) {
AddZeroCopyLifeReuseBlock(*block);
}
return true;
}
if (!can_block_life_reuse && !CanIntervalLifeReuse(*this, *block, clone_blocks)) {
return false;
}
for (auto child_block : child_blocks_) {
if ((child_block != nullptr) &&
child_block->AddLifeReuseBlock(mem_assigner, block, clone_blocks, depth, diff_stream_edge_life, true)) {
return true;
}
}
if (block->AlignSize() != MEM_ALIGN_SIZE) {
if ((child_offset_ + block->AlignSize()) > (AlignSize() + MEM_ALIGN_SIZE)) {
return false;
}
} else {
if ((child_offset_ + block->AlignSize()) > AlignSize()) {
return false;
}
}
child_blocks_.emplace_back(block);
is_reuse_zero_copy_ = (block->is_reuse_zero_copy_ && is_reuse_zero_copy_);
child_offset_ += block->AlignSize();
block->child_block_ = true;
GELOGI("Add block[%s size:%zu, stream id:%" PRId64 " life time[begin:%zu, end:%zu]] to"
" block[%s size:%zu, stream id:%" PRId64 ", life time[begin:%zu, end:%zu]]", GetName(*block).c_str(), block->block_size_,
block->stream_id_, block->GetLifeBegin(), block->GetLifeEnd(stream_id_), GetName(*this).c_str(), block_size_,
stream_id_, GetLifeBegin(), GetLifeEnd(block->stream_id_));
return true;
}
size_t MemoryBlock::GetLifeBegin(bool for_sort) const {
if (!node_type_index_list_.empty()) {
return node_type_index_list_.front().GetLifeBegin(for_sort);
}
return 0UL;
}
size_t MemoryBlock::GetDependLifeBegin(int64_t stream_id, DiffStreamEdgeLife &diff_stream_edge_life) const {
GELOGD("In depend node:[%s] stream_id:[%" PRId64 "->%" PRId64 "] self life time[%" PRId64 "-%" PRId64 "]",
NodeTypeIndexList().front().node_->GetNamePtr(), stream_id_, stream_id, GetLifeBegin(),
GetLifeEnd(stream_id));
const auto it = diff_stream_edge_life.find(stream_id_);
if (it == diff_stream_edge_life.cend()) {
return 0UL;
}
const auto edges_it = it->second.find(stream_id);
if (edges_it == it->second.cend()) {
return 0UL;
}
auto first_node_id = GetLifeBegin();
auto edge_it = edges_it->second.lower_bound({first_node_id, 0UL});
if (edges_it->second.empty()) {
return 0UL;
}
if ((edge_it == edges_it->second.end()) || ((*edge_it).node_id > first_node_id)) {
if (edge_it == edges_it->second.begin()) {
GELOGD("Depend lower node id:%" PRId64 " > node id:%" PRId64 ".", (*edge_it).node_id, GetLifeBegin());
return 0UL;
}
--edge_it;
}
GELOGD("Node:[%s] life begin:%" PRId64 " stream_id:[%" PRId64 "->%" PRId64 "] depend life_time:[%" PRId64 "->%" PRId64 "]",
NodeTypeIndexList().front().node_->GetNamePtr(), first_node_id, stream_id_, stream_id, (*edge_it).node_id,
(*edge_it).peer_node_id);
return (*edge_it).peer_node_id;
}
size_t MemoryBlock::GetLifeEnd(int64_t stream_id) const {
if (!node_type_index_list_.empty()) {
const bool only_to_one_stream = (node_type_index_list_.back().out_stream_count_ == 1U)
&& (node_type_index_list_.back().diff_stream_life_time_.size() == 1U);
const auto it = node_type_index_list_.back().diff_stream_life_time_.find(stream_id);
if (only_to_one_stream && (it != node_type_index_list_.back().diff_stream_life_time_.cend())) {
GELOGD("block %s stream[%" PRId64 "] [%" PRId64 "] life[%" PRId64 "]", GetName(*this).c_str(), stream_id_,
stream_id, it->second);
return it->second;
}
GELOGD("block %s stream[%" PRId64 "] [%" PRId64 "] life[%" PRId64 "]", GetName(*this).c_str(), stream_id_, stream_id,
node_type_index_list_.back().life_time_end_);
return node_type_index_list_.back().life_time_end_;
}
return kMaxLifeTime;
}
size_t MemoryBlock::GetLifeEnd(int64_t stream_id, int64_t &end_stream_id) const {
end_stream_id = stream_id_;
if (!node_type_index_list_.empty()) {
const bool only_to_one_stream = (node_type_index_list_.back().out_stream_count_ == 1U)
&& (node_type_index_list_.back().diff_stream_life_time_.size() == 1U);
if (!only_to_one_stream) {
GELOGD("block %s stream[%" PRId64 "] [%" PRId64 "] life[%" PRId64 "]",
GetName(*this).c_str(), stream_id_, stream_id,
node_type_index_list_.back().life_time_end_);
return node_type_index_list_.back().life_time_end_;
}
const auto it = node_type_index_list_.back().diff_stream_life_time_.find(stream_id);
if (it != node_type_index_list_.back().diff_stream_life_time_.cend()) {
GELOGD("block %s stream[%" PRId64 "] [%" PRId64 "] life[%" PRId64 "]",
GetName(*this).c_str(), stream_id_, stream_id, it->second);
end_stream_id = stream_id;
return it->second;
}
if (stream_id != stream_id_) {
end_stream_id = node_type_index_list_.back().diff_stream_life_time_.begin()->first;
GELOGD("block %s stream[%" PRId64 "] [%" PRId64 "] [%" PRId64 "] life[%" PRId64 "]",
GetName(*this).c_str(), stream_id_, end_stream_id, stream_id,
node_type_index_list_.back().diff_stream_life_time_.begin()->second);
return node_type_index_list_.back().diff_stream_life_time_.begin()->second;
}
}
return kMaxLifeTime;
}
size_t MemoryBlock::GetSymbolLifeEnd() const {
if (!node_type_index_list_.empty()) {
return node_type_index_list_.back().symbol_max_life_time_end_;
}
return kDefaultLifeTime;
}
void MemoryBlock::SetSymbolLifeEnd(size_t symbol_life_end) {
if (!node_type_index_list_.empty()) {
if ((node_type_index_list_.back().symbol_max_life_time_end_ == kDefaultLifeTime) ||
symbol_life_end > node_type_index_list_.back().symbol_max_life_time_end_) {
node_type_index_list_.back().symbol_max_life_time_end_ = symbol_life_end;
}
}
}
void MemoryBlock::SetLifeTimeEnd(size_t time, int64_t stream_id) {
if (!node_type_index_list_.empty()) {
if (stream_id != stream_id_) {
auto it = node_type_index_list_.back().diff_stream_life_time_.find(stream_id);
if (it == node_type_index_list_.back().diff_stream_life_time_.end()) {
node_type_index_list_.back().diff_stream_life_time_[stream_id] = time;
} else if (time > it->second) {
it->second = time;
} else {}
if (node_type_index_list_.back().life_time_end_ == kDefaultLifeTime) {
node_type_index_list_.back().life_time_end_ = kMaxLifeTime;
}
} else {
if ((node_type_index_list_.back().life_time_end_ == kDefaultLifeTime) ||
(time > node_type_index_list_.back().life_time_end_)) {
node_type_index_list_.back().life_time_end_ = time;
}
}
}
}
void MemoryBlock::SetOutStreamLifeTime(size_t out_time, size_t end_time, int64_t stream_id) {
const size_t symbol_life_time = GetSymbolLifeEnd();
if ((symbol_life_time != kDefaultLifeTime) && (end_time < symbol_life_time)) {
end_time = symbol_life_time;
GELOGI("Block %s has continuous input node, which include multiple ref, end time is: %" PRId64 "",
GetName(*this, true).c_str(), end_time);
}
end_time = (end_time < out_time) ? kMaxLifeTime : end_time;
if (!node_type_index_list_.empty()) {
auto iter = node_type_index_list_.back().out_stream_life_time_.find(stream_id);
if (iter == node_type_index_list_.back().out_stream_life_time_.end()) {
node_type_index_list_.back().out_stream_life_time_.emplace(stream_id, std::make_pair(out_time, end_time));
node_type_index_list_.back().SetOutStreamCount(node_type_index_list_.back().out_stream_life_time_.size());
return;
}
if (out_time > iter->second.first) {
iter->second.first = out_time;
}
if (end_time > iter->second.second) {
iter->second.second = end_time;
}
}
}
bool MemoryBlock::CrossLifeTimeNode(const std::vector<NodeTypeIndex>::const_iterator &it,
const MemoryBlock &child_block) const {
if (node_type_index_list_.empty()) {
return false;
}
const NodeTypeIndex &node_type_index = *it;
if (!((node_type_index.life_time_end_ < node_type_index_list_.front().GetLifeBegin()) ||
(node_type_index.GetLifeBegin() > node_type_index_list_.back().life_time_end_))) {
for (const auto &node : node_type_index_list_) {
if (CrossLifeTime(node, node_type_index)) {
return true;
}
}
}
if (node_type_index.next_is_ref_input_) {
auto ref_it = it;
ref_it++;
for (; ref_it != child_block.NodeTypeIndexList().cend(); ++ref_it) {
if (!(*ref_it).ref_input_) {
break;
}
for (const auto &node : node_type_index_list_) {
if (CrossLifeTime(node, *ref_it)) {
return true;
}
}
}
}
return false;
}
MemoryBlock *MemoryBlock::Clone() const {
auto block = new (std::nothrow) MemoryBlock(reuse_strategy_, block_size_, stream_id_, reuse_mem_, memory_type_);
if (block != nullptr) {
block->same_stream_ = same_stream_;
block->is_zero_copy_ = is_zero_copy_;
block->is_reuse_zero_copy_ = is_reuse_zero_copy_;
block->memory_type_logic_base_ = memory_type_logic_base_;
block->need_same_offset_in_batch_ = need_same_offset_in_batch_;
block->ref_count_ = ref_count_;
block->input_index_ = input_index_;
block->batch_label_ = batch_label_;
block->has_sub_graph_in_out_node_ = has_sub_graph_in_out_node_;
block->post_reuse_flag_ = post_reuse_flag_;
block->is_fixed_addr_prior_ = is_fixed_addr_prior_;
block->block_type_list_ = block_type_list_;
}
return block;
}
void MemoryBlock::UpdateContinuousFlag () {
first_continuous_block_ = false;
last_continuous_block_ = false;
continuous_block_ = false;
for (const auto &node : node_type_index_list_) {
if (node.GetFirstContinuousNodeFlag()) {
first_continuous_block_ = true;
}
if (node.GetLastContinuousNodeFlag()) {
last_continuous_block_ = true;
}
if (node.GetContinuousNodeFlag()) {
continuous_block_ = true;
}
}
}
std::vector<NodeTypeIndex>::const_iterator MemoryBlock::DelNode(std::vector<NodeTypeIndex>::const_iterator &it) {
if ((node_type_index_list_.size() == real_size_list_.size())
&& (node_type_index_list_.size() == no_align_size_list_.size())) {
const auto to_delete = *it;
size_t node_pos = it - node_type_index_list_.begin();
auto return_it = node_type_index_list_.erase(it);
real_size_list_.erase(real_size_list_.cbegin() + node_pos);
no_align_size_list_.erase(no_align_size_list_.cbegin() + node_pos);
block_type_list_.WithDeleted(*this, to_delete);
return return_it;
}
return ++it;
}
void MemoryBlock::Swap(MemoryBlock &block) {
node_type_index_list_.swap(block.node_type_index_list_);
real_size_list_.swap(block.real_size_list_);
no_align_size_list_.swap(block.no_align_size_list_);
block_type_list_.swap(block.block_type_list_);
}
void SetLastUsedInputMemAttr(const NodePtr &node, int32_t input_index, std::vector<TAttr<bool>> &bool_attr) {
if (node == nullptr) {
return;
}
auto node_op_desc = node->GetOpDescBarePtr();
if (node_op_desc != nullptr) {
auto input_desc = node_op_desc->MutableInputDesc(input_index);
if (input_desc == nullptr) {
return;
}
bool_attr.emplace_back(input_desc.get(), node_op_desc, input_index, ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE, true);
}
}
std::string MemoryBlock::String() const {
std::stringstream ss;
ss << "Block size: " << Size() << " from " << HeadOffset() << " to " << TailOffset() << " ";
ss << "ref_count: " << ref_count_ << " ";
ss << "stream_id: " << stream_id_ << " ";
ss << "is_zero_copy: " << is_zero_copy_ << " ";
ss << "reuse_mem_: " << reuse_mem_ << " ";
ss << "no_align_size: " << ToString(no_align_size_list_) << " ";
ss << "real_size_list: " << ToString(real_size_list_) << " ";
ss << "members: ";
for (auto x : NodeTypeIndexList()) {
ss << "__node: " << ::ToString(x) << " ";
}
for (const auto& symbol : SymbolList()) {
ss << "__symbol: " << symbol << " ";
}
ss << "memory_type: " << memory_type_ << " ";
return ss.str();
}
BlockMemAssigner::BlockMemAssigner(const MemAssistInfo &mem_assist_info)
: compute_graph_(mem_assist_info.compute_graph), symbol_to_anchors_(mem_assist_info.symbol_to_anchors),
anchor_to_symbol_(mem_assist_info.anchor_to_symbol), life_time_(0),
parent_nodes_to_stream_ids_(mem_assist_info.parent_nodes_to_stream_ids) {
std::string memory_optimization_policy;
ge::GetContext().GetOption(MEMORY_OPTIMIZATION_POLICY, memory_optimization_policy);
if (memory_optimization_policy == kMemoryPriority) {
memory_priority_mode_ = true;
}
strict_reuse_zero_memory_mode_ = IsStrictReuseZeroMemoryMode();
(void)InitIoReuseFlag();
ParseGraphIoAllocMode();
std::string refreshable;
(void)ge::GetContext().GetOption(ge::OPTION_FEATURE_BASE_REFRESHABLE, refreshable);
is_feature_map_refreshable_ = (refreshable == "1");
input_fusion_size_ = ge::GetContext().GetInputFusionSize();
GELOGI("feature map refreshable: %d, input_fusion_size: %" PRIu64, is_feature_map_refreshable_, input_fusion_size_);
}
BlockMemAssigner::~BlockMemAssigner() {
GELOGD("[Destruct][BlockMemAssigner]blocks_store_ size : %lu", blocks_store_.size());
for (MemoryBlock *memory_block : blocks_store_) {
GE_DELETE_NEW_SINGLE(memory_block);
}
}
void GetMaxBatchAllMemorySize(std::map<std::string, std::vector<int64_t>> &batch_all_memory_size,
std::map<std::string, int64_t> batch_total_size, std::vector<int64_t> &all_memory_size,
std::string &max_batch_label) {
int64_t max_batch_size = 0;
for (const auto &it : batch_total_size) {
GELOGI("Batch[%s] total memory size[%" PRId64 "]", it.first.c_str(), it.second);
if (it.first.empty()) {
continue;
}
if (it.second > max_batch_size) {
max_batch_size = it.second;
max_batch_label = it.first;
}
}
GELOGI("Max batch[%s] total memory size[%" PRId64 "]", max_batch_label.c_str(), max_batch_size);
for (const auto &it : batch_all_memory_size) {
if (it.first.empty() || (it.first == max_batch_label)) {
all_memory_size.insert(all_memory_size.cend(), it.second.cbegin(), it.second.cend());
}
}
if (all_memory_size.empty()) {
all_memory_size.emplace_back(MEM_ALIGN_SIZE);
}
sort(all_memory_size.begin(), all_memory_size.end());
GELOGD("All memory size: %s", ToString(all_memory_size).c_str());
for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) {
if (*iter == 0) {
iter = all_memory_size.erase(iter);
} else {
++iter;
}
}
}
void BlockMemAssigner::InsertStreamOutEdge() {
for (const auto &dst_stream_to_edges : in_stream_edges_) {
const auto dst_stream_id = dst_stream_to_edges.first;
for (const auto &src_stream_to_edges : dst_stream_to_edges.second) {
const auto src_stream_id = src_stream_to_edges.first;
auto &out_stream_edge_set = out_stream_edges_[src_stream_id][dst_stream_id];
for (const auto &edge : src_stream_to_edges.second) {
out_stream_edge_set.insert({edge.peer_node_id, edge.node_id});
GELOGI("[StreamEdge]Out depend Node: stream_id:[%" PRId64 "->%" PRId64 "] life_time:[%zu->%zu], only insert.",
src_stream_id, dst_stream_id, edge.peer_node_id, edge.node_id);
}
}
}
}
* stream1 stream2
* 1------+
* |
* 2 ---- 3
* |
* +------4
* 对于stream2 的入边来讲
* stream2<-stream1的入边 in_edge 3<-1 删掉
* 3<-2 保留
* 可以简单记为:id差越小越好
*/
void BlockMemAssigner::InsertStreamInEdge(const EdgeLife &new_in_edge, const int64_t src_stream_id,
const int64_t dst_stream_id, const char *src_name, const char *dst_name) {
auto &in_edge_set = in_stream_edges_[dst_stream_id][src_stream_id];
const auto old_in_edge_iter = in_edge_set.find(new_in_edge);
if (old_in_edge_iter != in_edge_set.end()) {
if (old_in_edge_iter->peer_node_id < new_in_edge.peer_node_id) {
const auto old_peer_node_id = old_in_edge_iter->peer_node_id;
in_edge_set.erase(old_in_edge_iter);
in_edge_set.insert(new_in_edge);
if ((src_name != nullptr) && (dst_name != nullptr)) {
GELOGI("[StreamEdge]In depend Node: [%s<-%s] stream_id:[%" PRId64 "<-%" PRId64 "] life_time:[%zu<-%zu], erase and insert,"
" old_peer_node_id[%zu].", dst_name, src_name,
dst_stream_id, src_stream_id, new_in_edge.node_id, new_in_edge.peer_node_id,
old_peer_node_id);
} else {
GELOGI("[StreamEdge]In depend Node: stream_id:[%" PRId64 "<-%" PRId64 "] life_time:[%zu<-%zu], erase and insert,"
" old_peer_node_id[%zu].", dst_stream_id, src_stream_id, new_in_edge.node_id, new_in_edge.peer_node_id,
old_peer_node_id);
}
} else {
if ((src_name != nullptr) && (dst_name != nullptr)) {
GELOGI("[StreamEdge]In depend Node: [%s<-%s] stream_id:[%" PRId64 "<-%" PRId64 "] life_time:[%zu<-%zu], not erase, not insert, "
"old_peer_node_id[%zu] >= new_peer_node_id[%zu].", dst_name, src_name,
dst_stream_id, src_stream_id, new_in_edge.node_id, new_in_edge.peer_node_id,
old_in_edge_iter->peer_node_id, new_in_edge.peer_node_id);
} else {
GELOGI("[StreamEdge]In depend Node: stream_id:[%" PRId64 "<-%" PRId64 "] life_time:[%zu<-%zu], not erase, not insert, "
"old_peer_node_id[%zu] >= new_peer_node_id[%zu].", dst_stream_id, src_stream_id, new_in_edge.node_id,
new_in_edge.peer_node_id, old_in_edge_iter->peer_node_id, new_in_edge.peer_node_id);
}
}
} else {
in_edge_set.insert(new_in_edge);
if ((src_name != nullptr) && (dst_name != nullptr)) {
GELOGI("[StreamEdge]In depend Node: [%s<-%s] stream_id:[%" PRId64 "<-%" PRId64 "] life_time:[%zu<-%zu], only insert.",
dst_name, src_name, dst_stream_id, src_stream_id,
new_in_edge.node_id, new_in_edge.peer_node_id);
} else {
GELOGI("[StreamEdge]In depend Node: stream_id:[%" PRId64 "<-%" PRId64 "] life_time:[%zu<-%zu], only insert.",
dst_stream_id, src_stream_id, new_in_edge.node_id, new_in_edge.peer_node_id);
}
}
}
* stream1 stream2
* 10--+
* 20 \ 30
* 40 ---\->50
* 60 \ 70
* 90
* 10->90
* 40->50
* only keep edge 40->50, 可以简单记为缩短peer_node_id与node_id差值
*/
static void EraseIntersectedEdge(std::set<EdgeLife, CompareEdgeLife> &in_edge_set,
const EdgeLife &old_in_edge,
const EdgeLife &new_in_edge,
const int64_t src_node_stream_id,
const int64_t dst_node_stream_id) {
if (((old_in_edge.node_id > new_in_edge.node_id) && (old_in_edge.peer_node_id < new_in_edge.peer_node_id)) ||
((old_in_edge.node_id < new_in_edge.node_id) && (old_in_edge.peer_node_id > new_in_edge.peer_node_id))) {
GELOGI("[StreamEdge]In depend Node: stream_id:[%" PRId64 "<-%" PRId64 "] erase life_time:[%zu<-%zu], will insert new "
"life_time:[%zu<-%zu].", dst_node_stream_id, src_node_stream_id, old_in_edge.node_id, old_in_edge.peer_node_id,
new_in_edge.node_id, new_in_edge.peer_node_id);
auto it = in_edge_set.find(old_in_edge);
if ((it != in_edge_set.end()) && (it->peer_node_id == old_in_edge.peer_node_id)) {
in_edge_set.erase(it);
}
}
}
* 函数作用:
* 该函数用于建立跨流的边,比如已有stream1->stream2->stream3, 建立stream1->stream3的边
* 主要逻辑:在建立完stream2->stream3的边后,遍历所有到stream2的流(比如stream1),并建立该流到stream3的边。
*
* in_stream_edges : 入边
* out_stream_edges : 出边
* node_desc: 当前节点
* in_node_desc: 输入节点
*
* 返回值:
* 无返回值,函数直接修改传入的in_stream_edges和out_stream_edges。
*
* in edge: stream_id:[2<-1] life_time:[1<-0]
* in edge: stream_id:[3<-2] life_time:[3<-1]
* in edge: stream_id:[3<-1] life_time:[3<-0] new edge
* in edge: stream_id:[3<-0] life_time:[3<-2]
* in edge: stream_id:[1<-3] life_time:[4<-3]
* in edge: stream_id:[1<-0] life_time:[4<-2] new edge
* in edge: stream_id:[1<-1] life_time:[4<-0] new edge
* in edge: stream_id:[1<-2] life_time:[4<-1] new edge
*/
void BlockMemAssigner::AddInStreamEdge(const ge::OpDesc *const node_desc, const ge::OpDesc *const in_node_desc) {
const auto stream_id = GetStreamId(node_desc);
const auto node_id = static_cast<size_t>(node_desc->GetId());
const auto in_stream_id = GetStreamId(in_node_desc);
const auto in_node_id = static_cast<size_t>(in_node_desc->GetId());
for (const auto &stream_to_in_edges : in_stream_edges_[in_stream_id]) {
const auto &in_edges = stream_to_in_edges.second;
const auto third_stream_id = stream_to_in_edges.first;
if (in_edges.empty() || (stream_id == third_stream_id)) {
continue;
}
* 这段逻辑是目的是找到一条边,其peer_node_id作为stream_id<-third_stream_id的peer_node_id
* 比如stream_id=3, in_stream_id=2, node_id=5, in_node_id=4, third_stream_id=1
* in edge: stream_id:[3<-2] life_time:[5<-4]
*
* in edge: stream_id:[2<-1] life_time:[1<-0]
* in edge: stream_id:[2<-1] life_time:[4<-2] <---edge_it 小于等于4的,最大的, 2就作为new_in_edge.peer_node_id
* in edge: stream_id:[2<-1] life_time:[7<-3]
*/
auto edge_it = in_edges.lower_bound({in_node_id, 0UL});
if ((edge_it == in_edges.end()) || ((*edge_it).node_id > in_node_id)) {
if (edge_it == in_edges.begin()) {
continue;
}
--edge_it;
}
const EdgeLife new_in_edge{node_id, (*edge_it).peer_node_id};
auto &in_edge_set = in_stream_edges_[stream_id][third_stream_id];
const auto old_edge_it = in_edge_set.lower_bound({node_id, 0UL});
if (old_edge_it != in_edge_set.end()) {
EraseIntersectedEdge(in_edge_set, *old_edge_it, new_in_edge, third_stream_id, stream_id);
}
InsertStreamInEdge(new_in_edge, third_stream_id, stream_id);
}
}
void BlockMemAssigner::GetDiffStreamEdgeLife(const NodePtr &node, const std::set<int64_t> &exclude_merge_streams) {
auto node_desc = node->GetOpDescBarePtr();
GE_CHECK_NOTNULL_JUST_RETURN(node_desc);
if (NodeUtils::IsLikeAtomicClean(node) || (node_desc->GetOpKernelLibName() == kEngineNameGeLocal)) {
return;
}
for (const auto &out_anchor : node->GetAllOutAnchors()) {
GE_CHECK_NOTNULL_JUST_RETURN(out_anchor);
for (auto const peer_in_anchor : out_anchor->GetPeerAnchorsPtr()) {
GE_CHECK_NOTNULL_JUST_RETURN(peer_in_anchor);
const auto peer_node = peer_in_anchor->GetOwnerNodeBarePtr();
GE_CHECK_NOTNULL_JUST_RETURN(peer_node);
const auto peer_in_node_desc = peer_node->GetOpDescBarePtr();
GE_CHECK_NOTNULL_JUST_RETURN(peer_in_node_desc);
const auto stream_id = GetStreamId(node_desc);
const auto peer_in_stream_id = GetStreamId(peer_in_node_desc);
if (stream_id == peer_in_stream_id) {
continue;
}
if (exclude_merge_streams.find(stream_id) != exclude_merge_streams.cend() ||
exclude_merge_streams.find(peer_in_stream_id) != exclude_merge_streams.cend()) {
GELOGI("Stream [%" PRId64 "->%" PRId64 "] will interrupt memory reuse among streams", stream_id,
peer_in_stream_id);
continue;
}
const auto node_id = static_cast<size_t>(node_desc->GetId());
const auto peer_node_id = static_cast<size_t>(peer_in_node_desc->GetId());
const EdgeLife new_in_edge{peer_node_id, node_id};
InsertStreamInEdge(new_in_edge, stream_id, peer_in_stream_id, node_desc->GetNamePtr(),
peer_in_node_desc->GetNamePtr());
AddInStreamEdge(peer_in_node_desc, node_desc);
}
}
}
void BlockMemAssigner::OptimizeStreamIdForMemoryReuse(const NodePtr &node) {
SetRealStreamIdForDataNode(node.get());
MemReuseStrategy::OptimizeDiffStream(node.get());
}
void BlockMemAssigner::SetRealStreamIdForDataNode(const Node *const node) {
auto node_op_desc = node->GetOpDescBarePtr();
if (node_op_desc == nullptr) {
return;
}
if (OpTypeUtils::IsDataNode(node->GetType()) && (GetStreamId(node->GetOpDescBarePtr()) == kInvalidStreamId)) {
const auto &out_anchor = node->GetOutDataAnchor(0U);
if (out_anchor != nullptr) {
std::set<int64_t> peer_streams;
for (auto const peer_in_anchor : out_anchor->GetPeerInDataAnchorsPtr()) {
if ((peer_in_anchor == nullptr) || (peer_in_anchor->GetOwnerNodeBarePtr() == nullptr)) {
continue;
}
peer_streams.insert(GetStreamId(peer_in_anchor->GetOwnerNodeBarePtr()->GetOpDescBarePtr()));
if (peer_streams.size() > 1U) {
break;
}
}
if (peer_streams.size() == 1U) {
MemReuseUtils::SetStreamId(node_op_desc, *peer_streams.begin());
}
}
}
}
Status GetNetoutputInNodeStream(const Node *const netoutput, const Node *const parent_node,
std::unordered_map<const Node *, std::vector<int64_t>> &parent_nodes_to_stream_ids) {
auto &parent_node_to_stream_ids = parent_nodes_to_stream_ids[parent_node];
const auto &netoutput_op_desc = netoutput->GetOpDesc();
const auto input_size = netoutput->GetAllInDataAnchorsSize();
for (uint32_t i = 0U; i < input_size; ++i) {
const auto input_desc = netoutput_op_desc->GetInputDesc(i);
uint32_t parent_out_index = 0U;
if (!AttrUtils::GetInt(input_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_out_index) ||
parent_out_index >= parent_node_to_stream_ids.size() ||
parent_node_to_stream_ids[parent_out_index] == kInvalidStreamId) {
continue;
}
const auto in_data_anchor = netoutput->GetInDataAnchor(i);
GE_ASSERT_NOTNULL(in_data_anchor);
GE_ASSERT_NOTNULL(in_data_anchor->GetPeerOutAnchor());
const auto input_node = in_data_anchor->GetPeerOutAnchor()->GetOwnerNodeBarePtr();
GE_ASSERT_NOTNULL(input_node);
const auto input_node_iter = parent_nodes_to_stream_ids.find(input_node);
int64_t input_node_stream_id = kInvalidStreamId;
if (input_node_iter == parent_nodes_to_stream_ids.end()) {
input_node_stream_id = GetStreamId(input_node->GetOpDescBarePtr());
} else {
const auto input_node_out_index = in_data_anchor->GetPeerOutAnchor()->GetIdx();
GE_ASSERT_TRUE(static_cast<size_t>(input_node_out_index) <= input_node_iter->second.size(),
"input_node_out_index: %d, input node output size: %zu, input node: %s",
input_node_out_index, input_node_iter->second.size(), input_node->GetNamePtr());
input_node_stream_id = input_node_iter->second[input_node_out_index];
}
if ((parent_node_to_stream_ids[parent_out_index] != kParentNodeDefaultStreamId) &&
(parent_node_to_stream_ids[parent_out_index] != input_node_stream_id)) {
GELOGI("subgraph node has multi streams, set no reuse. node:%s(%s) output: %u, new stream id: %lld,"
", original stream id: %lld, input_node: %s", parent_node->GetNamePtr(), parent_node->GetTypePtr(),
parent_out_index, input_node_stream_id, parent_node_to_stream_ids[parent_out_index],
input_node->GetNamePtr());
input_node_stream_id = kInvalidStreamId;
}
parent_node_to_stream_ids[parent_out_index] = input_node_stream_id;
GELOGI("get stream id from subgraph node. node:%s(%s) output: %u stream id: %lld, input_node: %s",
parent_node->GetNamePtr(), parent_node->GetTypePtr(), parent_out_index, input_node_stream_id,
input_node->GetNamePtr());
}
return SUCCESS;
}
Status BlockMemAssigner::SetRealStreamIdForParentNode(MemAssistInfo &mem_assist_info) {
const auto compute_graph = mem_assist_info.compute_graph;
auto &parent_nodes_to_stream_ids = mem_assist_info.parent_nodes_to_stream_ids;
const auto root_graph = GraphUtils::FindRootGraph(compute_graph);
GE_ASSERT_NOTNULL(root_graph);
std::map<int64_t, const Node *> ids_to_parent_node;
for (const NodePtr &n : compute_graph->GetAllNodes()) {
const auto op_desc = n->GetOpDescBarePtr();
GE_ASSERT_NOTNULL(op_desc);
if (op_desc->GetSubgraphInstanceNames().empty()) {
continue;
}
parent_nodes_to_stream_ids[n.get()].resize(op_desc->GetOutputsSize(), kParentNodeDefaultStreamId);
ids_to_parent_node[op_desc->GetId()] = n.get();
}
for (auto iter = ids_to_parent_node.rbegin(); iter != ids_to_parent_node.rend(); ++iter) {
const auto parent_node = iter->second;
for (const auto &subgraph_name : parent_node->GetOpDescBarePtr()->GetSubgraphInstanceNames()) {
const auto sub_graph = root_graph->GetSubgraph(subgraph_name);
if (sub_graph == nullptr) {
continue;
}
const auto netoutput = sub_graph->FindFirstNodeMatchType(NETOUTPUT);
GE_ASSERT_NOTNULL(netoutput);
GE_ASSERT_NOTNULL(netoutput->GetOpDesc());
GE_ASSERT_SUCCESS(GetNetoutputInNodeStream(netoutput.get(), parent_node, parent_nodes_to_stream_ids));
}
}
return SUCCESS;
}
Status BlockMemAssigner::GetRealStreamIdForParentNode(const NodePtr &node, const uint32_t out_index,
int64_t &stream_id, bool &is_reuse) const {
is_reuse = true;
const auto iter = parent_nodes_to_stream_ids_.find(node.get());
if ((iter == parent_nodes_to_stream_ids_.end()) ||
(out_index >= iter->second.size()) ||
(iter->second.at(out_index) == kParentNodeDefaultStreamId)) {
return SUCCESS;
}
if (iter->second.at(out_index) == kInvalidStreamId) {
is_reuse = false;
GELOGI("node %s(%s) out_index: %u has multi streams, set no reuse",
node->GetNamePtr(), node->GetTypePtr(), out_index);
return SUCCESS;
}
stream_id = iter->second.at(out_index);
GELOGI("node %s(%s) out_index: %u get stream %lld", node->GetNamePtr(), node->GetTypePtr(), out_index, stream_id);
return SUCCESS;
}
std::set<int64_t> GetStreamMergeAndOutStreams(const ge::ComputeGraphPtr &graph) {
std::set<int64_t> merge_and_out_streams;
for (const NodePtr &node : graph->GetAllNodes()) {
if (!MemReuseUtils::IsMergeNode(node)) {
continue;
}
if (merge_and_out_streams.insert(GetStreamId(node->GetOpDescBarePtr())).second) {
GELOGD("Stream %" PRId64 " not reuse memory with other streams", GetStreamId(node->GetOpDescBarePtr()));
}
for (const auto &out_node : node->GetOutAllNodes()) {
if (merge_and_out_streams.insert(GetStreamId(out_node->GetOpDescBarePtr())).second) {
GELOGD("Stream %" PRId64 " not reuse memory with other streams", GetStreamId(out_node->GetOpDescBarePtr()));
}
}
}
return merge_and_out_streams;
}
* 不能改为非static的,不能修改成员变量,因为在HybridMemAssigner中,
* 只有一个binary_assigner对象会调用该函数,其他开启多线程创建的assigner对象并没有调用这个接口
*/
Status BlockMemAssigner::PreparationForAssign(MemAssistInfo &mem_assist_info) {
for (const NodePtr &n : mem_assist_info.compute_graph->GetAllNodes()) {
OptimizeStreamIdForMemoryReuse(n);
}
GE_ASSERT_SUCCESS(BlockMemAssigner::SetRealStreamIdForParentNode(mem_assist_info));
return SUCCESS;
}
Status BlockMemAssigner::GetOutAndWorkSpaceMem(std::vector<int64_t> &all_memory_size) {
std::vector<int64_t> temp;
std::map<std::string, std::vector<int64_t>> batch_all_memory_size;
std::map<std::string, int64_t> batch_total_size;
std::set<int64_t> exclude_merge_streams = GetStreamMergeAndOutStreams(compute_graph_);
for (const NodePtr &n : compute_graph_->GetAllNodes()) {
GetDiffStreamEdgeLife(n, exclude_merge_streams);
GetContinuousNodeLifeTimeBegin(n.get(), n.get(), 0, 0U);
auto node_op_desc = n->GetOpDescBarePtr();
GE_ASSERT_NOTNULL(node_op_desc);
if (CheckIsZeroMemNodeType(node_op_desc->GetTypePtr())) {
continue;
}
std::string batch_label;
(void)ge::AttrUtils::GetStr(node_op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
if (NodeUtils::IsLikeAtomicClean(n)) {
atomic_addr_clean_id_ = node_op_desc->GetId();
}
for (auto out_anchor : n->GetAllOutDataAnchorsPtr()) {
auto output_desc = node_op_desc->MutableOutputDesc(out_anchor->GetIdx());
if (output_desc == nullptr) {
continue;
}
int64_t size = 0;
(void)MemReuseUtils::GetTensorSize(*output_desc, size,
MemReuseUtils::IsNeedSplitSize(n, out_anchor->GetIdx()));
GE_ASSERT_TRUE(size >= 0, "[Check][TensorSize]tensor_size:%" PRId64 " is invalid, "
"maybe it is unknown shape node, Node_name:%s",
size, node_op_desc->GetNamePtr());
batch_all_memory_size[batch_label].emplace_back(size);
if (batch_total_size.find(batch_label) == batch_total_size.end()) {
batch_total_size[batch_label] = size;
} else {
batch_total_size[batch_label] += size;
}
if (!anchor_to_symbol_.empty()) {
auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n.get(), out_anchor->GetIdx(), kOut).ToString());
if (iter1 == anchor_to_symbol_.end()) {
continue;
}
const std::string &symbol = iter1->second;
auto iter2 = symbol_mem_reuse_info_.find(symbol);
if (iter2 == symbol_mem_reuse_info_.end()) {
symbol_mem_reuse_info_[symbol].size_ = size;
} else if (size > static_cast<int64_t>(iter2->second.size_)) {
iter2->second.size_ = size;
}
}
}
temp.clear();
GetNodeWorkSpaceSize(n, temp, batch_total_size[batch_label]);
batch_all_memory_size[batch_label].insert(batch_all_memory_size[batch_label].cend(), temp.cbegin(), temp.cend());
}
HandleInStreamRedundantDependence(in_stream_edges_);
InsertStreamOutEdge();
GELOGI("The last atomic_addr_clean node id: %" PRId64 "", atomic_addr_clean_id_);
GetMaxBatchAllMemorySize(batch_all_memory_size, batch_total_size, all_memory_size, max_batch_label_);
InitReuseFlag();
GE_ASSERT_SUCCESS(continuous_mem_mng_.Init(compute_graph_), "continuous memory manager init failed, graph: %s",
compute_graph_->GetName().c_str());
PrintSymbolMap();
return SUCCESS;
}
size_t GetBlockSize(size_t size, const std::vector<int64_t> &ranges, bool use_range) {
if (!use_range) {
size_t align_size = size;
MemReuseUtils::AlignMemOffset(align_size);
return align_size;
}
for (int64_t x : ranges) {
auto x_temp = static_cast<size_t>(x);
if (size <= x_temp) {
return x_temp;
}
}
GELOGW("Memory needed size:%zu is beyond the biggest block in memory ranges.", size);
return size;
}
bool BlockMemAssigner::IsNoNeedAssignMemory(const NodePtr &n, const NodeIndexIO &out_node_index_io,
const uint32_t index) const {
const auto op_desc = n->GetOpDescBarePtr();
const auto output_tensor_desc = op_desc->MutableOutputDesc(index);
std::string var_name;
if (ge::AttrUtils::GetStr(output_tensor_desc, ASSIGN_VAR_NAME, var_name) && !var_name.empty()) {
GELOGI("Op[%s] output[%u] ref var[%s].", op_desc->GetNamePtr(), index, var_name.c_str());
return true;
}
const auto iter = symbol_mem_reuse_info_.find(out_node_index_io.ToString());
if (iter != symbol_mem_reuse_info_.end()) {
return iter->second.no_assign_mem_;
}
return false;
}
void BlockMemAssigner::GetRefContinuousInputNodeAndFixedAddrPriorFlag(const std::string &symbol,
const std::list<NodeIndexIO> &anchors) {
uint32_t in_count = 0U;
uint32_t out_count = 0U;
NodeIndexIO tail_node(nullptr, 0U, kIn);
bool is_fixed_addr_prior = false;
for (const auto &node_index_io : anchors) {
if (node_index_io.node_ptr_ == nullptr) {
continue;
}
if (node_index_io.io_type_ == kIn) {
in_count++;
} else if (node_index_io.io_type_ == kOut) {
out_count++;
} else {
}
tail_node.node_ptr_ = node_index_io.node_ptr_;
tail_node.index_ = node_index_io.index_;
tail_node.io_type_ = node_index_io.io_type_;
if (is_fixed_addr_prior) {
continue;
}
(void) ge::AttrUtils::GetBool(node_index_io.node_ptr_->GetOpDesc(), ATTR_NAME_IS_FIXED_ADDR_PRIOR,
is_fixed_addr_prior);
if (is_fixed_addr_prior) {
symbol_mem_reuse_info_[symbol].is_fixed_addr_prior_ = true;
GELOGI("Symbol=%s is fixed addr prior, peer node=%s.", symbol.c_str(), node_index_io.ToString().c_str());
}
}
if ((in_count >= 2U) && (in_count == out_count) && (tail_node.index_ != 0U) && (tail_node.io_type_ == kIn)
&& (tail_node.node_ptr_->GetOpDescBarePtr() != nullptr)) {
bool is_input_continuous = false;
(void) ge::AttrUtils::GetBool(tail_node.node_ptr_->GetOpDescBarePtr(), ATTR_NAME_NOPADDING_CONTINUOUS_INPUT,
is_input_continuous);
if (is_input_continuous) {
symbol_mem_reuse_info_[symbol].no_assign_mem_ = true;
GELOGI("Symbol=%s, ref count:%d, tail node:%s is continuous input.", symbol.c_str(), in_count,
tail_node.node_ptr_->GetNamePtr());
}
}
}
void BlockMemAssigner::GetContinuousNodeLifeTimeBegin(const Node *const org_node, const Node *const node,
const int32_t index, uint32_t depth) {
++depth;
GE_IF_BOOL_EXEC((depth > kMaxDepthNum), return);
bool is_nopading_input_continuous = false;
const auto node_op_desc = node->GetOpDescBarePtr();
GE_CHECK_NOTNULL_EXEC(node_op_desc, return);
const auto &org_node_desc = org_node->GetOpDescBarePtr();
GE_CHECK_NOTNULL_EXEC(org_node_desc, return);
(void) ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_nopading_input_continuous);
if (is_nopading_input_continuous) {
for (const auto in_anchor : node->GetAllInDataAnchorsPtr()) {
const bool invalid_node = ((in_anchor == nullptr) || (in_anchor->GetPeerOutAnchor() == nullptr)
|| (in_anchor->GetPeerOutAnchor()->GetOwnerNodeBarePtr() == nullptr));
GE_IF_BOOL_EXEC(invalid_node, continue);
GetContinuousNodeLifeTimeBegin(org_node, in_anchor->GetPeerOutAnchor()->GetOwnerNodeBarePtr(),
in_anchor->GetIdx(), depth);
}
if (org_node == node) {
SetContinuousNodeLifeTimeBegin(node, node, 0U);
}
} else {
GE_IF_BOOL_EXEC((depth < 2U), return);
auto it = cascade_min_life_time_.find(org_node_desc->GetNamePtr());
if (it == cascade_min_life_time_.end()) {
cascade_min_life_time_[org_node_desc->GetNamePtr()] = node_op_desc->GetId();
} else {
if (static_cast<size_t>(node_op_desc->GetId()) < it->second) {
it->second = node_op_desc->GetId();
}
}
if (index == 0) {
cascade_min_life_time_[node_op_desc->GetNamePtr()] = node_op_desc->GetId();
}
GELOGD("Find node:%s life time begin:%" PRId64 " by ref node:%s index:%d.", node_op_desc->GetNamePtr(),
node_op_desc->GetId(), org_node_desc->GetNamePtr(), index);
}
return;
}
void BlockMemAssigner::SetContinuousNodeLifeTimeBegin(const Node *const org_node, const Node *const node,
uint32_t depth) {
++depth;
if (depth > kMaxDepthNum) {
return;
}
const auto node_op_desc = node->GetOpDescBarePtr();
GE_CHECK_NOTNULL_EXEC(node_op_desc, return);
bool is_nopading_input_continuous = false;
(void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_nopading_input_continuous);
if (is_nopading_input_continuous) {
for (const auto in_anchor : node->GetAllInDataAnchorsPtr()) {
const bool invalid_node = (in_anchor == nullptr) || (in_anchor->GetPeerOutAnchor() == nullptr);
GE_IF_BOOL_EXEC(invalid_node, continue);
const auto peer_in_node = in_anchor->GetPeerOutAnchor()->GetOwnerNodeBarePtr();
GE_CHECK_NOTNULL_EXEC(peer_in_node, continue);
SetContinuousNodeLifeTimeBegin(org_node, peer_in_node, depth);
}
} else {
auto it = cascade_min_life_time_.find(node_op_desc->GetNamePtr());
if (it != cascade_min_life_time_.end()) {
const auto org_node_desc = org_node->GetOpDescBarePtr();
GE_CHECK_NOTNULL_EXEC(org_node_desc, return);
const auto it_org = cascade_min_life_time_.find(org_node_desc->GetNamePtr());
if (it_org != cascade_min_life_time_.cend()) {
GELOGI("Node:%s set min life time begin from %zu to %zu by ref node:%s.", node->GetNamePtr(),
it->second, it_org->second, org_node_desc->GetNamePtr());
it->second = it_org->second;
}
}
}
return;
}
bool IsSeparateCleanContinuousInputNode(const Node *const node) {
GE_ASSERT_NOTNULL(node->GetOpDescBarePtr());
bool is_input_continuous = MemLayoutConflictUtil::IsContinuousInput(node);
if (!is_input_continuous) {
return false;
}
bool need_gentask_atomic = false;
(void)ge::AttrUtils::GetBool(node->GetOpDescBarePtr(), "need_gentask_atomic", need_gentask_atomic);
const bool has_atomic_input = node->GetOpDescBarePtr()->HasAttr(ATOMIC_ATTR_INPUT_INDEX);
if (has_atomic_input && need_gentask_atomic) {
return true;
}
return false;
}
* 1. NoPadding连续输入,仅首个输入分配一个block,所有输入使用这一个block
* 2. 带Padding连续输入,每个输入有自己的block,连续在一起。
* (如果ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION为true,或设置了单独清零,等同于NoPadding连续输入了)
*/
bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index,
InDataAnchor *&continuous_in_anchor, bool &is_reuse_zero_copy,
std::set<int64_t> &streams) {
if (out_index >= n->GetAllOutDataAnchorsSize()) {
return false;
}
auto node_desc = n->GetOpDescBarePtr();
GE_ASSERT_NOTNULL(node_desc);
std::vector<int64_t> offsets_for_fusion = {};
bool has_lx_fusion_attr =
AttrUtils::GetListInt(node_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion);
auto out_anchor = n->GetOutDataAnchor(out_index);
GE_ASSERT_NOTNULL(out_anchor);
bool is_out_node_continuous_input = false;
for (auto const peer_in_anchor : out_anchor->GetPeerInDataAnchorsPtr()) {
InDataAnchor *new_peer_in_anchor = nullptr;
auto is_input_continuous = MemLayoutConflictUtil::IsContinuousInputThroughRefNode(peer_in_anchor, true,
new_peer_in_anchor);
if (is_input_continuous) {
has_lx_fusion_attr = true;
} else {
is_input_continuous = MemLayoutConflictUtil::IsContinuousInputThroughRefNode(peer_in_anchor, false,
new_peer_in_anchor);
}
if (!is_input_continuous) {
continue;
}
* 判断输出节点是不是连续内存节点,不能只看直连的输出,而且也要看经过RefNode连接的节点
* new_peer不一定是n的直连输出,也可能是n经过一个或多个RefNod连接的输出节点
*/
GE_ASSERT_NOTNULL(new_peer_in_anchor);
const auto continuous_node = new_peer_in_anchor->GetOwnerNodeBarePtr();
GE_ASSERT_NOTNULL(continuous_node);
const bool is_separate_clean_continuous_input = IsSeparateCleanContinuousInputNode(continuous_node);
if (CheckIsZeroMemNodeType(continuous_node->GetTypePtr()) ||
((has_lx_fusion_attr || is_separate_clean_continuous_input) && (new_peer_in_anchor->GetIdx() != 0))) {
GELOGI("Node[%s] output[%u] peer node[%s] type[%s] input[%u].", n->GetNamePtr(), out_index,
continuous_node->GetNamePtr(), continuous_node->GetTypePtr(), new_peer_in_anchor->GetIdx());
return false;
}
if (n->GetOwnerComputeGraphBarePtr() == nullptr) {
continue;
}
GELOGI("%s name[%s] output[%u] peer[%s] input[%d] need continuous, input size[%u].",
n->GetOwnerComputeGraphBarePtr()->GetName().c_str(), n->GetNamePtr(), out_index,
continuous_node->GetNamePtr(), new_peer_in_anchor->GetIdx(),
continuous_node->GetAllInDataAnchorsSize());
const auto continuous_op_desc = continuous_node->GetOpDescBarePtr();
GE_ASSERT_NOTNULL(continuous_op_desc);
if (node_continuous_input_blocks_[continuous_op_desc->GetId()].size() == 0U) {
is_reuse_zero_copy = false;
if (has_lx_fusion_attr || is_separate_clean_continuous_input) {
is_op_reuse_mem_ = IsContinuousMemoryReuse(n.get(), out_index, continuous_node, streams);
is_out_node_continuous_input = is_separate_clean_continuous_input ? true : is_out_node_continuous_input;
is_separate_clean_continuous_inputs_ = is_separate_clean_continuous_input;
continue;
}
node_continuous_input_counts_[continuous_op_desc->GetId()] = std::make_pair(
continuous_node->GetTypePtr(), continuous_node->GetAllInDataAnchorsSize());
}
continuous_in_anchor = new_peer_in_anchor;
is_out_node_continuous_input = true;
}
return is_out_node_continuous_input;
}
* 直连或经过RefNode间接连接连续输入节点的,有些情况只给第0个输入分配内存,大小是所有输入的总大小,其他输入不分配内存。
* 1. NoPadding连续输入只给第0个输入分配内存
* 2. 带Padding连续输入,一般情况下每个输入都分配内存,以下两个特殊情况下,只给第0个输入分配内存
* 2.1 输入上带有lx fusion(ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION)属性的
* 2.2 连续输入需要对输入做单独清零的(有need_gentask_atomic/ATOMIC_ATTR_INPUT_INDEX属性)
* 3. NoPadding连续输入级联场景,只给最后一个PhonyConcat的第0个输入分配内存
* 4. 既作为第0个输入,又作为其他输入的,需要分配内存。
*/
Status BlockMemAssigner::GetNoNeedAssignMemoryFlag(const NodePtr &n, uint32_t out_index,
bool &no_need_assign_memory) const {
no_need_assign_memory = false;
auto node_desc = n->GetOpDescBarePtr();
GE_ASSERT_NOTNULL(node_desc);
auto out_anchor = n->GetOutDataAnchor(out_index);
GE_ASSERT_NOTNULL(out_anchor);
std::vector<int64_t> offsets_for_fusion = {};
const auto has_lx_fusion_attr =
AttrUtils::GetListInt(node_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion);
for (auto const peer_in_anchor : out_anchor->GetPeerInDataAnchorsPtr()) {
InDataAnchor *new_peer_in_anchor = nullptr;
const auto nopadding_continuous = MemLayoutConflictUtil::IsContinuousInputThroughRefNode(peer_in_anchor, true,
new_peer_in_anchor);
bool continuous = false;
if (!nopadding_continuous) {
continuous = MemLayoutConflictUtil::IsContinuousInputThroughRefNode(peer_in_anchor, false,
new_peer_in_anchor);
}
if (!(continuous || nopadding_continuous)) {
continue;
}
* 判断输出节点是不是连续内存节点,不能只看直连的输出,而且也要看经过RefNode连接的节点
* new_peer不一定是n的直连输出,也可能是n经过一个或多个RefNod连接的输出节点
*/
GE_ASSERT_NOTNULL(new_peer_in_anchor);
const auto continuous_node = new_peer_in_anchor->GetOwnerNodeBarePtr();
GE_ASSERT_NOTNULL(continuous_node);
if (new_peer_in_anchor->GetIdx() == 0) {
no_need_assign_memory = false;
break;
}
if (continuous) {
const bool is_separate_clean_continuous_input = IsSeparateCleanContinuousInputNode(continuous_node);
if (!(has_lx_fusion_attr || is_separate_clean_continuous_input)) {
continue;
}
}
no_need_assign_memory = true;
GELOGI("%s name[%s] output[%u] peer[%s] input[%d] need continuous, input size[%u], nopadding_continuous[%d], "
"continuous[%d], has_lx_fusion_attr[%d]",
n->GetOwnerComputeGraphBarePtr()->GetName().c_str(), n->GetNamePtr(), out_index,
continuous_node->GetNamePtr(), new_peer_in_anchor->GetIdx(),
continuous_node->GetAllInDataAnchorsSize(), nopadding_continuous, continuous, has_lx_fusion_attr);
}
GELOGI("%s name[%s] output[%u] no_need_assign_memory:%d.",
n->GetOwnerComputeGraphBarePtr()->GetName().c_str(), n->GetNamePtr(), out_index, no_need_assign_memory);
return SUCCESS;
}
Status BlockMemAssigner::CalNodeAsContinuousInputMaxLife(const Node *const n, uint32_t out_index,
const Node *const continuous_node,
int64_t &first_node_max_life, std::set<int64_t> &streams) {
auto node_desc = n->GetOpDescBarePtr();
auto peer_node_desc = continuous_node->GetOpDescBarePtr();
life_begin_ = static_cast<size_t>(node_desc->GetId());
for (const auto &in_anchor : continuous_node->GetAllInDataAnchorsPtr()) {
GE_CHECK_NOTNULL(in_anchor);
if (in_anchor->GetPeerOutAnchor() == nullptr) {
continue;
}
if ((in_anchor->GetPeerOutAnchor()->GetOwnerNodeBarePtr() == nullptr) ||
(in_anchor->GetPeerOutAnchor()->GetOwnerNodeBarePtr()->GetOpDescBarePtr() == nullptr)) {
GELOGE(FAILED, "[Check][OpDesc]Node[%s] output[%u] peer input node desc is null.", n->GetNamePtr(), out_index);
return FAILED;
}
auto peer_out_node_desc = in_anchor->GetPeerOutAnchor()->GetOwnerNodeBarePtr()->GetOpDescBarePtr();
Node *src_node = nullptr;
int32_t src_out_index = 0;
GE_ASSERT_SUCCESS(MemReuseUtils::GetSrcNodeThroughRefNode(continuous_node, in_anchor->GetIdx(), src_node,
src_out_index));
(void)src_out_index;
GE_ASSERT_NOTNULL(src_node->GetOpDescBarePtr());
* a(stream 0)
* |
* b(stream 0)--+
* | |
* c(stream 0) d(stream 1)
* \ /
* Phonyconcat
*
* 如果Phonyconcat的所有输入的流相同,则取id最小的作为life_begin_
* 如果有的输入与首个输入流不同,例如c是首个输入,而d的流不同,则找stream1 <- stream0的入边,本例子会找到b
*/
int64_t min_life_time = kMinLifeTime;
GetDiffStreamMinLifeTime(src_node, GetStreamId(n->GetOpDescBarePtr()), in_stream_edges_, min_life_time);
if (static_cast<size_t>(min_life_time) < life_begin_) {
life_begin_ = static_cast<size_t>(min_life_time);
GELOGI(
"Node[%s] life[%" PRId64 "] output[%u] is not continuous input node[%s] life[%" PRId64 "]'s min life time, "
"min is life[%zu], src_node[%s], life[%" PRId64 "], stream_id[%" PRId64 "]",
n->GetNamePtr(), node_desc->GetId(), out_index, peer_node_desc->GetNamePtr(), peer_node_desc->GetId(),
min_life_time, src_node->GetNamePtr(), src_node->GetOpDescBarePtr()->GetId(),
GetStreamId(src_node->GetOpDescBarePtr()));
}
int64_t max_node_life_time_by_symbol = 0;
const int64_t node_max_life =
GetNodeMaxLife(symbol_to_anchors_, out_stream_edges_, in_anchor->GetPeerOutAnchor()->GetOwnerNodeBarePtr(),
in_anchor->GetPeerOutAnchor()->GetIdx(), max_node_life_time_by_symbol, streams,
GetStreamId(n->GetOpDescBarePtr()));
if (node_max_life > first_node_max_life) {
first_node_max_life = node_max_life;
GELOGI(
"Node[%s] life[%" PRId64 "] output[%u]'s continuous input node[%s] life[%" PRId64 "]'s is not node[%s] output[%d]'s "
"max life node",
n->GetNamePtr(), node_desc->GetId(), out_index, peer_node_desc->GetNamePtr(), peer_node_desc->GetId(),
peer_out_node_desc->GetNamePtr(), in_anchor->GetPeerOutAnchor()->GetIdx());
}
}
return SUCCESS;
}
bool BlockMemAssigner::IsContinuousMemoryReuse(const Node *const n, uint32_t out_index,
const Node *const continuous_node, std::set<int64_t> &streams) {
if (!is_op_reuse_mem_) {
return false;
}
int64_t first_node_max_life = 0;
if (CalNodeAsContinuousInputMaxLife(n, out_index, continuous_node, first_node_max_life, streams) != SUCCESS) {
return false;
}
life_end_ = static_cast<size_t>(first_node_max_life);
return true;
}
void IsSymbolNodePreReuse(const Node *const node, const bool has_subgraph_data,
bool &pre_reuse_flag, bool &post_reuse_flag) {
static const std::string kFunctionOp = "FunctionOp";
bool is_ref = false;
(void)ge::AttrUtils::GetBool(node->GetOpDescBarePtr(), ATTR_NAME_REFERENCE, is_ref);
if (has_subgraph_data && is_ref) {
pre_reuse_flag = false;
}
if (node->GetType() == kFunctionOp) {
std::string original_type;
(void)AttrUtils::GetStr(node->GetOpDescBarePtr(), ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, original_type);
if (original_type == ITERATORV2) {
pre_reuse_flag = false;
post_reuse_flag = false;
}
}
}
bool BlockMemAssigner::GetOutputNodeReuseMemFlagByIndex(const int32_t index) {
if (output_index_to_reuse_mem_flag_.size() == 0U) {
return false;
}
return ((index >= 0) && (static_cast<size_t>(index) < output_index_to_reuse_mem_flag_.size())) ?
output_index_to_reuse_mem_flag_[index] : false;
}
bool BlockMemAssigner::GetInputNodeReuseMemFlag(const NodePtr &n) {
if (input_index_to_reuse_mem_flag_.size() == 0U) {
return false;
}
const auto op_desc = n->GetOpDescBarePtr();
int32_t index = 0;
if (!(ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, index))) {
GELOGW("Node[%s] Get index from data attr failed.", op_desc->GetName().c_str());
return false;
}
return ((index >= 0) && (static_cast<size_t>(index) < input_index_to_reuse_mem_flag_.size())) ?
input_index_to_reuse_mem_flag_[index] : false;
}
void BlockMemAssigner::InitReuseFlag() {
static const std::set<std::string> kNoPreReuseTypes = {
ge::DATA_TYPE, ge::AIPP_DATA_TYPE, ge::ANN_DATA_TYPE, ge::QUEUE_DATA, ge::PROPOSAL, ge::CONSTANT,
ge::CONSTANTOP, ge::GETNEXT, ge::DROPOUTGENMASK, ge::REFDATA, "AdpGetNext", "DynamicGetNext"};
static const std::set<std::string> kNoPostReuseTypes = {ge::DATA_TYPE, ge::AIPP_DATA_TYPE, ge::QUEUE_DATA,
ge::ENTER, ge::REFENTER, ge::NEXTITERATION,
ge::REFNEXTITERATION, ge::REFDATA, ge::GETNEXT, "AdpGetNext",
"DynamicGetNext", ge::DROPOUTGENMASK};
InitDiffStreamSameOutTable();
for (const auto &pair : symbol_to_anchors_) {
const std::string &symbol = pair.first;
bool pre_reuse_flag = true;
bool post_reuse_flag = true;
int64_t mem_type = RT_MEMORY_HBM;
GetSymbolMemType(pair.second, mem_type);
AddSymbolMemType(symbol, mem_type);
GetRefContinuousInputNodeAndFixedAddrPriorFlag(symbol, pair.second);
GELOGD("The memory type of symbol[%s] is [%" PRId64 "].", symbol.c_str(), mem_type);
if (mem_type == RT_MEMORY_P2P_DDR) {
UpdateOpTensorMemType(pair.second, mem_type);
}
bool has_subgraph_data = false;
bool diff_stream_prior = false;
for (const auto &node_index_io : pair.second) {
GE_CHECK_NOTNULL_EXEC(node_index_io.node_ptr_, continue);
GE_CHECK_NOTNULL_EXEC(node_index_io.node_ptr_->GetOpDescBarePtr(), continue);
bool in_flag = IsDirectInputNode(node_index_io.node_ptr_, compute_graph_);
bool in_reuse_mem_flag = in_flag ? GetInputNodeReuseMemFlag(node_index_io.node_) : false;
if (!in_flag && (node_index_io.node_ptr_->GetType() == DATA) &&
node_index_io.node_ptr_->GetOpDescBarePtr()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX)) {
GELOGD("Node: %s is subgraph data, continue", node_index_io.node_ptr_->GetNamePtr());
has_subgraph_data = true;
continue;
}
if (node_index_io.io_type_ == kIn) {
continue;
}
IsSymbolNodePreReuse(node_index_io.node_ptr_, has_subgraph_data, pre_reuse_flag, post_reuse_flag);
diff_stream_prior = MemReuseStrategy::GetDiffStreamPrior(node_index_io.node_ptr_);
OutDataAnchorPtr out_anchor = node_index_io.node_ptr_->GetOutDataAnchor(node_index_io.index_);
if (out_anchor == nullptr) {
continue;
}
bool out_flg = false;
bool out_reuse_mem_flag = false;
for (const auto in_anchor : out_anchor->GetPeerInDataAnchorsPtr()) {
if (IsDirectOutputNode(in_anchor->GetOwnerNodeBarePtr(), compute_graph_)) {
out_flg = true;
out_reuse_mem_flag = GetOutputNodeReuseMemFlagByIndex(in_anchor->GetIdx());
break;
}
}
const auto type = out_anchor->GetOwnerNodeBarePtr()->GetTypePtr();
if (in_reuse_mem_flag || out_reuse_mem_flag) {
pre_reuse_flag = pre_reuse_flag && (!in_flag) && (out_flg || NotMatchNoReuseType(kNoPreReuseTypes, type));
post_reuse_flag = post_reuse_flag && (!out_flg) && (in_flag || NotMatchNoReuseType(kNoPostReuseTypes, type));
} else {
pre_reuse_flag = pre_reuse_flag && (!in_flag) && (!out_flg) && NotMatchNoReuseType(kNoPreReuseTypes, type);
post_reuse_flag = post_reuse_flag && (!in_flag) && (!out_flg) && NotMatchNoReuseType(kNoPostReuseTypes, type);
}
if (!pre_reuse_flag && !post_reuse_flag) {
break;
}
}
MemoryReuseInfo &memory_reuse_info = symbol_mem_reuse_info_[symbol];
memory_reuse_info.pre_reuse_flag_ = pre_reuse_flag;
memory_reuse_info.post_reuse_flag_ = post_reuse_flag;
memory_reuse_info.diff_stream_prior_ = diff_stream_prior;
if ((!memory_reuse_info.pre_reuse_flag_) && memory_reuse_info.post_reuse_flag_) {
auto &memory_stat = memory_stat_[mem_type];
auto align_size = memory_reuse_info.size_;
MemReuseUtils::AlignMemOffset(align_size);
memory_stat.theory_memory_size_ += align_size;
}
}
}
* 输出使用同一个memory_block的当成一组,在进行复用时,要么同时复用某个节点,要么同时不复用某个节点,共同进退。这里只需要处理不同流的,
* StreamMerge/Merge输入必然不同流,NoPaddingContinuousInput 会计算同流或不同流的所有输入最小life_begin_。
*/
void BlockMemAssigner::InitDiffStreamSameOutTable() {
for (auto &node : compute_graph_->GetAllNodesPtr()) {
if (!MemReuseUtils::IsMergeNode(node)) {
continue;
}
std::list<OutDataAnchor *> same_out_anchor;
std::set<int64_t> stream_id_set;
for (auto in_anchor : node->GetAllInDataAnchorsPtr()) {
if ((in_anchor->GetPeerOutAnchor() != nullptr)
&& (in_anchor->GetPeerOutAnchor()->GetOwnerNodeBarePtr() != nullptr)) {
same_out_anchor.push_back(in_anchor->GetPeerOutAnchor().get());
stream_id_set.insert(GetStreamId(in_anchor->GetPeerOutAnchor()->GetOwnerNodeBarePtr()->GetOpDescBarePtr()));
}
}
if (stream_id_set.size() <= 1U) {
continue;
}
same_out_group_holder_.push_back(std::move(same_out_anchor));
for (auto out_anchor : same_out_group_holder_.back()) {
same_out_group_[out_anchor] = &same_out_group_holder_.back();
}
if (IsLogEnable(GE, DLOG_INFO)) {
std::stringstream ss;
for (auto out_anchor : same_out_group_holder_.back()) {
ss << "topoid_" << out_anchor->GetOwnerNodeBarePtr()->GetOpDescBarePtr()->GetId() << "_out_"
<< out_anchor->GetIdx() << ", ";
}
GELOGI("diff stream same out: %s", ss.str().c_str());
}
}
}
bool BlockMemAssigner::HasSameOutAnchorWithDiffStream(const Node *n, const uint32_t index) const {
GE_ASSERT_NOTNULL(n);
const auto out_data_anchor = n->GetOutDataAnchor(index);
GE_ASSERT_NOTNULL(out_data_anchor);
return same_out_group_.find(out_data_anchor.get()) != same_out_group_.end();
}
void BlockMemAssigner::AddSymbolMemType(const std::string &symbol, int64_t memory_type) {
if ((memory_type == RT_MEMORY_P2P_DDR) || (memory_type == RT_MEMORY_HOST) || (memory_type == RT_MEMORY_HOST_SVM)) {
symbol_mem_reuse_info_[symbol].mem_type_ = memory_type;
} else {
symbol_mem_reuse_info_[symbol].mem_type_ = RT_MEMORY_HBM;
}
}
bool BlockMemAssigner::IsPreReuse(const NodeIndexIO &cur_node_index_io, std::string &symbol) const {
auto iter1 = anchor_to_symbol_.find(cur_node_index_io.ToString());
if (iter1 == anchor_to_symbol_.end()) {
return false;
}
symbol = iter1->second;
auto iter2 = symbol_mem_reuse_info_.find(symbol);
if (iter2 == symbol_mem_reuse_info_.end()) {
return false;
}
return iter2->second.pre_reuse_flag_;
}
bool BlockMemAssigner::IsPostReuse(const std::string &symbol, bool &diff_stream_prior) const {
auto iter = symbol_mem_reuse_info_.find(symbol);
if (iter == symbol_mem_reuse_info_.end()) {
return true;
}
diff_stream_prior = iter->second.diff_stream_prior_;
return iter->second.post_reuse_flag_;
}
bool BlockMemAssigner::IsPostReuse(const ge::MemoryBlock *const mem_block) const {
if (mem_block == nullptr) {
return false;
}
return mem_block->post_reuse_flag_;
}
bool BlockMemAssigner::IsSymbolExist(const NodeIndexIO &node_index_io, std::string &symbol, MemoryBlock *&block) const {
block = nullptr;
const auto node_io = node_index_io.ToString();
auto iter = anchor_to_symbol_.find(node_io);
if (iter == anchor_to_symbol_.end()) {
return false;
}
symbol = iter->second;
auto it_block = symbol_blocks_.find(iter->second);
auto symbol_exist = (it_block != symbol_blocks_.end());
if (symbol_exist) {
GELOGD("Node io:%s symbol:%s block:%s", node_io.c_str(), symbol.c_str(), GetName(*(it_block->second)).c_str());
block = it_block->second;
}
return symbol_exist;
}
bool BlockMemAssigner::IsSymbolDescBlockExist(const NodeIndexIO &node_index_io, std::string &symbol,
MemoryBlock *&block) const {
const auto node_io = node_index_io.ToString();
auto iter = anchor_to_symbol_.find(node_io);
if (iter == anchor_to_symbol_.end()) {
return false;
}
symbol = iter->second;
GELOGD("Node io:%s symbol:%s", node_io.c_str(), symbol.c_str());
const auto it_block = symbol_desc_blocks_.find(iter->second);
if (it_block == symbol_desc_blocks_.cend()) {
return false;
}
block = it_block->second;
return true;
}
void BlockMemAssigner::PrintSymbolMap() {
if (!IsLogEnable(GE, DLOG_DEBUG)) {
return;
}
for (const auto &pair : symbol_to_anchors_) {
GELOGD("symbol=%s, max_size=%zu, pre_reuse=%s, post_reuse=%s", pair.first.c_str(),
symbol_mem_reuse_info_[pair.first].size_,
symbol_mem_reuse_info_[pair.first].pre_reuse_flag_ ? "true" : "false",
symbol_mem_reuse_info_[pair.first].post_reuse_flag_ ? "true" : "false");
for (const auto &node_index_io : pair.second) {
GELOGD("anchor:%s id:%" PRId64 "", node_index_io.ToString().c_str(),
((node_index_io.node_ptr_ != nullptr) && (node_index_io.node_ptr_->GetOpDescBarePtr() != nullptr))
? node_index_io.node_ptr_->GetOpDescBarePtr()->GetId() : 0);
}
}
}
void BlockMemAssigner::GetSymbolMemType(const std::list<NodeIndexIO> &node_index_io_list, int64_t &memory_type) {
memory_type = RT_MEMORY_HBM;
std::vector<int64_t> memory_types;
for (auto &node_index_io : node_index_io_list) {
auto op_desc = node_index_io.node_ptr_->GetOpDescBarePtr();
GE_CHECK_NOTNULL_JUST_RETURN(op_desc);
if (node_index_io.io_type_ == kIn) {
std::vector<int64_t> input_memory_types;
(void) ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, input_memory_types);
if (!input_memory_types.empty() && node_index_io.index_ < input_memory_types.size()) {
int64_t input_memory_type = input_memory_types[node_index_io.index_];
GELOGD("Node[%s]: the memory type of input index [%u] is [%" PRId64 "]].", op_desc->GetNamePtr(),
node_index_io.index_, input_memory_type);
memory_types.emplace_back(input_memory_type);
}
}
if (node_index_io.io_type_ == kOut) {
std::vector<int64_t> output_memory_types;
(void) ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, output_memory_types);
if (!output_memory_types.empty() && node_index_io.index_ < output_memory_types.size()) {
int64_t output_memory_type = output_memory_types[node_index_io.index_];
GELOGD("Node[%s]: the memory type of output index [%u] is [%" PRId64 "].", op_desc->GetNamePtr(),
node_index_io.index_, output_memory_type);
memory_types.emplace_back(output_memory_type);
}
}
}
for (auto node_memory_type : memory_types) {
if (node_memory_type > memory_type) {
memory_type = node_memory_type;
}
}
}
void BlockMemAssigner::UpdateOpTensorMemType(const std::list<NodeIndexIO> &node_index_io_list, int64_t memory_type) {
for (const auto &node_index_io : node_index_io_list) {
auto op_desc = node_index_io.node_ptr_->GetOpDescBarePtr();
GE_CHECK_NOTNULL_JUST_RETURN(op_desc);
if (node_index_io.io_type_ == kIn) {
auto input_desc = op_desc->MutableInputDesc(node_index_io.index_);
int_attr_.emplace_back(input_desc.get(), op_desc, node_index_io.index_, ATTR_NAME_TENSOR_MEM_TYPE, memory_type);
}
if (node_index_io.io_type_ == kOut) {
auto output_desc = op_desc->MutableOutputDesc(node_index_io.index_);
int_attr_.emplace_back(output_desc.get(), op_desc, node_index_io.index_, ATTR_NAME_TENSOR_MEM_TYPE, memory_type);
}
}
}
bool BlockMemAssigner::IsNodeAndPeerNodeTaskSupportZeroCopy(const ge::NodePtr &node, uint32_t output_index) const {
GELOGD("Check node %s and peer node of output %u task zero copy supported", node->GetNamePtr(), output_index);
if (is_feature_map_refreshable_) {
return true;
}
if (!IsNodeSupportZeroCopy(node.get())) {
return false;
}
const auto out_anchor = node->GetOutDataAnchor(static_cast<int32_t>(output_index));
if (out_anchor == nullptr) {
return false;
}
const auto peer_anchors = out_anchor->GetPeerInDataAnchorsPtr();
const bool support = std::all_of(peer_anchors.begin(), peer_anchors.end(), [](const ge::InDataAnchor *anchor) {
return ((anchor != nullptr) && (anchor->GetOwnerNodeBarePtr() != nullptr) &&
IsNodeSupportZeroCopy(anchor->GetOwnerNodeBarePtr()));
});
GELOGD("Task of node %s and peer node of output %u %s zero copy", node->GetNamePtr(), output_index,
(support ? "support" : "not support"));
return support;
}
bool BlockMemAssigner::IsZeroCopyBlock(const NodePtr &node, uint32_t output_index, bool continuous,
size_t output_size) const {
std::string op_type(node->GetTypePtr());
if (NodeUtils::IsDynamicShape(node)) {
if (compute_graph_.get() != node->GetOwnerComputeGraphBarePtr()) {
return false;
}
if (OpTypeUtils::IsDataNode(op_type)) {
return (!continuous);
}
if (is_static_model_addr_fixed_ && (node->GetOpDesc()->GetOpKernelLibName() == ge::kEngineNameHccl)) {
return false;
}
return (GetOutputFlowToNetoutputNum(node, output_index, compute_graph_, symbol_to_anchors_, anchor_to_symbol_) >
0U);
}
if (is_io_alloc_by_ge_in_run_graph_ && (output_size > input_fusion_size_)) {
return false;
}
if (continuous) {
GELOGD("Node %s output %u cannot zero copy as require continuous output", node->GetNamePtr(), output_index);
return false;
}
if (op_type == NETOUTPUT) {
const auto owner = node->GetOwnerComputeGraphBarePtr();
bool ret = (owner != nullptr) && (owner->GetParentGraph() == nullptr);
GELOGD("Node %s output %u result %d", node->GetNamePtr(), output_index, ret);
return ret;
}
if (OpTypeUtils::IsDataNode(op_type)) {
bool is_multi_batch_shape_data = false;
(void)AttrUtils::GetBool(node->GetOpDesc(), "_is_multi_batch_shape_data", is_multi_batch_shape_data);
if (is_multi_batch_shape_data) {
GELOGD("Multi batch shape data node[%s] output memory no need zero copy.",
node->GetName().c_str());
return false;
}
if (node->GetOpDescBarePtr()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX)) {
return false;
}
return IsNodeAndPeerNodeTaskSupportZeroCopy(node, output_index);
}
if (GetOutputFlowToNetoutputNum(node, output_index, compute_graph_, symbol_to_anchors_, anchor_to_symbol_) ==
1U) {
return IsNodeAndPeerNodeTaskSupportZeroCopy(node, output_index);
}
return false;
}
namespace {
void MarkZeroCopyBlockAttr(std::vector<TAttr<bool>> &bool_attr, const OpDesc *const op_desc, bool is_zero_copy,
bool mem_type, uint32_t out_index) {
if (is_zero_copy && (mem_type == kOutput)) {
auto output_desc = op_desc->MutableOutputDesc(out_index);
if (output_desc != nullptr) {
bool_attr.emplace_back(output_desc.get(), op_desc, out_index, ATTR_IS_ZERO_COPY_BLOCK, true);
} else {
GELOGE(PARAM_INVALID, "Node %s output %u is zero copy block but not marked as output desc is nullptr",
op_desc->GetNamePtr(), out_index);
}
}
}
}
void BlockMemAssigner::AddMemoryStat(uint64_t memory_type, size_t real_size, bool is_reuse_memory) {
auto &memory_stat = memory_stat_[memory_type];
size_t align_size = real_size;
MemReuseUtils::AlignMemOffset(align_size);
memory_stat.total_memory_size_ += align_size;
if (is_reuse_memory) {
memory_stat.theory_memory_size_ += align_size;
} else {
memory_stat.theory_no_reuse_memory_size_ += align_size;
}
if (memory_stat.theory_memory_size_ > memory_stat.theory_min_memory_size_) {
memory_stat.theory_min_memory_size_ = memory_stat.theory_memory_size_;
}
}
MemoryBlock *BlockMemAssigner::ApplyMemory(const NodePtr &n, const std::vector<bool> &workspace_reuse_flag,
const ApplyMemoryParam ¶m) {
auto node_op_desc = n->GetOpDescBarePtr();
std::string batch_label;
(void)ge::AttrUtils::GetStr(node_op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
MemoryBlock *reusable_block = nullptr;
auto stream_id = GetStreamId(node_op_desc);
bool is_reuse_memory = false;
GetRealStreamIdForParentNode(n, param.out_index, stream_id, is_reuse_memory);
bool no_reuse = false;
std::string symbol;
const bool pre_reuse = (param.mem_type == kOutput) ?
IsPreReuse(NodeIndexIO(n.get(), param.out_index, kOut), symbol) : true;
const auto it_life_time_begin = cascade_min_life_time_.find(node_op_desc->GetNamePtr());
(void)ge::AttrUtils::GetBool(node_op_desc, kOpNoReuseMem, no_reuse);
if ((!no_reuse) && (param.mem_type == kWorkspace)) {
no_reuse = ((workspace_reuse_flag.size() > param.out_index) && !workspace_reuse_flag[param.out_index]);
}
const bool mod_life_begin = ((param.mem_type != kWorkspace) && (it_life_time_begin != cascade_min_life_time_.end())
&& (it_life_time_begin->second < life_begin_)) || no_reuse || (!pre_reuse);
if (mod_life_begin) {
life_begin_ = ((!pre_reuse) || no_reuse) ? kMinLifeTime : it_life_time_begin->second;
GELOGD("Node %s output %u life_begin_ change to %zu", n->GetNamePtr(), param.out_index, life_begin_);
}
bool diff_stream_prior = false;
bool post_reuse_flag = IsPostReuse(symbol, diff_stream_prior);
if ((param.mem_type == kOutput) && (!post_reuse_flag)) {
life_end_ = kMaxLifeTime;
}
const auto has_diff_stream_same_out = (param.mem_type == kOutput)
&& HasSameOutAnchorWithDiffStream(n.get(), param.out_index);
is_reuse_memory = is_reuse_memory && is_ge_reuse_mem_ && (param.mem_type != kOutputDesc)
&& !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && !no_reuse && param.is_op_reuse_mem;
auto &reusable_blocks = reusable_blocks_[param.memory_type][stream_id];
bool do_reuse = is_reuse_memory && pre_reuse && !param.continuous && (!param.is_zero_copy)
&& !has_diff_stream_same_out;
const NodeTypeIndex node_type_index{n.get(), param.mem_type, param.out_index, false, life_begin_, stream_id};
if (do_reuse) {
if (reuse_strategy_.reuse_first_release_) {
reusable_block = GetFirstReleaseBlock(param.block_size, batch_label, reusable_blocks, node_type_index);
} else {
reusable_block = GetLastReleaseBlock(param.block_size, batch_label, reusable_blocks, node_type_index);
}
}
auto block = reusable_block;
if (block == nullptr) {
block = new(std::nothrow) MemoryBlock(reuse_strategy_, param.block_size, stream_id, is_reuse_memory,
param.memory_type);
GE_CHECK_NOTNULL_EXEC(block, return nullptr);
memory_blocks_.emplace_back(block);
blocks_store_.emplace_back(block);
GELOGD("Node %s create new block:%s", n->GetNamePtr(), GetName(*block).c_str());
} else {
GELOGD("Node %s reuse block:%s", n->GetNamePtr(), GetName(*block).c_str());
}
if (param.mem_type == kOutput) {
block->AddSymbol(symbol);
block->post_reuse_flag_ = block->post_reuse_flag_ && post_reuse_flag;
block->diff_stream_prior_ = diff_stream_prior;
}
block->AddNodeTypeIndex(node_type_index, param.real_size, param.no_align_size, stream_id);
if (has_diff_stream_same_out) {
block->same_stream_ = false;
}
const bool post_reuse = IsPostReuse(block);
if (param.continuous) {
block->SetContinuousBlock();
}
block->batch_label_ = batch_label;
block->reuse_mem_ = block->reuse_mem_ && (post_reuse || pre_reuse);
if (life_end_ != 0U) {
block->SetLifeTimeEnd((life_end_ == kMaxLifeTime) ? kDefaultLifeTime : life_end_, stream_id);
block->SetSymbolLifeEnd(life_end_);
}
const bool cal_theory_size = (batch_label.empty() || (batch_label == max_batch_label_))
&& (node_op_desc->GetType() != ge::PARTITIONEDCALL) && ((!block->reuse_mem_) || pre_reuse);
if (cal_theory_size) {
AddMemoryStat(param.memory_type, param.real_size, block->reuse_mem_);
}
return block;
}
bool BlockMemAssigner::IsNodeOutputUseSameMemWithNetOutput(const ge::NodePtr &node, uint32_t out_index) const {
const auto cur_node_index_io = NodeIndexIO(node, out_index, kOut);
const auto &symbol_iter = anchor_to_symbol_.find(cur_node_index_io.ToString());
if (symbol_iter == anchor_to_symbol_.cend()) {
return false;
}
const auto &anchors_iter = symbol_to_anchors_.find(symbol_iter->second);
if (anchors_iter == symbol_to_anchors_.cend()) {
return false;
}
for (const auto &anchor : anchors_iter->second) {
if (anchor.node_->GetType() == NETOUTPUT) {
return true;
}
}
return false;
}
MemoryBlock *BlockMemAssigner::GetFirstReleaseBlock(const size_t block_size, const std::string &batch_label,
std::vector<MemoryBlock *> &reusable_blocks,
const NodeTypeIndex &node_type_index) const {
for (auto it = reusable_blocks.begin(); it != reusable_blocks.end(); ++it) {
MemoryBlock *reusable_block = *it;
if ((reusable_block == nullptr) ||
(!ReuseBlock(*reusable_block, block_size, life_begin_, batch_label, node_type_index))) {
continue;
}
reusable_blocks.erase(it);
return reusable_block;
}
return nullptr;
}
MemoryBlock *BlockMemAssigner::GetLastReleaseBlock(const size_t block_size, const std::string &batch_label,
std::vector<MemoryBlock *> &reusable_blocks,
const NodeTypeIndex &node_type_index) const {
for (auto it = reusable_blocks.rbegin(); it != reusable_blocks.rend(); ++it) {
MemoryBlock *reusable_block = *it;
if ((reusable_block == nullptr) ||
(!ReuseBlock(*reusable_block, block_size, life_begin_, batch_label, node_type_index))) {
continue;
}
reusable_blocks.erase((++it).base());
return reusable_block;
}
return nullptr;
}
bool IsOutputIndexRef(const OpDesc *const op_desc, uint32_t index) {
auto output_tensor = op_desc->GetOutputDescPtr(index);
if (output_tensor == nullptr) {
return false;
}
bool dst_reuse_input = false;
(void)ge::TensorUtils::GetReuseInput(*output_tensor, dst_reuse_input);
if (dst_reuse_input) {
return true;
}
bool is_ref = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_REFERENCE, is_ref);
if (is_ref) {
std::string output_name = op_desc->GetOutputNameByIndex(index);
for (const auto &input_name : op_desc->GetAllInputNames()) {
if (output_name == input_name) {
return true;;
}
}
}
return false;
}
bool IsSubgraphDataRefConstInput(const NodePtr &node) {
std::string op_type;
const auto &in_node = ge::NodeUtils::GetParentInput(node);
return ge::NodeUtils::GetConstOpType(in_node, op_type) ||
((in_node != nullptr) && ge::OpTypeUtils::IsVariableNode(in_node->GetType()));
}
void BlockMemAssigner::ContinuousOutRefCheck(bool &is_all_output_ref, bool &is_output_has_ref, const NodePtr &n) {
const auto node_op_desc = n->GetOpDescBarePtr();
for (uint32_t index = 0U; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) {
if (!IsOutputIndexRef(node_op_desc, index)) {
is_all_output_ref = false;
break;
} else {
zero_memory_list_.emplace_back(n.get(), kOutput, index);
is_output_has_ref = true;
}
}
}
Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const std::vector<int64_t> &ranges,
const bool is_op_reuse_mem) {
auto node_op_desc = n->GetOpDescBarePtr();
GE_CHECK_NOTNULL(node_op_desc);
life_begin_ = node_op_desc->GetId();
bool is_all_output_ref = true;
bool is_output_has_ref = false;
ContinuousOutRefCheck(is_all_output_ref, is_output_has_ref, n);
if (is_all_output_ref) {
GELOGI("continuous output node ref all input, skip continuous alloc, node_name:%s", n->GetNamePtr());
return SUCCESS;
}
if (!is_all_output_ref && is_output_has_ref) {
REPORT_INNER_ERR_MSG("E19999", "continuous output node ref part input, not support now. node_name:%s",
n->GetNamePtr());
GELOGE(INTERNAL_ERROR, "[Check][OutRefStatus]continuous output node ref part input, not support, node_name:%s",
n->GetNamePtr());
return INTERNAL_ERROR;
}
MemoryBlock *block = nullptr;
size_t total_size = 0U;
uint64_t memory_type = RT_MEMORY_HBM;
int64_t max_life_time = 0;
std::string symbol;
std::set<int64_t> streams;
GetContinuousOutputMaxLife(n, symbol_to_anchors_, out_stream_edges_, max_life_time, streams);
for (uint32_t index = 0U; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) {
auto output_op_desc = node_op_desc->MutableOutputDesc(index);
if (CheckIsZeroMemNodeType(n->GetTypePtr()) || CheckIsZeroMemNodeOutputIndex(n, index)) {
zero_memory_list_.emplace_back(n.get(), kOutput, index);
continue;
}
int64_t size = 0;
if (MemReuseUtils::GetTensorSize(*output_op_desc, size, MemReuseUtils::IsNeedSplitSize(n, index)) != SUCCESS) {
REPORT_INNER_ERR_MSG("E19999", "get tensor_size failed, node_name:%s, output_index:%u",
n->GetNamePtr(), index);
GELOGE(INTERNAL_ERROR, "[Get][TensorSize]node_name:%s, output_index:%u", n->GetNamePtr(), index);
return INTERNAL_ERROR;
}
size_t align_size = static_cast<size_t>(size);
MemReuseUtils::AlignMemOffset(align_size);
total_size += align_size;
if (index != 0U) {
zero_memory_list_.emplace_back(n.get(), kOutput, index);
continue;
}
NodeIndexIO node_index_io(n.get(), index, kOut);
auto iter = anchor_to_symbol_.find(node_index_io.ToString());
if (iter != anchor_to_symbol_.end()) {
symbol = iter->second;
std::map<std::string, MemoryReuseInfo>::const_iterator it = symbol_mem_reuse_info_.find(symbol);
if (it != symbol_mem_reuse_info_.cend()) {
memory_type = it->second.mem_type_;
GELOGD("Continuous out memory symbol is [%s], memory type is [%" PRId64 "]", symbol.c_str(), memory_type);
}
}
}
if (total_size == 0U) {
return SUCCESS;
}
auto block_size = GetBlockSize(total_size, ranges, reuse_strategy_.use_range_);
std::vector<bool> workspace_reuse_flag;
ApplyMemoryParam param = {block_size, total_size, total_size, kOutput, 0U, is_op_reuse_mem, false, memory_type,
false};
block = ApplyMemory(n, workspace_reuse_flag, param);
if (block != nullptr) {
block->SetFirstContinuousBlock();
block->SetLastContinuousBlock();
block->need_same_offset_in_batch_ = SizeIndependentOfBatch(n->GetTypePtr());
bool is_reuse_zero_copy = true;
NodeIndexIO node_index_io(n.get(), 0, kOut);
int32_t ref_count = GetAllRefCount(node_index_io, is_reuse_zero_copy);
block->ref_count_ += ref_count;
block->is_reuse_zero_copy_ = (block->is_reuse_zero_copy_) && (is_reuse_zero_copy);
max_life_time = (max_life_time == kMaxLifeTime) ? kDefaultLifeTime : max_life_time;
block->SetLifeTimeEnd(max_life_time, GetStreamId(node_op_desc));
block->SetOutStreamCount(streams.size());
GELOGI("Node[%s] continuous out memory size[%zu] block size[%zu] out stream count:%zu ref_count:%d",
node_op_desc->GetNamePtr(), total_size, block_size, streams.size(), block->ref_count_);
if (!symbol.empty()) {
symbol_blocks_[symbol] = block;
auto iter = symbol_mem_reuse_info_.find(symbol);
if (iter != symbol_mem_reuse_info_.end()) {
iter->second.size_ = total_size;
block->is_fixed_addr_prior_ = (block->is_fixed_addr_prior_ || iter->second.is_fixed_addr_prior_);
}
GELOGD("Node io:%s add symbol:%s block:%s, fixed addr prior:%d",
NodeIndexIO(n.get(), 0, kOut).ToString().c_str(), symbol.c_str(),
GetName(*block).c_str(), block->is_fixed_addr_prior_);
}
} else {
REPORT_INNER_ERR_MSG("E19999", "apply continuousMemory failed, node_name:%s, total_size:%" PRId64 "",
n->GetNamePtr(), total_size);
GELOGE(INTERNAL_ERROR, "[Apply][ContinuousMemory]node_name:%s, total_size:%" PRId64 "", n->GetNamePtr(), total_size);
return INTERNAL_ERROR;
}
return SUCCESS;
}
* 根据continuous_mem_mng_中对节点输出的排布分配内存,要求IsFound必须返回true才调用该接口
*/
Status BlockMemAssigner::ApplyContinuousMemWithMng(const NodePtr &n, int32_t idx, const std::vector<int64_t> &ranges) {
auto op_desc = n->GetOpDescBarePtr();
GE_CHECK_NOTNULL(op_desc);
life_begin_ = op_desc->GetId();
GE_ASSERT_TRUE(continuous_mem_mng_.IsFound(n.get(), idx));
if (!continuous_mem_mng_.IsNeedAssignMemory(n.get(), idx)) {
zero_memory_list_.emplace_back(n.get(), kOutput, idx, false);
GELOGI("[ContinuousMem]node[%s] output %u, no need assign memory.", op_desc->GetNamePtr(), idx);
return SUCCESS;
}
size_t out_streams_cnt = 1U;
const auto &continuous_mem = continuous_mem_mng_.GetContinuousMem(n.get(), idx);
is_op_reuse_mem_ = is_op_reuse_mem_ && continuous_mem.IsReuse();
if (is_op_reuse_mem_) {
int64_t begin_time = 0;
int64_t out_time = 0;
int64_t end_time = 0;
GE_ASSERT_SUCCESS(GetContinuousMemLifeTime(continuous_mem, begin_time, out_time, end_time,
out_streams_cnt));
(void)out_time;
life_begin_ = begin_time;
life_end_ = end_time;
}
uint64_t memory_type = RT_MEMORY_HBM;
GE_ASSERT_SUCCESS(GetContinuousMemType(continuous_mem, memory_type));
* 对于连续输出-连续输入这种场景,不应该出现带有该属性的情况,明确报错不支持
*/
for (const auto &continuous_node_out : continuous_mem.GetContinuousNodeOut()) {
std::vector<int64_t> offsets;
(void)AttrUtils::GetListInt(continuous_node_out.node_ptr_->GetOpDescBarePtr(),
ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets);
GE_ASSERT_TRUE(offsets.empty(), "[ContinuousMem] check buffer fusion offset failed. node: %s has attr: %s",
continuous_node_out.node_ptr_->GetNamePtr(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION.c_str());
}
const auto max_size = continuous_mem.GetTotalSize();
const auto no_align_size = max_size;
MemoryBlock *block = nullptr;
NodeIndexIO node_index_io(n.get(), idx, kOut);
auto symbol_iter = anchor_to_symbol_.find(node_index_io.ToString());
GE_ASSERT_TRUE(symbol_iter != anchor_to_symbol_.end());
const auto symbol = symbol_iter->second;
const auto iter = symbol_mem_reuse_info_.find(symbol);
if (iter != symbol_mem_reuse_info_.end()) {
iter->second.size_ = max_size;
}
const auto block_size = GetBlockSize(max_size, ranges, reuse_strategy_.use_range_);
std::vector<bool> workspace_reuse_flag;
const auto is_zeor_copy = IsZeroCopyBlock(n, idx, true, no_align_size);
ApplyMemoryParam param = {block_size, max_size, no_align_size, kOutput, static_cast<uint32_t>(idx), is_op_reuse_mem_,
false, memory_type, is_zeor_copy};
block = ApplyMemory(n, workspace_reuse_flag, param);
GE_ASSERT_NOTNULL(block);
GE_ASSERT_SUCCESS(continuous_mem_mng_.PushBackBlock(n.get(), idx, block));
block->need_same_offset_in_batch_ = SizeIndependentOfBatch(n->GetTypePtr());
if (continuous_mem.IsUseOneBlock()) {
block->SetFirstContinuousBlock();
block->SetLastContinuousBlock();
}
bool is_reuse_zero_copy = true;
const auto ref_count = GetAllRefCount(node_index_io, is_reuse_zero_copy);
block->is_reuse_zero_copy_ = (block->is_reuse_zero_copy_) && (is_reuse_zero_copy);
block->ref_count_ = block->ref_count_ + ref_count;
if (iter != symbol_mem_reuse_info_.cend()) {
block->is_fixed_addr_prior_ = (block->is_fixed_addr_prior_ || iter->second.is_fixed_addr_prior_);
}
MarkReuseZeroCopyBlockFlag(n, block, idx);
MarkZeroCopyBlockAttr(bool_attr_, op_desc, block->is_zero_copy_, kOutput, idx);
GELOGI("[ContinuousMem]Node name: %s index:%u size:%zu ref count: %d, zero copy:%d, fixed addr prior: %d, "
"out_streams_cnt: %zu, memory_type: %d", n->GetNamePtr(), idx, block_size, block->ref_count_,
block->is_zero_copy_, block->is_fixed_addr_prior_, out_streams_cnt, memory_type);
block->SetOutStreamCount(out_streams_cnt);
block->is_reuse_zero_copy_ = (is_reuse_zero_copy && block->is_reuse_zero_copy_);
symbol_blocks_[symbol] = block;
CheckAndReleaseSuspendedBlock(n, idx, block);
return SUCCESS;
}
Status BlockMemAssigner::GetContinuousMemType(const ContinuousMem &continuous_mem, uint64_t &memory_type) const {
const auto &all_outs = continuous_mem.GetContinuousNodeOut();
memory_type = RT_MEMORY_HBM;
uint64_t first_special_type = RT_MEMORY_HBM;
for (size_t i = 0U; i < all_outs.size(); ++i) {
NodeIndexIO node_index_io(all_outs.at(i).node_ptr_, all_outs.at(i).index_, kOut);
auto symbol_iter = anchor_to_symbol_.find(node_index_io.ToString());
GE_ASSERT_TRUE(symbol_iter != anchor_to_symbol_.end());
const auto symbol = symbol_iter->second;
const auto iter = symbol_mem_reuse_info_.find(symbol);
GE_ASSERT_TRUE(iter != symbol_mem_reuse_info_.end());
if (iter->second.mem_type_ > iter->second.mem_type_) {
memory_type = iter->second.mem_type_;
}
if (MemTypeUtils::IsMemoryTypeSpecial(static_cast<int64_t>(iter->second.mem_type_))) {
if (first_special_type == RT_MEMORY_HBM) {
first_special_type = memory_type;
} else {
GE_ASSERT_TRUE(first_special_type == memory_type, "memory type conflict,"
" there are two different special memory type in one continuous memory block[%llu, %llu]",
first_special_type, memory_type);
}
}
}
return SUCCESS;
}
* begin_time: 符号最小的id
* out_time: 符号内最大的id
* end_time: 首节点的流和输出节点的流如果不一样,要计算回到首节点流的id. end_time大于等于out_time
*/
Status BlockMemAssigner::GetContinuousMemLifeTime(const ContinuousMem &continuous_mem, int64_t &begin_time,
int64_t &out_time, int64_t &end_time, size_t &out_streams_cnt) const {
const auto &all_out = continuous_mem.GetContinuousNodeOut();
GE_ASSERT_TRUE(!all_out.empty());
const auto &first_node = all_out.front();
begin_time = first_node.node_ptr_->GetOpDescBarePtr()->GetId();
std::set<int64_t> streams;
for (const auto &node_out : all_out) {
begin_time = std::min(begin_time, node_out.node_ptr_->GetOpDescBarePtr()->GetId());
const auto ret = GetNodeMaxLifeBySymbol(symbol_to_anchors_, node_out.node_ptr_, node_out.index_, out_time, streams,
out_stream_edges_, GetStreamId(first_node.node_ptr_->GetOpDescBarePtr()));
end_time = std::max(end_time, ret);
}
out_streams_cnt = streams.size();
return SUCCESS;
}
int32_t BlockMemAssigner::GetAllRefCount(const NodeIndexIO &out_node_index_io, bool &is_reuse_zero_copy) const {
int32_t ref_count = 0;
auto iter_symbol = anchor_to_symbol_.find(out_node_index_io.ToString());
if (iter_symbol == anchor_to_symbol_.end()) {
return ref_count;
}
auto iter = symbol_to_anchors_.find(iter_symbol->second);
if (iter != symbol_to_anchors_.end()) {
for (const auto &node_index_io : iter->second) {
if (node_index_io.node_ptr_ == nullptr) {
continue;
}
is_reuse_zero_copy = (is_reuse_zero_copy && CanReuseZeroCopyBlock(node_index_io.node_ptr_));
if (node_index_io.io_type_ != kIn) {
continue;
}
if ((node_index_io.node_ptr_->GetInDataAnchor(node_index_io.index_) == nullptr) ||
(node_index_io.node_ptr_->GetInDataAnchor(node_index_io.index_)->GetPeerOutAnchor() == nullptr)) {
GELOGI("Node: %s has no input.", node_index_io.node_ptr_->GetNamePtr());
continue;
}
ref_count++;
}
GELOGD("symbol=%s, ref count is %d", out_node_index_io.ToString().c_str(), ref_count);
}
return ref_count;
}
Status BlockMemAssigner::GetOutputTotalSizeAndOutCount(const NodePtr &n, uint32_t output_index, size_t &max_size,
size_t &no_align_size, int32_t &out_count,
bool is_separate_clean_continuous_inputs) const {
const auto node_op_desc = n->GetOpDescBarePtr();
GE_CHECK_NOTNULL(node_op_desc);
const auto out_data_anchor = n->GetOutDataAnchor(static_cast<int32_t>(output_index));
GE_CHECK_NOTNULL(out_data_anchor);
for (const auto in_anchor : out_data_anchor->GetPeerInDataAnchorsPtr()) {
auto owner_node = in_anchor->GetOwnerNodeBarePtr();
auto op_desc = owner_node->GetOpDescBarePtr();
if (op_desc == nullptr) {
continue;
}
Params *instance = Params::Instance();
GE_CHECK_NOTNULL(instance);
if (!((instance->GetTarget() == TARGET_TYPE_TINY) && (op_desc->GetType() == NETOUTPUT))) {
out_count++;
}
}
if (!is_separate_clean_continuous_inputs) {
const auto output_op_desc = node_op_desc->MutableOutputDesc(output_index);
GE_CHECK_NOTNULL(output_op_desc);
int64_t size = 0;
GE_CHK_STATUS_RET(MemReuseUtils::GetTensorSize(*output_op_desc, size,
MemReuseUtils::IsNeedSplitSize(n, output_index)),
"Get node %s output %" PRId64 " size failed", node_op_desc->GetNamePtr(), output_index);
max_size = static_cast<size_t>(size);
GE_CHK_STATUS_RET(MemReuseUtils::GetOutputNoAlignSize(*node_op_desc, output_index, no_align_size),
"Get node_name:%s, output_index:%u no align size failed", n->GetNamePtr(), output_index);
return SUCCESS;
}
for (const auto out_node_in_anchor : out_data_anchor->GetPeerInDataAnchorsPtr()) {
if (out_node_in_anchor == nullptr) {
continue;
}
const auto out_node = out_node_in_anchor->GetOwnerNodeBarePtr();
bool is_input_continuous = MemLayoutConflictUtil::IsContinuousInput(out_node);
if (!is_input_continuous) {
continue;
}
size_t total_size = 0U;
for (const auto input_anchor : out_node->GetAllInDataAnchorsPtr()) {
GE_CHECK_NOTNULL(input_anchor);
auto in_node_out_anchor = input_anchor->GetPeerOutAnchor();
if (in_node_out_anchor == nullptr) {
continue;
}
const auto in_node = in_node_out_anchor->GetOwnerNodeBarePtr();
const auto in_op_desc = in_node->GetOpDescBarePtr();
GE_CHECK_NOTNULL(in_op_desc);
const auto output_op_desc = in_op_desc->MutableOutputDesc(in_node_out_anchor->GetIdx());
int64_t size = 0;
GE_CHK_STATUS_RET(MemReuseUtils::GetTensorSize(*output_op_desc, size,
MemReuseUtils::IsNeedSplitSize(n, output_index)),
"Get node %s out %" PRId64 " size failed", in_node->GetNamePtr(), in_node_out_anchor->GetIdx());
size_t align_size = static_cast<size_t>(size);
MemReuseUtils::AlignMemOffset(align_size);
total_size += align_size;
}
max_size = max_size < total_size ? total_size : max_size;
}
no_align_size = max_size;
return SUCCESS;
}
void BlockMemAssigner::CalExitSymbolNodeLifeTime(const Node *const n, uint32_t out_index, size_t &max_life_time) {
max_life_time = life_time_;
bool is_cur_node_input_continuous = false;
(void)ge::AttrUtils::GetBool(n->GetOpDescBarePtr(), ATTR_NAME_NOPADDING_CONTINUOUS_INPUT,
is_cur_node_input_continuous);
if (!is_cur_node_input_continuous) {
is_cur_node_input_continuous = IsSeparateCleanContinuousInputNode(n);
}
const auto &out_anchor = n->GetOutDataAnchor(out_index);
if (is_cur_node_input_continuous || (out_anchor == nullptr)) {
return;
}
std::set<int64_t> streams;
for (const auto &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) {
if (peer_in_anchor == nullptr) {
continue;
}
const auto &out_node = peer_in_anchor->GetOwnerNodeBarePtr();
bool is_input_continuous = false;
(void)ge::AttrUtils::GetBool(out_node->GetOpDescBarePtr(), ATTR_NAME_NOPADDING_CONTINUOUS_INPUT,
is_input_continuous);
if (!is_input_continuous) {
is_input_continuous = IsSeparateCleanContinuousInputNode(out_node);
}
if (!is_input_continuous) {
continue;
}
if (peer_in_anchor->GetIdx() != 0) {
continue;
}
int64_t node_max_life_time = 0;
CalNodeAsContinuousInputMaxLife(n, out_index, out_node, node_max_life_time, streams);
if (max_life_time < static_cast<size_t>(node_max_life_time)) {
max_life_time = static_cast<size_t>(node_max_life_time);
}
}
GELOGI("Node[%s:%u] has exit symbol, it's max_life_time:%zu stream count:%zu", n->GetNamePtr(), out_index,
max_life_time, streams.size());
}
void BlockMemAssigner::MarkReuseZeroCopyBlockFlag(const NodePtr &n, MemoryBlock *const block,
const uint32_t index) const {
auto node_op_desc = n->GetOpDescBarePtr();
bool can_reuse_zero_copy = true;
bool is_continuous = ge::MemLayoutConflictUtil::IsContinuousOutput(n);
bool is_nopadding_continuous = false;
if (!is_continuous) {
(void)ge::AttrUtils::GetBool(*node_op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, is_nopadding_continuous);
if (is_nopadding_continuous) {
bool attr_reuse = false;
(void)ge::AttrUtils::GetBool(*node_op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
can_reuse_zero_copy = !attr_reuse;
}
} else {
can_reuse_zero_copy = false;
}
if (!can_reuse_zero_copy) {
block->is_reuse_zero_copy_ = false;
}
const static std::set<std::string> kTaskUnsupportZeroCopyOp{ge::STREAMSWITCH, ge::LABELSWITCHBYINDEX};
auto out_anchor = n->GetOutDataAnchor(static_cast<int32_t>(index));
bool data_connect_unsupport_zero_copy = false;
if (OpTypeUtils::IsDataNode(node_op_desc->GetType()) && (!is_feature_map_refreshable_) && (out_anchor != nullptr)) {
auto peer_anchors = out_anchor->GetPeerInDataAnchorsPtr();
data_connect_unsupport_zero_copy =
std::any_of(peer_anchors.begin(), peer_anchors.end(), [](const ge::InDataAnchor *anchor) {
return ((anchor != nullptr) && (anchor->GetOwnerNodeBarePtr() != nullptr) &&
(kTaskUnsupportZeroCopyOp.count(anchor->GetOwnerNodeBarePtr()->GetTypePtr()) > 0U));
});
}
if (data_connect_unsupport_zero_copy) {
block->is_reuse_zero_copy_ = false;
block->is_zero_copy_ = false;
}
GELOGD("Node name: %s index: %d, can_reuse_zero_copy: %s, data_connect_unsupport_zero_copy: %s ", n->GetNamePtr(),
index, can_reuse_zero_copy ? "true" : "false", data_connect_unsupport_zero_copy ? "true" : "false");
}
MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const std::vector<int64_t> &ranges,
const bool is_op_reuse_mem, const bool out_node_need_continuous_input) {
if (index >= n->GetAllOutDataAnchorsSize()) {
GELOGE(FAILED, "[Check][OutIndex]index:%u exceed out_size:%u, node_name:%s", index, n->GetAllOutDataAnchorsSize(),
n->GetNamePtr());
return nullptr;
}
const auto out_data_anchor = n->GetOutDataAnchor(index);
auto node_op_desc = n->GetOpDescBarePtr();
if ((out_data_anchor == nullptr) || (node_op_desc == nullptr)) {
GELOGE(FAILED, "[Check][OutAnchor]is null, index:%u, node_name:%s", index, n->GetNamePtr());
return nullptr;
}
size_t size = 0U;
size_t no_align_size = 0U;
int32_t out_count = 0;
size_t block_size = 0U;
if (GetOutputTotalSizeAndOutCount(n, index, size, no_align_size, out_count, is_separate_clean_continuous_inputs_) !=
SUCCESS) {
GELOGE(FAILED, "Get output total size failed");
return nullptr;
}
std::string symbol;
MemoryBlock *block = nullptr;
NodeIndexIO node_index_io(n.get(), index, kOut);
if (IsSymbolExist(node_index_io, symbol, block)) {
GE_IF_BOOL_EXEC(block == nullptr,
REPORT_INNER_ERR_MSG("E19999", "get ref block failed, node_name:%s, symbol:%s",
node_op_desc->GetNamePtr(), node_index_io.ToString().c_str());
GELOGE(FAILED, "[Get][RefBlock]node_name:%s, symbol:%s",
node_op_desc->GetNamePtr(), node_index_io.ToString().c_str());
return nullptr);
const bool
cal_theory_size = (!block->RealSizeList().empty()) && (block->NodeTypeIndexList().back().node_ != nullptr)
&& (block->NodeTypeIndexList().back().node_->GetOpDescBarePtr() != nullptr)
&& (block->NodeTypeIndexList().back().node_->GetOpDescBarePtr()->GetType() == ge::PARTITIONEDCALL)
&& (node_op_desc->GetType() != ge::PARTITIONEDCALL);
if (cal_theory_size) {
AddMemoryStat(block->memory_type_, block->RealSizeList().back(), block->reuse_mem_);
}
block_size = GetBlockSize(size, ranges, reuse_strategy_.use_range_);
block->SetSize(block_size);
size_t symbol_life_time = life_time_;
CalExitSymbolNodeLifeTime(node_index_io.node_ptr_, node_index_io.index_, symbol_life_time);
block->SetSymbolLifeEnd(symbol_life_time);
block->SetLifeTimeEnd(life_time_, block->stream_id_);
block->AddNodeTypeIndex(
{n.get(), kOutput, index, true, life_begin_,
GetStreamId(node_op_desc), false, block->GetSymbolLifeEnd()},
size, no_align_size, block->stream_id_);
block->has_sub_graph_in_out_node_ = block->has_sub_graph_in_out_node_ ||
PeerIsSubGraphNetOutNode(n, out_data_anchor, compute_graph_) ||
IsSubGraphInOrOutNode(n.get(), compute_graph_);
bool no_reuse_flag = false;
(void)ge::AttrUtils::GetBool(node_op_desc, kOpNoReuseMem, no_reuse_flag);
block->reuse_mem_ = block->reuse_mem_ && (!no_reuse_flag) && is_op_reuse_mem;
if (out_count == 0) {
block->ref_count_++;
}
} else {
if (IsOutputIndexRef(node_op_desc, index) ||
(IsSubgraphDataRefConstInput(n) && (!IsDirectInputNode(n.get(), compute_graph_)))) {
zero_memory_list_.emplace_back(n.get(), kOutput, index, false);
GELOGI("ref mode skip out block assign. node_name: %s, index:%d", n->GetNamePtr(), index);
return nullptr;
}
size_t max_size = size;
auto iter = symbol_mem_reuse_info_.find(symbol);
if (iter != symbol_mem_reuse_info_.end()) {
if (!is_separate_clean_continuous_inputs_) {
max_size = iter->second.size_;
} else {
iter->second.size_ = max_size;
}
}
uint64_t memory_type = RT_MEMORY_HBM;
if (iter != symbol_mem_reuse_info_.cend()) {
memory_type = iter->second.mem_type_;
}
block_size = GetBlockSize(max_size, ranges, reuse_strategy_.use_range_);
std::vector<bool> workspace_reuse_flag;
bool as_input_continuous = is_separate_clean_continuous_inputs_ ? false : out_node_need_continuous_input;
bool is_zeor_copy = IsZeroCopyBlock(n, index, out_node_need_continuous_input, no_align_size);
ApplyMemoryParam param = {block_size, max_size, no_align_size, kOutput, index, is_op_reuse_mem, as_input_continuous,
memory_type, is_zeor_copy};
block = ApplyMemory(n, workspace_reuse_flag, param);
GE_CHECK_NOTNULL_EXEC(block, return nullptr);
block->need_same_offset_in_batch_ = SizeIndependentOfBatch(n->GetTypePtr());
if (is_separate_clean_continuous_inputs_) {
block->SetFirstContinuousBlock();
block->SetLastContinuousBlock();
}
block->is_zero_copy_ =
block->is_zero_copy_ ||
IsZeroCopyBlock(n, index, (out_node_need_continuous_input || block->GetContinuousFlag()), no_align_size);
bool is_reuse_zero_copy = true;
int32_t ref_count = GetAllRefCount(node_index_io, is_reuse_zero_copy);
block->is_reuse_zero_copy_ = (block->is_reuse_zero_copy_) && (is_reuse_zero_copy);
if (ref_count > out_count) {
out_count = ref_count;
}
block->ref_count_ = block->ref_count_ + out_count;
if (iter != symbol_mem_reuse_info_.cend()) {
block->is_fixed_addr_prior_ = (block->is_fixed_addr_prior_ || iter->second.is_fixed_addr_prior_);
}
GELOGD("Node name: %s size:%zu ref count: %d, out count: %d zero copy:%d, fixed addr prior: %d", n->GetNamePtr(),
block_size, block->ref_count_, out_count, block->is_zero_copy_, block->is_fixed_addr_prior_);
}
MarkReuseZeroCopyBlockFlag(n, block, index);
MarkZeroCopyBlockAttr(bool_attr_, node_op_desc, block->is_zero_copy_, kOutput, index);
GELOGI("Node name: %s index:%u size:%zu ref count: %d, out count: %d zero copy:%d, out node need continuous input %d",
n->GetNamePtr(), index, block_size, block->ref_count_, out_count, block->is_zero_copy_,
out_node_need_continuous_input);
return block;
}
MemoryBlock *BlockMemAssigner::ApplyOutDescMemory(const NodePtr &n, uint32_t index,
const std::vector<int64_t> &ranges) {
GELOGI("Node[%s] tensor[%u] apply output desc memory.", n->GetNamePtr(), index);
size_t size = sizeof(RuntimeTensorDesc);
auto block_size = GetBlockSize(size, ranges, reuse_strategy_.use_range_);
std::vector<bool> workspace_reuse_flag;
MemoryBlock *block = nullptr;
NodeIndexIO node_index_io(n.get(), index, kOut);
std::string symbol;
if (IsSymbolDescBlockExist(node_index_io, symbol, block)) {
GE_IF_BOOL_EXEC(block == nullptr,
REPORT_INNER_ERR_MSG("E19999", "get ref block failed, node_name:%s, symbol:%s",
n->GetNamePtr(), node_index_io.ToString().c_str());
GELOGE(FAILED, "[Get][RefBlock]node_name:%s, symbol:%s",
n->GetNamePtr(), node_index_io.ToString().c_str());
return nullptr);
block->AddNodeTypeIndex({n.get(), kOutputDesc, index, true, 0}, size, size, GetStreamId(n->GetOpDescBarePtr()));
GELOGD("Ref tensor desc, symbol[%s], anchor[%s].", symbol.c_str(), node_index_io.ToString().c_str());
} else {
ApplyMemoryParam param = {block_size, size, size, kOutputDesc, index, false, false, RT_MEMORY_HBM, false};
block = ApplyMemory(n, workspace_reuse_flag, param);
if (block == nullptr) {
REPORT_INNER_ERR_MSG("E19999", "apply out desc Memory failed, node_name:%s, block_size:%" PRId64 ", out_index:%u",
n->GetNamePtr(), block_size, index);
GELOGE(FAILED, "[Apply][Memory]node_name:%s, block_size:%" PRId64 ", out_index:%u", n->GetNamePtr(),
block_size, index);
return nullptr;
}
bool is_fixed_addr_prior = false;
(void) ge::AttrUtils::GetBool(n->GetOpDesc(), ATTR_NAME_IS_FIXED_ADDR_PRIOR, is_fixed_addr_prior);
block->is_fixed_addr_prior_ = (block->is_fixed_addr_prior_ || is_fixed_addr_prior);
GELOGD("%s's output desc memory fixed addr prior:%d, index:%zu.",
n->GetNamePtr(), block->is_fixed_addr_prior_, index);
}
auto iter = anchor_to_symbol_.find(node_index_io.ToString());
if (iter != anchor_to_symbol_.end()) {
GELOGD("Add to ref tensor, symbol[%s], anchor[%s] ", iter->second.c_str(), node_index_io.ToString().c_str());
symbol_desc_blocks_[iter->second] = block;
}
return block;
}
bool IsOutputBlock(const ge::InDataAnchor *const in_data_anchor) {
auto peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
GE_IF_BOOL_EXEC(peer_out_anchor == nullptr,
REPORT_INNER_ERR_MSG("E19999", "Peer out anchor is nullptr.");
GELOGE(FAILED, "[Check][Param] Peer out anchor is nullptr."); return false);
auto src = peer_out_anchor->GetOwnerNodeBarePtr();
int32_t index = peer_out_anchor->GetIdx();
auto iter = GetLocalOmgContext().out_nodes_map.find(src->GetNamePtr());
if (iter != GetLocalOmgContext().out_nodes_map.end()) {
for (auto id : iter->second) {
if (index == id) {
return true;
}
}
}
return false;
}
bool BlockMemAssigner::IsAtomicOutputMemory(const ge::NodePtr &node, uint32_t output_index, bool is_atomic,
bool out_node_set_continuous_input) const {
auto op_desc = node->GetOpDescBarePtr();
if (op_desc == nullptr) {
return false;
}
bool is_output_continuous = ge::MemLayoutConflictUtil::IsContinuousOutput(node);
if (!is_output_continuous) {
(void)ge::AttrUtils::GetBool(op_desc, ge::ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, is_output_continuous);
}
if ((!out_node_set_continuous_input) && is_atomic && (!is_output_continuous)) {
if (IsZeroCopyBlock(node, output_index, is_output_continuous)) {
GELOGI("atomic clean and zero copy, need assign memory, node:%s(%s), output_index: %u",
node->GetNamePtr(), node->GetTypePtr(), output_index);
return false;
}
std::vector<int64_t> atomic_output_index;
(void)ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index);
for (auto &index : atomic_output_index) {
if (static_cast<uint32_t>(index) == output_index) {
if (node->GetOwnerComputeGraphBarePtr() != nullptr) {
GELOGD("Atomic no assign %s name[%s] output[%" PRId64 "] streamid[%" PRId64 "].",
node->GetOwnerComputeGraphBarePtr()->GetName().c_str(), op_desc->GetNamePtr(), index,
GetStreamId(op_desc));
}
return true;
}
}
}
return false;
}
bool IsKnownSubgraphData(const Node *node) {
if ((node == nullptr) || NodeUtils::IsDynamicShape(*node)) {
return false;
}
return node->GetOpDescBarePtr()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX);
}
void SetReleaseBlockLifeEnd(MemoryBlock *to_release, int64_t stream_id) {
const auto to_release_out_stream_life_time = to_release->NodeTypeIndexList().back().out_stream_life_time_;
if (to_release_out_stream_life_time.size() == 1) {
for (const auto &item : to_release_out_stream_life_time) {
size_t end_life_time = item.second.second;
int64_t release_stream_id = to_release->stream_id_;
if (to_release->stream_id_ != item.first) {
if (end_life_time == kMaxLifeTime) {
release_stream_id = stream_id;
end_life_time = item.second.first;
}
}
to_release->SetLifeTimeEnd(end_life_time, release_stream_id);
if (to_release->diff_stream_prior_ && (to_release->stream_id_ != stream_id)) {
to_release->SetLifeTimeEnd(item.second.first, stream_id);
}
}
return;
}
size_t max_end_life_time = 0U;
for (const auto &item : to_release_out_stream_life_time) {
if (max_end_life_time < item.second.second) {
max_end_life_time = item.second.second;
}
}
if (max_end_life_time == kMaxLifeTime) {
to_release->same_stream_ = false;
}
to_release->SetLifeTimeEnd(max_end_life_time, to_release->stream_id_);
}
void BlockMemAssigner::ReleaseMemory(MemoryBlock *const to_release, std::vector<MemoryBlock *> &reusable_memory,
int64_t stream_id, const std::string &symbol, bool no_release) {
if ((to_release == nullptr) || to_release->NodeTypeIndexList().empty()) {
GELOGE(FAILED, "[Check][Param] Input parameter to_release is null.");
return;
}
if (to_release->ref_count_ <= 0) {
GELOGI("[Check][Param] to_release->ref_count_ must greater than 0");
return;
}
if (!to_release->reuse_mem_) {
GELOGI("[Check][Param] doesn't reuse memory");
return;
}
int64_t max_life_time = life_time_;
if (!to_release->used_by_diff_streams_) {
to_release->used_by_diff_streams_ = (to_release->stream_id_ != stream_id) &&
(to_release->NodeTypeIndexList().back().mem_type_ != kWorkspace);
}
int64_t max_node_life_time_by_symbol = life_time_;
if (to_release->used_by_diff_streams_) {
const auto &node_type_index_list = to_release->NodeTypeIndexList();
const auto &node_type_index_iter =
std::find_if(node_type_index_list.rbegin(), node_type_index_list.rend(),
[](const NodeTypeIndex &node_type_index) { return !node_type_index.ref_input_; });
const auto &node_type_index = *node_type_index_iter;
if (node_type_index.node_ != nullptr) {
std::set<int64_t> streams;
max_life_time = GetNodeMaxLife(symbol_to_anchors_, out_stream_edges_, node_type_index.node_,
node_type_index.index_, max_node_life_time_by_symbol, streams);
if (node_type_index_list.back().ref_input_) {
to_release->SetOutStreamCount(streams.size());
}
GELOGI("Diff stream output node:%s max life time:%" PRId64 ", back is ref: %d, streams size: %zu",
node_type_index.node_->GetNamePtr(), max_life_time, node_type_index_list.back().ref_input_,
streams.size());
if (to_release->GetFirstContinuousFlag() && to_release->GetLastContinuousFlag()) {
GetContinuousOutputMaxLifeBySymbol(node_type_index.node_, symbol_to_anchors_, max_node_life_time_by_symbol,
out_stream_edges_);
}
GELOGI("Diff stream output node:%s max life time:%" PRId64 " by symbol", node_type_index.node_->GetNamePtr(),
max_node_life_time_by_symbol);
}
}
to_release->SetOutStreamLifeTime(max_node_life_time_by_symbol, max_life_time, stream_id);
--to_release->ref_count_;
if (to_release->ref_count_ > 0) {
return;
}
if (to_release->reuse_mem_ && (!to_release->RealSizeList().empty())
&& (to_release->batch_label_.empty() || (to_release->batch_label_ == max_batch_label_))) {
size_t align_size = to_release->RealSizeList().back();
if (!symbol.empty()) {
const auto it_size = symbol_mem_reuse_info_.find(symbol);
if (it_size != symbol_mem_reuse_info_.cend()) {
align_size = it_size->second.size_;
}
}
MemReuseUtils::AlignMemOffset(align_size);
if (memory_stat_[to_release->memory_type_].theory_memory_size_ >= align_size) {
memory_stat_[to_release->memory_type_].theory_memory_size_ -= align_size;
}
}
SetReleaseBlockLifeEnd(to_release, stream_id);
if (!IsPostReuse(to_release) || no_release) {
max_life_time = ge::kMaxLifeTime;
to_release->ClearDiffStreamLifeInfo();
to_release->SetLifeTimeEnd(ge::kMaxLifeTime, to_release->stream_id_);
}
reusable_memory.emplace_back(to_release);
to_release->ClearOutStreamLifeInfo();
to_release->used_by_diff_streams_ = false;
GELOGD("Put block:%s to pool stream:%" PRId64 " max life time:%" PRId64 "", GetName(*to_release, true).c_str(),
to_release->stream_id_, max_life_time);
}
void BlockMemAssigner::ReleaseMemorys(StreamIdToBlocks &to_releases,
StreamIdToBlocks &reusable_memory) {
for (auto &stream_blocks : to_releases) {
for (auto mem_block : stream_blocks.second) {
if (mem_block != nullptr) {
const bool output = (!mem_block->NodeTypeIndexList().empty()) &&
(mem_block->NodeTypeIndexList().back().mem_type_ == kOutput) &&
(!mem_block->SymbolList().empty());
ReleaseMemory(mem_block, reusable_memory[stream_blocks.first], mem_block->stream_id_,
output ? mem_block->SymbolList().back() : "", false);
}
}
GELOGD("Clear stream:%" PRId64 " workspace blocks", stream_blocks.first);
stream_blocks.second.clear();
}
}
void BlockMemAssigner::ReleaseInputNodeOutMemory(const NodePtr &node) {
for (const auto &in_anchor : GetSortAllInDataAnchors(node, IsMemoryPriorityMode())) {
if ((node->GetOpDescBarePtr() == nullptr) || (in_anchor->GetPeerOutAnchor() == nullptr) ||
(in_anchor->GetPeerOutAnchor()->GetOwnerNodeBarePtr()->GetOpDescBarePtr() == nullptr)) {
continue;
}
GE_IF_BOOL_EXEC(IsOutputBlock(in_anchor), continue);
std::string op_type(in_anchor->GetPeerOutAnchor()->GetOwnerNodeBarePtr()->GetTypePtr());
GE_IF_BOOL_EXEC((op_type == CONSTANT) || (op_type == FASTRCNNPREDICTIONS) || (op_type == CONSTANTOP),
continue);
const bool
is_no_release_node_out_block = IsNoReleaseNodeOutBlock(in_anchor->GetPeerOutAnchor()->GetOwnerNodeBarePtr());
const auto in_data_node = in_anchor->GetPeerOutAnchor()->GetOwnerNodeBarePtr();
GE_CHECK_NOTNULL_JUST_RETURN(in_data_node);
const int32_t in_data_node_out_index = in_anchor->GetPeerOutAnchor()->GetIdx();
NodeIndexIO node_index_io(in_data_node, in_data_node_out_index, kOut);
MemoryBlock *block = nullptr;
std::string symbol;
if (!IsSymbolExist(node_index_io, symbol, block)) {
GELOGI(
"Block of peer out not find. Peer node:%s, output index:%d, "
"current node:%s, input index:%d",
in_data_node->GetNamePtr(), in_data_node_out_index, node->GetNamePtr(), in_anchor->GetIdx());
continue;
}
GE_CHECK_NOTNULL_JUST_RETURN(block);
auto reusable_blocks_iter = reusable_blocks_.find(block->memory_type_);
if (reusable_blocks_iter == reusable_blocks_.end() || block->NodeTypeIndexList().empty()) {
continue;
}
GELOGI(
"Block of peer out is matched. Peer node:%s, output index:%d, "
"current node:%s, input index:%d, block ref_count:%d.",
in_data_node->GetNamePtr(), in_data_node_out_index, node->GetNamePtr(), in_anchor->GetIdx(), block->ref_count_);
auto stream_id = GetStreamId(node->GetOpDescBarePtr());
StreamIdToBlocks &reusable_memory = reusable_blocks_iter->second;
ReleaseMemory(block, reusable_memory[block->stream_id_], stream_id,
symbol, is_no_release_node_out_block);
if (block->ref_count_ == 0 && (stream_id == block->stream_id_)) {
SetLastUsedInputMemAttr(node, in_anchor->GetIdx(), bool_attr_);
}
}
}
void CheckAndGetOpReuseEnv(const std::string &env, std::vector<std::string> &env_vec, bool &op_reuse_env_valid) {
std::string env_str = std::string(env);
if (env_str.size() > kReuseMaxCharNum) {
GELOGE(FAILED, "[Check][Param] The OP_NO_REUSE_MEM has more than %d characters.", kReuseMaxCharNum);
return;
}
SplitStringByComma(env_str, env_vec);
if (env_vec.size() > kReuseMaxOpNum) {
GELOGE(FAILED, "[Check][Param] The OP_NO_REUSE_MEM has more than %d nodes.", kReuseMaxOpNum);
return;
}
op_reuse_env_valid = true;
return;
}
void BlockMemAssigner::CheckAndReleaseSuspendedBlock(const NodePtr &node, uint32_t idx, MemoryBlock *block) {
if ((node == nullptr) || (block == nullptr)) {
return;
}
if (block->ref_count_ == 0) {
block->ref_count_ = 1;
stream_workspace_blocks_[block->memory_type_][block->stream_id_].emplace_back(block);
GELOGI("The output is suspended, and will be released in allocation of next node. Name:%s, index:%u, "
"size:%zu, ref_count:%d, batch:%s, stream:%" PRId64 ", reuse_mem:%d.", node->GetNamePtr(), idx, block->Size(),
block->ref_count_, block->batch_label_.c_str(), block->stream_id_, block->reuse_mem_);
}
}
Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, std::vector<int64_t> &ranges) {
auto op_desc = node->GetOpDescBarePtr();
std::vector<int64_t> memorys_type;
bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, memorys_type);
GELOGD("Assign memory node[%s], output size[%zu], output memory type size[%zu]", op_desc->GetNamePtr(),
op_desc->GetOutputsSize(), memorys_type.size());
if (has_mem_type_attr && (memorys_type.size() != op_desc->GetOutputsSize())) {
REPORT_INNER_ERR_MSG("E19999", "Attr[%s] size:%zu not equal to node output size:%zu, node_name:%s",
ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(),
op_desc->GetOutputsSize(), op_desc->GetNamePtr());
GELOGE(
INTERNAL_ERROR,
"[Check][MemTypeAttr]Attr %s size:%zu not equal to node output size:%zu, node_name:%s",
ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(),
op_desc->GetOutputsSize(), op_desc->GetNamePtr());
return INTERNAL_ERROR;
}
is_op_reuse_mem_ = true;
if (op_reuse_env_valid_) {
std::vector<std::string>::iterator it_name =
std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetNamePtr());
std::vector<std::string>::iterator it_type =
std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetTypePtr());
GE_IF_BOOL_EXEC(it_name != op_no_reuse_mem_vec_.end() || it_type != op_no_reuse_mem_vec_.end(),
is_op_reuse_mem_ = false;);
}
bool need_gentask_atomic = false;
(void)ge::AttrUtils::GetBool(op_desc, "need_gentask_atomic", need_gentask_atomic);
bool is_atomic = false;
if (!need_gentask_atomic) {
(void)ge::AttrUtils::GetBool(op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic);
}
GE_IF_BOOL_EXEC(is_ge_reuse_mem_,
for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end();
++iter) { ReleaseMemorys(iter->second, reusable_blocks_[iter->first]);
});
life_time_ = op_desc->GetId();
bool is_zero_mem_node = CheckIsZeroMemNodeType(node->GetTypePtr());
bool is_buffer_pool_mem_supported = (op_desc->HasAttr(ATTR_NAME_BUFFER_POOL_ID)) &&
(op_desc->HasAttr(ATTR_NAME_BUFFER_POOL_SIZE)) && (!root_unknown_shape_flag_);
bool need_apply_continuous_memory = IsContinuousOutput(node);
for (uint32_t i = 0U; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) {
int64_t size = 0;
auto output_tensor_desc = op_desc->MutableOutputDesc(i);
if (output_tensor_desc == nullptr) {
GELOGW("op[%s] null output_tensor_desc, index[%u].", op_desc->GetNamePtr(), i);
continue;
}
life_begin_ = op_desc->GetId();
life_end_ = 0U;
is_separate_clean_continuous_inputs_ = false;
GE_IF_BOOL_EXEC(MemReuseUtils::GetTensorSize(*output_tensor_desc, size,
MemReuseUtils::IsNeedSplitSize(node, i)) != SUCCESS,
GELOGI("Tensor has no size"));
if (has_mem_type_attr && ((memorys_type[i] == RT_MEMORY_L1) || (memorys_type[i] == kRtMemoryUB))) {
GELOGI("fusion: node[%s], output[%s], output memory type [%" PRId64 "]",
op_desc->GetNamePtr(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]);
size = 0;
}
GE_IF_BOOL_EXEC((TensorUtils::IsMemorySizeCalcTypeAlwaysEmpty(*output_tensor_desc)), size = 0;);
InDataAnchor *continuous_in_anchor = nullptr;
bool out_node_set_continuous_input = false;
bool no_need_assign_memory = (is_zero_mem_node || is_buffer_pool_mem_supported || (size == 0));
if ((!no_need_assign_memory) && continuous_mem_mng_.IsFound(node.get(), i)) {
return ApplyContinuousMemWithMng(node, i, ranges);
}
NodeIndexIO node_index_io(node.get(), i, kOut);
bool is_reuse_zero_copy = true;
std::set<int64_t> streams;
if (!no_need_assign_memory) {
out_node_set_continuous_input =
IsOutNodeSetContinuousInput(node, i, continuous_in_anchor, is_reuse_zero_copy, streams);
GE_ASSERT_SUCCESS(GetNoNeedAssignMemoryFlag(node, i, no_need_assign_memory));
no_need_assign_memory = (no_need_assign_memory ||
IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input) ||
IsNoNeedAssignMemory(node, node_index_io, i));
}
if (no_need_assign_memory) {
zero_memory_list_.emplace_back(node.get(), kOutput, i, false);
GELOGI("node[%s] output %u, no need assign memory.", op_desc->GetNamePtr(), i);
continue;
}
if (need_apply_continuous_memory && (!out_node_set_continuous_input)) {
return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_);
}
bool need_change = is_op_reuse_mem_ && is_atomic && out_node_set_continuous_input;
GE_IF_BOOL_EXEC(need_change, is_op_reuse_mem_ = false);
MemoryBlock *mem_block = ApplyOutMemory(node, i, ranges, is_op_reuse_mem_, out_node_set_continuous_input);
if (mem_block != nullptr) {
mem_block->SetOutStreamCount(streams.size());
mem_block->is_reuse_zero_copy_ = (is_reuse_zero_copy && mem_block->is_reuse_zero_copy_);
if (continuous_in_anchor != nullptr) {
const auto continuous_op_desc = continuous_in_anchor->GetOwnerNodeBarePtr()->GetOpDescBarePtr();
node_continuous_input_blocks_[continuous_op_desc->GetId()][continuous_in_anchor->GetIdx()]
= mem_block;
}
auto iter = anchor_to_symbol_.find(node_index_io.ToString());
if (iter != anchor_to_symbol_.end()) {
symbol_blocks_[iter->second] = mem_block;
GELOGD("Node io:%s add symbol:%s block:%s", node_index_io.ToString().c_str(), iter->second.c_str(),
GetName(*mem_block).c_str());
CheckAndReleaseSuspendedBlock(node, i, mem_block);
}
}
bool is_tensor_desc_mem = false;
(void)AttrUtils::GetBool(output_tensor_desc, ATTR_NAME_TENSOR_NO_TILING_MEM_TYPE, is_tensor_desc_mem);
if (is_tensor_desc_mem) {
MemoryBlock *out_desc_mem_block = ApplyOutDescMemory(node, i, ranges);
GE_CHECK_NOTNULL(out_desc_mem_block);
}
}
return SUCCESS;
}
Status BlockMemAssigner::AssignWorkSpaceMemoryWithReuse(const NodePtr &node, std::vector<int64_t> &ranges) {
auto node_op_desc = node->GetOpDescBarePtr();
std::vector<int64_t> temp;
int64_t tatal_size = 0;
GetNodeWorkSpaceSize(node, temp, tatal_size);
std::vector<int64_t> workspace_type_list;
const bool has_workspace_type_list_attr =
ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_WORKSPACE_TYPE_LIST, workspace_type_list);
std::vector<int64_t> tvm_workspace_types;
const bool has_tvm_workspace_mem_type_attr =
ge::AttrUtils::GetListInt(node_op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, tvm_workspace_types);
std::vector<int32_t> workspace_no_reuse_scope;
const bool has_workspace_no_reuse_scope =
ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope);
std::vector<bool> workspace_reuse_flag;
GE_IF_BOOL_EXEC(!ge::AttrUtils::GetListBool(node_op_desc, kAttrNameWorkspaceReuseFlag, workspace_reuse_flag),
GELOGD("OP %s does not have workspace_reuse_flag attr", node_op_desc->GetNamePtr()));
GELOGD("Assign memory node[%s], size [temp:%zu, tvm:%zu, list:%zu, no_reuse_scopes:%zu, reuse_flags:%zu.]",
node_op_desc->GetNamePtr(), temp.size(), tvm_workspace_types.size(), workspace_type_list.size(),
workspace_no_reuse_scope.size(), workspace_reuse_flag.size());
if (((has_tvm_workspace_mem_type_attr) && (temp.size() != tvm_workspace_types.size())) ||
((has_workspace_type_list_attr) && (temp.size() != workspace_type_list.size()))) {
REPORT_INNER_ERR_MSG("E19999", "Attr:%s, memory_type.size:%zu and %s, memory_type.size:%zu and workspaces "
"num:%zu should be same, node_name:%s, check invalid", TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(),
tvm_workspace_types.size(), ATTR_NAME_WORKSPACE_TYPE_LIST.c_str(),
workspace_type_list.size(), temp.size(), node->GetNamePtr());
GELOGE(INTERNAL_ERROR, "[Check][Param] Attr:%s, memory_type.size:%zu and %s, memory_type.size:%zu and workspaces "
"num:%zu should be same, node_name:%s, check invalid", TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(),
tvm_workspace_types.size(), ATTR_NAME_WORKSPACE_TYPE_LIST.c_str(),
workspace_type_list.size(), temp.size(), node->GetNamePtr());
return INTERNAL_ERROR;
}
bool need_gentask_atomic = false;
(void)ge::AttrUtils::GetBool(node_op_desc, "need_gentask_atomic", need_gentask_atomic);
auto atomic_workspace_info = node_op_desc->TryGetExtAttr(
EXT_ATTR_ATOMIC_WORKSPACE_INFO, std::map<std::string, std::map<int64_t, int64_t>>{});
for (size_t i = 0UL; i < temp.size(); i++) {
life_begin_ = node_op_desc->GetId();
life_end_ = 0U;
bool workspace_skip_flag = false;
if (has_tvm_workspace_mem_type_attr && ((tvm_workspace_types[i] == RT_MEMORY_L1)
|| (tvm_workspace_types[i] == kRtMemoryUB))) {
GELOGI(
"fusion:node[%s]workspace index[%zu] is not hbm type, add to zero_memory_list, workspace memory type [%" PRId64 "]",
node_op_desc->GetNamePtr(), i, tvm_workspace_types[i]);
workspace_skip_flag = true;
}
if (temp[i] == 0 || workspace_skip_flag
|| (!need_gentask_atomic && IsAtomicWorkSpace(static_cast<int64_t>(i), atomic_workspace_info))) {
zero_memory_list_.emplace_back(node.get(), kWorkspace, static_cast<uint32_t>(i), false);
continue;
}
const bool session_scope_memory = (has_workspace_no_reuse_scope) && (i < workspace_no_reuse_scope.size()) &&
(workspace_no_reuse_scope[i] == kSessionNoReuse);
const bool is_p2p_memory =
(has_workspace_type_list_attr) && (static_cast<uint64_t>(workspace_type_list[i]) == RT_MEMORY_P2P_DDR);
uint64_t memory_type = GetWorkSpaceMemoryType(workspace_no_reuse_scope.size(), i, is_p2p_memory,
session_scope_memory, workspace_reuse_flag);
GELOGI("%s's workspace mem_type:%lu, index:%zu.", node->GetNamePtr(), memory_type, i);
ApplyMemoryParam param = {GetBlockSize(static_cast<size_t>(temp[i]), ranges, reuse_strategy_.use_range_),
static_cast<size_t>(temp[i]), static_cast<size_t>(temp[i]), kWorkspace,
static_cast<uint32_t>(i), is_op_reuse_mem_, false, memory_type, false};
MemoryBlock *mem_block = ApplyMemory(node, workspace_reuse_flag, param);
GE_CHECK_NOTNULL_EXEC(mem_block, continue);
mem_block->is_reuse_zero_copy_ = (mem_block->is_reuse_zero_copy_) && (CanReuseZeroCopyBlock(node.get()));
bool is_fixed_addr_prior = false;
(void) ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_IS_FIXED_ADDR_PRIOR, is_fixed_addr_prior);
mem_block->is_fixed_addr_prior_ = (mem_block->is_fixed_addr_prior_ || is_fixed_addr_prior);
GELOGI("%s's workspace fixed addr prior:%d, index:%zu.", node->GetNamePtr(), mem_block->is_fixed_addr_prior_, i);
++(mem_block->ref_count_);
CheckWorkspaceReuse(workspace_reuse_flag, i, GetStreamId(node_op_desc), mem_block, memory_type);
}
return SUCCESS;
}
void BlockMemAssigner::ParseGraphIoAllocMode() {
constexpr const char_t *kStaticModelAddrFixed = "ge.exec.static_model_addr_fixed";
std::string is_addr_fixed_opt;
(void)ge::GetContext().GetOption(kStaticModelAddrFixed, is_addr_fixed_opt);
is_static_model_addr_fixed_ = !is_addr_fixed_opt.empty();
if ((compute_graph_ == nullptr) || (compute_graph_->GetParentGraph() != nullptr)) {
return;
}
std::string alloc_mode;
(void)ge::GetContext().GetOption(OPTION_GRAPH_IO_MEM_ALLOC_MODE, alloc_mode);
is_io_alloc_by_ge_in_run_graph_ = (alloc_mode == "ByGE");
GELOGI("io_alloc_mode:%s, graph:%s.", (is_io_alloc_by_ge_in_run_graph_ ? "ByGE" : "ByApp"),
compute_graph_->GetName().c_str());
}
Status BlockMemAssigner::InitIoReuseFlag() {
GE_ASSERT_NOTNULL(compute_graph_);
const auto root_graph = GraphUtils::FindRootGraph(compute_graph_);
GE_ASSERT_NOTNULL(root_graph);
if (root_graph->GetGraphUnknownFlag()) {
const auto netoutput_node = compute_graph_->FindFirstNodeMatchType(NETOUTPUT);
GE_ASSERT_NOTNULL(netoutput_node);
const auto &netoutput_op_desc = netoutput_node->GetOpDesc();
GE_ASSERT_NOTNULL(netoutput_op_desc);
const size_t inputs_size = netoutput_op_desc->GetAllInputsSize();
output_index_to_reuse_mem_flag_.resize(inputs_size, true);
return SUCCESS;
}
ParseIoReuseMemOption();
return SUCCESS;
}
void BlockMemAssigner::ParseIoReuseMemOption() {
if ((compute_graph_ == nullptr) || (compute_graph_->GetParentGraph() != nullptr)) {
return;
}
const auto &graph_option = GetThreadLocalContext().GetAllGraphOptions();
const auto it_input_indexes = graph_option.find(OPTION_INPUT_REUSE_MEM_INDEXES);
if (it_input_indexes != graph_option.end()) {
GELOGI("[io_reuse_mem_option] option_input_indexes:%s", it_input_indexes->second.c_str());
const size_t inputs_size = compute_graph_->GetInputNodes().size();
input_index_to_reuse_mem_flag_.resize(inputs_size, false);
std::vector<std::string> in_index_vec;
SplitStringByComma(it_input_indexes->second, in_index_vec);
for (const auto &str : in_index_vec) {
const int32_t in_index = std::stoi(str);
if ((in_index < 0) || (static_cast<size_t>(in_index) >= inputs_size)) {
GELOGW("[Check][Option]Check failed because option(%s=%s) is invalid. Input_index must be in the "
"range of [0, %zu)", OPTION_INPUT_REUSE_MEM_INDEXES, it_input_indexes->second.c_str(), inputs_size);
continue;
}
input_index_to_reuse_mem_flag_[in_index] = true;
}
}
const auto it_output_indexes = graph_option.find(OPTION_OUTPUT_REUSE_MEM_INDEXES);
if (it_output_indexes != graph_option.end()) {
GELOGI("[io_reuse_mem_option] option_output_indexes:%s", it_output_indexes->second.c_str());
const auto netoutput_node = compute_graph_->FindFirstNodeMatchType(NETOUTPUT);
GE_CHECK_NOTNULL_EXEC(netoutput_node, return);
const auto &netoutput_op_desc = netoutput_node->GetOpDesc();
GE_CHECK_NOTNULL_EXEC(netoutput_op_desc, return);
const size_t inputs_size = netoutput_op_desc->GetAllInputsSize();
output_index_to_reuse_mem_flag_.resize(inputs_size, false);
std::vector<std::string> out_index_vec;
SplitStringByComma(it_output_indexes->second, out_index_vec);
for (const auto &str : out_index_vec) {
const int32_t out_index = std::stoi(str);
if ((out_index < 0) || (static_cast<size_t>(out_index) >= inputs_size)) {
GELOGW("[Check][Option]Check failed because option(%s=%s) is invalid. Output_index must be in the "
"range of [0, %zu)", OPTION_OUTPUT_REUSE_MEM_INDEXES, it_output_indexes->second.c_str(), inputs_size);
continue;
}
output_index_to_reuse_mem_flag_[out_index] = true;
}
}
return;
}
Status BlockMemAssigner::AssignMemoryWithReuse(std::vector<int64_t> &ranges) {
std::string ge_disable_reuse_mem;
(void)ge::GetContext().GetOption(OPTION_EXEC_DISABLE_REUSED_MEMORY, ge_disable_reuse_mem);
GEEVENT("Reuse memory %s, memory_priority_mode is %s.", ge_disable_reuse_mem == "1" ? "close" : "open",
memory_priority_mode_ ? "true" : "false");
is_ge_reuse_mem_ = (ge_disable_reuse_mem != "1");
const char_t *op_no_reuse_mem = nullptr;
MM_SYS_GET_ENV(MM_ENV_OP_NO_REUSE_MEM, op_no_reuse_mem);
if (op_no_reuse_mem != nullptr) {
std::string op_no_reuse_mem_str = op_no_reuse_mem;
CheckAndGetOpReuseEnv(op_no_reuse_mem_str, op_no_reuse_mem_vec_, op_reuse_env_valid_);
}
auto root_graph = GraphUtils::FindRootGraph(compute_graph_);
GE_ASSERT_NOTNULL(root_graph, "[Check][RootGraph]Root graph is nullptr, graph:%s.",
compute_graph_->GetName().c_str());
root_unknown_shape_flag_ = root_graph->GetGraphUnknownFlag();
(void) AttrUtils::GetBool(compute_graph_, ATTR_NAME_MEM_RELEASE_FIRST_REUSE_FIRST,
reuse_strategy_.reuse_first_release_);
if (reuse_strategy_.reuse_first_release_) {
GELOGI("The block usage strategy: first release, first reuse.");
} else {
GELOGI("The block usage strategy: first release, last reuse.");
}
for (NodePtr &n : compute_graph_->GetAllNodes()) {
auto node_op_desc = n->GetOpDescBarePtr();
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
GE_ASSERT_SUCCESS(AssignOutputMemoryWithReuse(n, ranges), "node: %s(%s) assign output memory failed.",
n->GetNamePtr(), n->GetTypePtr());
GE_ASSERT_SUCCESS(AssignWorkSpaceMemoryWithReuse(n, ranges), "node: %s(%s) assign workspace memory failed.",
n->GetNamePtr(), n->GetTypePtr());
ReleaseInputNodeOutMemory(n);
}
for (const auto &block_pair : reusable_blocks_) {
memory_stat_[block_pair.first].stream_count_ = block_pair.second.size();
}
GELOGD("Assigned memory blocks:");
PrintMemBlock();
GE_IF_BOOL_EXEC(is_ge_reuse_mem_, ReuseBlocksByLifeTime());
AssignContinuousBlocks();
GE_ASSERT_SUCCESS(ResizeMemoryBlocks(), "resize memory block failed");
GELOGD("Memory blocks after resize:");
PrintMemBlock();
return SUCCESS;
}
void BlockMemAssigner::PrintMemBlock() {
for (auto mem_block : memory_blocks_) {
if (mem_block == nullptr) {
continue;
}
mem_block->SetRefLifeTimeEnd();
GELOGD("%s", mem_block->String().c_str());
}
}
void BlockMemAssigner::CheckWorkspaceReuse(const std::vector<bool> &workspace_reuse_flag,
uint32_t index, int64_t stream_id, MemoryBlock *const mem_block, uint64_t memory_type) {
bool reuse_mem_flag =
((workspace_reuse_flag.size() > index) && (!workspace_reuse_flag[index])) ? false : true;
if (reuse_mem_flag) {
stream_workspace_blocks_[memory_type][stream_id].emplace_back(mem_block);
}
}
void BlockMemAssigner::GetNodeWorkSpaceSize(const NodePtr &node, std::vector<int64_t> &workspace_memory,
int64_t &total_size) const {
if (node->GetOpDescBarePtr() == nullptr) {
REPORT_INNER_ERR_MSG("E19999", "param node opdesc is nullptr, check invalid.");
GELOGE(FAILED, "[Check][Param] Op desc is null.");
return;
}
std::vector<int64_t> workspace_byte_nums = node->GetOpDescBarePtr()->GetWorkspaceBytes();
for (int64_t byte_size : workspace_byte_nums) {
if ((byte_size < 0) && (byte_size != -1)) {
GELOGE(FAILED, "[Check][Workspace]workspace_size:%" PRId64 " is invalid, "
"maybe it is unknown shape node, Node_name:%s",
byte_size, node->GetOpDescBarePtr()->GetNamePtr());
REPORT_INNER_ERR_MSG("E19999", "workspace_size:%" PRId64 " is invalid, "
"maybe it is unknown shape node, Node_name:%s",
byte_size, node->GetOpDescBarePtr()->GetNamePtr());
workspace_memory.emplace_back(byte_size);
return;
}
byte_size = (byte_size < 0) ? 0 : byte_size;
workspace_memory.emplace_back(byte_size);
total_size += byte_size;
GELOGI("node[%s] workspace_byte_nums:%zu, push back size:%" PRId64 "", node->GetOpDescBarePtr()->GetNamePtr(),
workspace_byte_nums.size(), byte_size);
}
}
static bool CompareBlockIndex(const MemoryBlock *const left, const MemoryBlock *const right) {
if (left == nullptr || right == nullptr) {
return false;
}
if (left->input_index_ < right->input_index_) {
return true;
}
return false;
}
void ReAssignContinuousBlocks(const std::vector<MemoryBlock *> &org_blocks,
const std::map<MemoryBlock *, uint32_t> &block_map,
std::vector<MemoryBlock *> &dest_blocks, std::vector<MemoryBlock *> &continuous_blocks,
const std::string &type) {
for (auto &memory_block : org_blocks) {
if (memory_block == nullptr || memory_block->child_block_) {
continue;
}
if (block_map.find(memory_block) != block_map.end()) {
continue;
}
dest_blocks.emplace_back(memory_block);
}
std::sort(continuous_blocks.begin(), continuous_blocks.end(), CompareBlockIndex);
size_t count = 0UL;
for (auto &memory_block : continuous_blocks) {
GE_IF_BOOL_EXEC(memory_block == nullptr, continue);
GELOGI("Block continuous %s index:%d", type.c_str(), memory_block->input_index_);
count++;
if (count == 1U) {
memory_block->SetFirstContinuousBlock();
}
if (count == continuous_blocks.size()) {
memory_block->SetLastContinuousBlock();
}
dest_blocks.emplace_back(memory_block);
}
}
void BlockMemAssigner::AssignContinuousBlocks() {
for (const auto &block_map : node_continuous_input_blocks_) {
std::vector<MemoryBlock *> dest_memory_blocks;
std::map<MemoryBlock *, uint32_t> continuous_block_map;
std::vector<MemoryBlock *> continuous_blocks;
const auto it = node_continuous_input_counts_.find(block_map.first);
GE_IF_BOOL_EXEC(it == node_continuous_input_counts_.end(), continue);
const bool size_independent = SizeIndependentOfBatch(it->second.first);
GELOGI("Node %" PRId64 " continuous input block count:%zu input count:%u, size_independent:%d", block_map.first,
block_map.second.size(), it->second.second, static_cast<int32_t>(size_independent));
GE_IF_BOOL_EXEC(it->second.second != block_map.second.size(), continue);
for (auto &iter : block_map.second) {
if (iter.second != nullptr) {
iter.second->need_same_offset_in_batch_ = size_independent;
continuous_block_map[iter.second] = iter.first;
iter.second->input_index_ = iter.first;
continuous_blocks.emplace_back(iter.second);
}
}
if (continuous_block_map.size() != continuous_blocks.size()) {
GELOGW("Node %" PRId64 " continuous input map size:%zu vector size:%zu", block_map.first,
continuous_block_map.size(), continuous_blocks.size());
continue;
}
ReAssignContinuousBlocks(memory_blocks_, continuous_block_map, dest_memory_blocks, continuous_blocks, "input");
memory_blocks_.swap(dest_memory_blocks);
}
}
struct CompareLifeInterval {
explicit CompareLifeInterval(const ReuseStrategy &reuse_strategy) : reuse_strategy_(reuse_strategy) {}
bool operator() (MemoryBlock *const left, MemoryBlock *const right) const {
if ((left != nullptr) && (right != nullptr)) {
auto left_size = left->AlignSize();
auto right_size = right->AlignSize();
if (left->GetContinuousFlag()) {
auto it = std::max_element(std::begin(left->NoAlignSizeList()), std::end(left->NoAlignSizeList()));
if (it != left->NoAlignSizeList().end()) {
left_size = *it;
}
}
if (right->GetContinuousFlag()) {
auto it = std::max_element(std::begin(right->NoAlignSizeList()), std::end(right->NoAlignSizeList()));
if (it != right->NoAlignSizeList().end()) {
right_size = *it;
}
}
if (left_size == right_size) {
if (!reuse_strategy_.ascending_sort_) {
return (left->GetLifeBegin(true) > right->GetLifeBegin(true));
}
if (left->NodeTypeIndexList().size() == right->NodeTypeIndexList().size()) {
return (left->GetLifeBegin(true) < right->GetLifeBegin(true));
} else {
return (left->NodeTypeIndexList().size() < right->NodeTypeIndexList().size());
}
} else {
return (left_size > right_size);
}
}
return false;
}
ReuseStrategy reuse_strategy_;
};
void BlockMemAssigner::ReuseBlocksByLifeTime() {
if (!NeedLevel2Reuse()) {
return;
}
CompareLifeInterval cmp(reuse_strategy_);
std::sort(memory_blocks_.begin(), memory_blocks_.end(), cmp);
for (size_t i = 0UL; i < memory_blocks_.size(); ++i) {
auto parent = memory_blocks_[i];
GE_IF_BOOL_EXEC((parent == nullptr || parent->child_block_), continue);
for (size_t j = i + 1; j < memory_blocks_.size(); ++j) {
auto child = memory_blocks_[j];
GE_IF_BOOL_EXEC((child == nullptr), continue);
if (!child->NodeTypeIndexList().empty() && parent->GetContinuousFlag()) {
auto node = child->NodeTypeIndexList()[0].node_;
bool before_atomic_clean = ((node == nullptr) || (node->GetOpDescBarePtr() == nullptr)
|| (node->GetOpDescBarePtr()->GetId() < GetAtomicAddrCleanId()));
GE_IF_BOOL_EXEC(before_atomic_clean, continue);
}
std::vector<MemoryBlock *> clone_blocks;
parent->AddLifeReuseBlock(this, child, clone_blocks, 0, in_stream_edges_);
GE_IF_BOOL_EXEC(clone_blocks.empty(), continue);
memory_blocks_.insert(memory_blocks_.cbegin() + j + 1, clone_blocks.cbegin(), clone_blocks.cend());
blocks_store_.insert(blocks_store_.cend(), clone_blocks.cbegin(), clone_blocks.cend());
size_t min_block_align_size = parent->AlignSize();
for (const auto block : clone_blocks) {
min_block_align_size =
((block != nullptr) && (block->AlignSize() < min_block_align_size)) ? block->AlignSize() :
min_block_align_size;
}
const size_t next_index = j + 1 + clone_blocks.size();
const bool need_sort_again = ((next_index < memory_blocks_.size()) && (memory_blocks_[next_index] != nullptr)
&& (min_block_align_size < memory_blocks_[next_index]->AlignSize())) || memory_priority_mode_;
if (need_sort_again) {
std::sort(memory_blocks_.begin() + j + 1, memory_blocks_.end(), cmp);
}
}
}
}
Status AddBlockMemOffset(std::map<uint64_t, size_t> &mem_offsets, MemoryBlock &block) {
auto it = mem_offsets.find(block.memory_type_);
if (it == mem_offsets.end()) {
auto result = mem_offsets.insert(std::pair<int64_t, size_t>(block.memory_type_, block.memory_type_logic_base_));
GE_ASSERT_TRUE(result.second);
it = result.first;
}
GE_ASSERT_TRUE(it != mem_offsets.end());
auto &mem_offset = it->second;
block.Resize();
GE_ASSERT_SUCCESS(block.SetHeadOffset(mem_offset));
mem_offset += block.Size();
block.SetTailOffset(mem_offset - 1);
return SUCCESS;
}
Status BlockMemAssigner::ResizeMemoryBlocks() {
DynamicBatchMemAssigner dynamic_batch_mem_assigner(reuse_strategy_, memory_blocks_, blocks_store_);
dynamic_batch_mem_assigner.ResizeDynamicBatchBlocks();
for (auto &memory_block : memory_blocks_) {
if (memory_block == nullptr || memory_block->child_block_ || memory_block->is_zero_copy_) {
continue;
}
GE_ASSERT_SUCCESS(AddBlockMemOffset(mem_offsets_, *memory_block));
}
for (auto &it : memory_stat_) {
it.second.theory_min_memory_size_ += it.second.theory_no_reuse_memory_size_;
}
for (const auto &it : mem_offsets_) {
GELOGI("Reuse result:%s memory type:%lu mem_offset exclude zero_copy_memory:%zu.",
(compute_graph_ != nullptr) ? compute_graph_->GetName().c_str() : "", it.first, it.second);
}
return SUCCESS;
}
void BlockMemAssigner::SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock &block,
size_t real_size, size_t no_align_size, int32_t child_block_level) const {
GE_CHECK_NOTNULL_EXEC(node_type.node_, return);
auto op_desc = node_type.node_->GetOpDescBarePtr();
GE_CHECK_NOTNULL_EXEC(op_desc, return);
std::string graph_name = compute_graph_->GetName();
std::vector<int64_t> memorys_type;
int64_t offset = block.HeadOffset();
bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, memorys_type);
if (node_type.mem_type_ == kOutput) {
std::vector<int64_t> output_list = op_desc->GetOutputOffset();
for (auto i = static_cast<uint32_t>(output_list.size()); i < node_type.index_ + 1; i++) {
output_list.emplace_back(kInvalidOffset);
}
if (output_list.empty()) {
GELOGW("Empty output");
return;
}
static const std::set<std::string> kSetOffsetTypes = { DATA_TYPE, REFDATA, AIPP_DATA_TYPE, MULTISHAPE, NETOUTPUT };
if ((kSetOffsetTypes.count(op_desc->GetTypePtr()) > 0) && !IsKnownSubgraphData(node_type.node_)) {
if ((output_list[node_type.index_] == kInvalidOffset) || (output_list[node_type.index_] < offset)) {
output_list.at(node_type.index_) = offset;
}
} else {
bool set_out_offset = (!has_mem_type_attr) ||
(memorys_type.size() > node_type.index_ && memorys_type[node_type.index_] != RT_MEMORY_L1 &&
(memorys_type[node_type.index_] != kRtMemoryUB));
if (set_out_offset) {
output_list.at(node_type.index_) = offset;
}
}
op_desc->SetOutputOffset(output_list);
} else if ((node_type.mem_type_ == kWorkspace) && (!node_type.is_subgraph_workspace_)) {
std::vector<int64_t> workspace_list;
workspace_list = op_desc->GetWorkspace();
for (auto i = static_cast<uint32_t>(workspace_list.size()); i < node_type.index_ + 1; i++) {
workspace_list.emplace_back(kInvalidOffset);
}
std::vector<int64_t> workspace_mem_type;
bool has_workspace_mem_type = ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, workspace_mem_type);
bool set_workspace_offset = (!has_workspace_mem_type) ||
(workspace_mem_type.size() > node_type.index_ && workspace_mem_type[node_type.index_] != RT_MEMORY_L1 &&
(workspace_mem_type[node_type.index_] != kRtMemoryUB));
if (set_workspace_offset) {
workspace_list.at(node_type.index_) = offset;
}
op_desc->SetWorkspace(workspace_list);
} else if (node_type.mem_type_ == kOutputDesc) {
auto tensor = op_desc->MutableOutputDesc(node_type.index_);
GE_IF_BOOL_EXEC(tensor != nullptr,
(void)AttrUtils::SetInt(tensor, ATTR_NAME_TENSOR_DESC_MEM_OFFSET, offset));
}
GELOGI(
"[IMAS]Set %s name[%s] optype[%s] %s[%u] offset to [%" PRId64 "] streamid[%s] memtype[%lu] size[%zu] realsize[%zu] "
"noalignsize[%zu] life time begin[%s] life time end[%s] child[%d:%d:%d:%d:%d] isref[%d] batch[%s], "
"block_type[%s]",
MemReuseUtils::GetGraphNameId(compute_graph_.get()).c_str(), op_desc->GetName().substr(0, kMaxLogLen).c_str(),
node_type.node_->GetTypePtr(), node_type.GetMemType().c_str(), node_type.index_, offset,
GetStreamIdDesc(op_desc).c_str(), block.memory_type_, block.Size(), real_size, no_align_size,
node_type.GetLifeBeginDesc().c_str(), node_type.GetLifeEndDesc().c_str(), child_block_level, block.reuse_mem_,
block.GetContinuousFlag(), block.is_zero_copy_, block.same_stream_, node_type.ref_input_,
block.batch_label_.c_str(),
NodeMemAttrUtils::GetAttrStr(node_type).c_str());
if ((!node_type.ref_input_) && (real_size != 0U)) {
size_t life_end = node_type.GetLifeEnd().back();
life_end = (life_end == kDefaultLifeTime) ? kMaxLifeTime : life_end;
CANN_PROFILING_REPORT_STATIC_OP_MEM_INFO(compute_graph_, node_type.node_->GetOpDesc(), real_size,
node_type.GetLifeBegin(), life_end);
}
}
void BlockMemAssigner::SetBlockOpMemOffset(const MemoryBlock *const block,
int32_t child_block_level, bool &is_fixed_addr_prior) const {
if (block == nullptr) {
return;
}
size_t index = 0UL;
size_t real_size = 0UL;
size_t no_align_size = 0UL;
auto real_size_list_size = block->RealSizeList().size();
for (const NodeTypeIndex &node_type_index : block->NodeTypeIndexList()) {
if (index < real_size_list_size) {
real_size = block->RealSizeList()[index];
no_align_size = block->NoAlignSizeList()[index];
}
SetOffsetSize(node_type_index, *block, real_size, no_align_size, child_block_level);
index++;
}
is_fixed_addr_prior = (is_fixed_addr_prior || block->is_fixed_addr_prior_);
child_block_level++;
if (!block->ChildSubGraphBlockList().empty()) {
for (MemoryBlock *child_block : block->ChildSubGraphBlockList()) {
SetBlockOpMemOffset(child_block, child_block_level, is_fixed_addr_prior);
}
}
for (auto &blocks : block->BatchBlockList()) {
for (auto child_block : blocks.second) {
SetBlockOpMemOffset(child_block, child_block_level, is_fixed_addr_prior);
}
}
for (MemoryBlock *child_block : block->ChildBlockList()) {
SetBlockOpMemOffset(child_block, child_block_level, is_fixed_addr_prior);
}
}
void BlockMemAssigner::SetOpMemOffset(bool is_zero_copy) const {
if (!is_zero_copy) {
for (const auto &attr : bool_attr_) {
if (!ge::AttrUtils::SetBool(attr.ptr_, attr.name_, attr.value_)) {
GELOGW("Set %s input[%d] %s to %s failed.",
attr.desc_->GetNamePtr(), attr.index_, attr.name_.c_str(), attr.value_ ? "true" : "false");
continue;
}
GELOGD("Set %s input[%d] %s to %s success.",
attr.desc_->GetNamePtr(), attr.index_, attr.name_.c_str(), attr.value_ ? "true" : "false");
}
for (const auto &attr : int_attr_) {
if (!ge::AttrUtils::SetInt(attr.ptr_, attr.name_, attr.value_)) {
GELOGW("Set %s attr %s to %" PRId64 " failed.", attr.desc_->GetNamePtr(), attr.name_.c_str(), attr.value_);
continue;
}
GELOGD("Set %s attr %s to %" PRId64 " success.", attr.desc_->GetNamePtr(), attr.name_.c_str(), attr.value_);
}
}
for (MemoryBlock *memory_block : memory_blocks_) {
if (memory_block == nullptr || memory_block->child_block_) {
continue;
}
if ((is_zero_copy && !memory_block->is_zero_copy_) || (!is_zero_copy && memory_block->is_zero_copy_)) {
continue;
}
bool is_fixed_addr_prior = false;
SetBlockOpMemOffset(memory_block, 0, is_fixed_addr_prior);
memory_block->is_fixed_addr_prior_ = (memory_block->is_fixed_addr_prior_ || is_fixed_addr_prior);
}
const auto var_mng = VarManager::Instance(compute_graph_->GetSessionID());
if (!is_zero_copy) {
for (const NodeTypeIndex &node_type_index : zero_memory_list_) {
if (var_mng->IsVarExist(VarMemAssignUtil::GetNameForVarManager(node_type_index.node_->GetOpDesc()))) {
continue;
}
MemoryBlock block(reuse_strategy_, 0, 0);
SetOffsetSize(node_type_index, block, 0UL, 0UL, 0);
}
}
SetOffsetForContinuousMem();
}
void BlockMemAssigner::SetOffsetForContinuousMem() const {
for (const auto &continuous_mem : continuous_mem_mng_.GetAllContinuousMem()) {
const auto &blocks = continuous_mem.GetBlocks();
if (!blocks.empty()) {
const auto block = blocks.front();
auto offset = block->HeadOffset();
for (size_t i = 0U; i < continuous_mem.GetContinuousNodeOut().size(); ++i) {
const auto &node_index = continuous_mem.GetContinuousNodeOut().at(i);
auto op_desc = node_index.node_ptr_->GetOpDescBarePtr();
auto out_offsets = op_desc->GetOutputOffset();
while (out_offsets.size() < node_index.index_ + 1U) {
out_offsets.emplace_back(kInvalidOffset);
}
out_offsets[node_index.index_] = offset;
op_desc->SetOutputOffset(out_offsets);
const auto align_size = continuous_mem.GetAlignedSizes().at(i);
GELOGI("[ContinuousMem][IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%" PRId64 "] "
"stream_id[%" PRId64 "] memtype[%" PRId64 "] size[%zu] realsize[%" PRId64 "] nopadding[%d], block_type[%s]",
MemReuseUtils::GetGraphNameId(compute_graph_.get()).c_str(),
op_desc->GetName().substr(0, kMaxLogLen).c_str(),
op_desc->GetType().c_str(), node_index.index_, offset,
op_desc->GetStreamId(), block->memory_type_, 0UL, align_size, false,
NodeMemAttrUtils::GetAttrStr({node_index.node_ptr_, kOutput, node_index.index_}).c_str());
offset += align_size;
}
}
}
}
void BlockMemAssigner::SetOpMemOffset(const std::vector<MemoryBlock *> &zero_copy_blocks) const {
for (const auto memory_block : zero_copy_blocks) {
if ((memory_block != nullptr) && (!memory_block->child_block_)) {
bool is_fixed_addr_prior = false;
SetBlockOpMemOffset(memory_block, 0, is_fixed_addr_prior);
memory_block->is_fixed_addr_prior_ = (memory_block->is_fixed_addr_prior_ || is_fixed_addr_prior);
}
}
}
Status BlockMemAssigner::Assign() {
return SUCCESS;
}
bool BlockMemAssigner::CheckIsZeroMemNodeType(const std::string &node_type) const {
return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || (node_type == CONSTANTOP)
|| (node_type == HVDWAIT) || (node_type == FILECONSTANT) || (node_type == CONSTPLACEHOLDER);
}
bool BlockMemAssigner::CheckIsZeroMemNodeOutputIndex(const NodePtr &n, uint32_t index) const {
const auto op_desc = n->GetOpDescBarePtr();
GE_ASSERT_NOTNULL(op_desc);
const auto output_tensor_desc = op_desc->MutableOutputDesc(index);
if (output_tensor_desc == nullptr) {
GELOGW("op[%s] null output_tensor_desc, index[%u]", op_desc->GetName().c_str(), index);
return false;
}
int32_t tensor_type = 0;
const bool ret = ge::AttrUtils::GetInt(output_tensor_desc, ATTR_NAME_TENSOR_MEMORY_SCOPE, tensor_type);
if (ret && tensor_type == kOutputMemoryGlobalType) {
GELOGD("node[%s] output[%u] is zero memory", n->GetName().c_str(), index);
return true;
}
return false;
}
uint64_t BlockMemAssigner::GetWorkSpaceMemoryType(const size_t no_reuse_scope_size, const size_t index,
const bool is_p2p_memory, const bool session_scope_memory,
std::vector<bool> &workspace_reuse_flag) const {
if (is_p2p_memory) {
return RT_MEMORY_P2P_DDR;
}
if (session_scope_memory) {
if (workspace_reuse_flag.empty()) {
workspace_reuse_flag.assign(no_reuse_scope_size, true);
}
workspace_reuse_flag[index] = false;
return kSessionScopeMemory | RT_MEMORY_HBM;
}
return RT_MEMORY_HBM;
}
}