* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#ifndef GE_GRAPH_PASSES_BUFFER_POOL_MEMORY_PASS_H_
#define GE_GRAPH_PASSES_BUFFER_POOL_MEMORY_PASS_H_
#include <queue>
#include "graph/graph.h"
#include "graph/passes/graph_pass.h"
namespace ge {
class BufferPoolMemoryPass : public GraphPass {
public:
explicit BufferPoolMemoryPass() : logic_event_num_(0) {}
~BufferPoolMemoryPass() override = default;
struct BufferPool {
int64_t pool_id = 0;
int64_t pool_size = 0;
std::unordered_map<NodePtr, NodePtr> buffer_node_to_calc;
BufferPool(int64_t id, int64_t size, const std::unordered_map<NodePtr, NodePtr> &node_map)
: pool_id(id), pool_size(size), buffer_node_to_calc(node_map) {}
};
struct BufferPoolNodeItem {
NodePtr node = nullptr;
NodePtr out_calc_node = nullptr;
NodePtr pre_buffer_pool_node = nullptr;
int64_t total_size = 0;
int64_t offset_start = 0;
int64_t offset_end = 0;
bool is_last_input = true;
bool is_first_in_group = true;
BufferPoolNodeItem(const NodePtr &buffer_n, const NodePtr &calc_n, const NodePtr &pre_buffer_n,
int64_t size, int64_t start, int64_t end, bool last)
: node(std::move(buffer_n)),
out_calc_node(std::move(calc_n)),
pre_buffer_pool_node(std::move(pre_buffer_n)),
total_size(size),
offset_start(start),
offset_end(end),
is_last_input(last) {}
BufferPoolNodeItem(const NodePtr &buffer_n, int64_t start, int64_t end)
: node(std::move(buffer_n)),
out_calc_node(nullptr),
pre_buffer_pool_node(nullptr),
total_size(0),
offset_start(start),
offset_end(end),
is_last_input(true) {}
};
struct BufferPoolNodeItemGroup {
std::vector<BufferPoolNodeItem> node_items;
};
Status Run(ComputeGraphPtr graph) override;
private:
static void ClearQueue(std::queue<std::pair<std::string, uint32_t>> &q);
static bool IsBufferPoolMemEnable(const ComputeGraphPtr &graph);
static Status CheckBufferPoolSize(int64_t total_size, int64_t pool_id, int64_t buffer_pool_size,
std::unordered_map<int64_t, int64_t> &calc_total_size);
static Status TryToFixNodeOrder(NodePtr &pre_node, NodePtr &curr_node, bool ¬_change);
Status InsertMemCpyNodeAfter(NodePtr &node) const;
Status CopyOutForMultiUsedOutput(ComputeGraphPtr &graph) const;
Status GetBufferPoolAndPeerCalcNodes(const ComputeGraphPtr &graph);
Status SetBufferPoolSize(const std::string &batch_label, int64_t id, int64_t size);
Status AllocateAllBufferPoolSpace();
Status AllocateSpaceInBatch(const std::map<int64_t, std::vector<NodePtr>> &calc_nodes,
const std::unordered_map<int64_t, int64_t> &buffer_pool_size_map,
const std::unordered_map<NodePtr, NodePtr> &buffer_node_to_calc);
Status AllocateSpaceInBufferPool(const BufferPool &buffer_pool,
const std::vector<NodePtr> &calc_nodes_in_pool);
Status AllocateSpaceForBufferPoolNode(int64_t &next_start,
const BufferPool &buffer_pool,
BufferPoolNodeItem &buffer_pool_node_item,
const int64_t group_total_mem_size,
std::queue<BufferPoolNodeItem> &node_mem_range_in_pool);
NodePtr GetOffsetAndDependency(int64_t &next_start,
int64_t total_mem_size,
int64_t buffer_pool_size,
const std::unordered_map<NodePtr, NodePtr> &buffer_node_to_calc,
std::queue<BufferPoolNodeItem> &nodes_in_buffer) const;
Status FixTheTimingOfDependentNodes(NodePtr &dependent_calc_node, NodePtr &curr_pool_node);
uint32_t GenerateEventId(const std::string &node_name, std::queue<std::pair<std::string, uint32_t>> &event_queue);
Status SetResultOfMemoryAndEvent();
Status DisableBufferPreventingCycle(const ComputeGraphPtr &compute_graph);
static Status DoCheckAndDisable(
const std::vector<NodePtr> &calc_nodes,
std::unordered_map<NodePtr, std::vector<BufferPoolNodeItem>> &calc_node_to_buffer_node_items);
static bool HasDependency(const NodePtr &from_node, const NodePtr &to_node);
static bool FindOrBackward(const NodePtr &target_node,
std::vector<NodePtr> &nodes,
std::unordered_set<const Node *> &visited);
void GroupBufferPoolNodes();
void DoGroupBufferPoolNodes(std::vector<NodePtr> &calc_nodes,
std::unordered_map<NodePtr, std::vector<BufferPoolNodeItem>> &buffer_node_items);
static NodePtr GetLastOutDataNode(const NodePtr &node);
static NodePtr BypassRefIoNodes(const NodePtr &node);
std::map<std::string, std::map<int64_t, std::vector<NodePtr>>> calc_nodes_;
std::unordered_map<std::string, std::unordered_map<NodePtr, NodePtr>> buffer_node_to_calc_;
std::unordered_map<std::string, std::unordered_map<NodePtr, std::vector<BufferPoolNodeItem>>> peer_buffer_node_item_;
std::unordered_map<NodePtr, std::vector<BufferPoolNodeItemGroup>> peer_buffer_node_items_;
std::unordered_map<std::string, std::unordered_map<int64_t, int64_t>> buffer_pool_size_;
uint32_t logic_event_num_;
std::queue<std::pair<std::string, uint32_t>> mem_ctrl_event_;
std::queue<std::pair<std::string, uint32_t>> stream_ctrl_event_;
std::unordered_map<NodePtr, std::vector<std::string>> node_event_multiplexing_;
std::unordered_map<NodePtr, std::vector<int64_t>> buffer_node_logical_offset_;
std::unordered_set<NodePtr> buffer_pool_nodes_;
};
}
#endif