* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#ifndef PARSER_TUNING_SPACE_H_
#define PARSER_TUNING_SPACE_H_
#include <cstdint>
#include <iostream>
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "graph/node.h"
#include "base/model_info.h"
#include "ascendc_ir/ascendc_ir_core/ascendc_ir_def.h"
#include "ascendc_ir.h"
#include "ascir_node_param/ascir_node_param.h"
namespace att {
class TilingScheduleConfigTable;
// Printable label for each AxisPosition enumerator; SubAxis::ToString() reads it
// when dumping axis_type. AxisPosition::POSERR is rendered as "INVALID".
const std::unordered_map<AxisPosition, std::string> AxisType2Str{
    {AxisPosition::OUTER, "OUTER"},
    {AxisPosition::INNER, "INNER"},
    {AxisPosition::ORIGIN, "ORIGIN"},
    {AxisPosition::MERGED, "MERGED"},
    {AxisPosition::POSERR, "INVALID"},
};
// Printable label for each HardwareDef enumerator.
// HardwareDef::HARDWAREERR is rendered as "INVALID".
const std::unordered_map<HardwareDef, std::string> HardwareType2Str{
    {HardwareDef::GM, "GM"},
    {HardwareDef::L1, "L1"},
    {HardwareDef::L2, "L2"},
    {HardwareDef::L0A, "L0A"},
    {HardwareDef::L0B, "L0B"},
    {HardwareDef::L0C, "L0C"},
    {HardwareDef::UB, "UB"},
    {HardwareDef::BTBUF, "BTBUF"},
    {HardwareDef::CORENUM, "CORENUM"},
    {HardwareDef::HARDWAREERR, "INVALID"},
};
// Printable label for each PipeType enumerator.
// PipeType::PIPE_NONE is rendered as "INVALID".
const std::unordered_map<PipeType, std::string> PipeType2Str{
    {PipeType::AIC_MTE1, "AIC_MTE1"},
    {PipeType::AIC_MTE2, "AIC_MTE2"},
    {PipeType::AIC_FIXPIPE, "AIC_FIXPIPE"},
    {PipeType::AIC_MAC, "AIC_MAC"},
    {PipeType::AIV_MTE2, "AIV_MTE2"},
    {PipeType::AIV_MTE3, "AIV_MTE3"},
    {PipeType::AIV_VEC, "AIV_VEC"},
    {PipeType::AICORE_MTE1, "AICORE_MTE1"},
    {PipeType::AICORE_MTE2, "AICORE_MTE2"},
    {PipeType::AICORE_MTE3, "AICORE_MTE3"},
    {PipeType::AICORE_CUBE, "AICORE_CUBE"},
    {PipeType::AICORE_VEC, "AICORE_VEC"},
    {PipeType::PIPE_NONE, "INVALID"},
};
struct SubAxis;
using SubAxisPtr = std::unique_ptr<SubAxis>;
/**
 * Plain data record for a single axis.
 * SubAxisPtr (std::unique_ptr) is the owning handle; the raw SubAxis pointers stored in
 * orig_axis / parent_axis are presumably non-owning links to other records - confirm
 * against the code that fills them.
 */
struct SubAxis {
  /**
   * Builds a one-line, human-readable dump of a subset of the fields (name, the boolean
   * flags listed below, align, axis_type, repeat, original axis names and parent axis names).
   *
   * @return the formatted text; an invalid `repeat` is printed as an empty string.
   */
  std::string ToString() const
  {
    // Use find() rather than at(): an axis_type that is missing from AxisType2Str must not
    // make this debug helper throw std::out_of_range. Unmapped values are shown as "INVALID",
    // the same label the table uses for AxisPosition::POSERR.
    const auto type_iter = AxisType2Str.find(axis_type);
    const std::string type_label = (type_iter != AxisType2Str.end()) ? type_iter->second : "INVALID";

    std::stringstream ss;
    ss << "name: " << name
       << ", is_bind_multi_core: " << is_bind_multi_core
       << ", is_split: " << is_split
       << ", is_last: " << is_last
       << ", enable_pad: " << enable_pad
       << ", is_node_innerest_dim: " << is_node_innerest_dim
       << ", align: " << align
       << ", axis_type: " << type_label;
    ss << ", repeat: " << ((repeat.IsValid()) ? Str(repeat) : "");
    ss << ", orig_axis_name: ";
    for (const auto &axis_name : orig_axis_name) {
      ss << axis_name << ",";
    }
    ss << " parent_axis_name: ";
    for (const auto *parent : parent_axis) {
      // Guard against a null entry instead of dereferencing it.
      ss << ((parent != nullptr) ? parent->name : std::string("nullptr")) << ",";
    }
    return ss.str();
  }

  std::string name;
  int64_t id{-1L};
  AxisPosition axis_type{};
  bool is_bind_multi_core = false;
  bool enable_tail = false;
  bool is_split = false;
  bool enable_pad = false;
  bool is_last = false;
  bool is_node_innerest_dim = false;
  bool is_concat_vec_axis = false;
  uint32_t data_type_size = 4;
  Expr align = af::Symbol(1);
  Expr repeat;
  std::pair<int64_t, int64_t> value_range = {-1, -1};
  std::vector<std::string> orig_axis_name;
  // Raw pointers to related SubAxis records (see the note on ownership above).
  std::vector<SubAxis *> orig_axis;
  std::vector<SubAxis *> parent_axis;
  std::string basic;
  bool is_reduce_split_axis{false};
  bool is_broadcast_split_axis{false};
};
/**
 * Plain data record for a tensor: identification, per-dimension axis pointers and the
 * repeat/stride expression lists, plus the hardware location (`loc`, default GM).
 * The SubAxis and AscNode raw pointers are presumably non-owning - confirm with the producer.
 */
struct Tensor {
  /**
   * Builds a one-line dump: name, data size, resource id, the names of the axes in
   * `dim_info`, followed by the output of GetRepeat() and GetStride().
   * Const, like the other formatting helpers, because it only reads the record.
   */
  std::string ToString() const
  {
    std::stringstream ss;
    ss << "name: " << name
       << ", datasize: " << data_type_size
       << ", resource_id: " << resource_id;
    ss << ", axis {";
    for (const auto *axis : dim_info) {
      // Guard against a null entry instead of dereferencing it.
      ss << ((axis != nullptr) ? axis->name : std::string("nullptr")) << ", ";
    }
    ss << "} ";
    return ss.str() + GetRepeat() + GetStride();
  }

  /**
   * Formats `stride`, `ori_stride` and `gm_stride` as ", <label>: {e0, e1, }" segments.
   * Invalid expressions are printed as empty strings.
   */
  std::string GetStride() const
  {
    std::stringstream ss;
    // Shared formatter for one labelled expression list.
    const auto append_list = [&ss](const char *label, const std::vector<Expr> &exprs) {
      ss << label << "{";
      for (const auto &expr : exprs) {
        const std::string text = (expr.IsValid()) ? Str(expr) : "";
        ss << text << ", ";
      }
      ss << "}";
    };
    append_list(", stride: ", stride);
    append_list(", ori_stride: ", ori_stride);
    append_list(", gm_stride: ", gm_stride);
    return ss.str();
  }

  /**
   * Formats `repeat` and `ori_repeat` as ", <label>: {e0, e1, }" segments.
   * Invalid expressions are printed as empty strings.
   */
  std::string GetRepeat() const
  {
    std::stringstream ss;
    // Shared formatter for one labelled expression list.
    const auto append_list = [&ss](const char *label, const std::vector<Expr> &exprs) {
      ss << label << "{";
      for (const auto &expr : exprs) {
        const std::string text = (expr.IsValid()) ? Str(expr) : "";
        ss << text << ", ";
      }
      ss << "}";
    };
    append_list(", repeat: ", repeat);
    append_list(", ori_repeat: ", ori_repeat);
    return ss.str();
  }

  std::string name;
  // Explicitly zero-initialized so a default-initialized Tensor never exposes an
  // indeterminate value (ToString() prints this field).
  uint32_t data_type_size{0U};
  int32_t resource_id = -1;
  af::AscNode *owner_node{nullptr};
  std::string node_type;
  std::string data_type;
  std::vector<SubAxis *> dim_info;
  std::vector<Expr> repeat;
  std::vector<Expr> stride;
  std::vector<Expr> gm_stride;
  std::vector<SubAxis *> ori_dim_info;
  std::vector<Expr> ori_repeat;
  std::vector<Expr> ori_stride;
  std::vector<int32_t> orig_idx;
  HardwareDef loc = HardwareDef::GM;
};
using TensorPtr = std::shared_ptr<Tensor>;
/**
 * Plain data record for one node: names/type strings, input and output tensors,
 * loop axes, nested sub-node records and the execution condition.
 */
struct NodeInfo {
  std::string name;
  std::string node_type;
  std::string node_unit;
  std::string trans_config;
  std::vector<TensorPtr> inputs;
  std::vector<TensorPtr> outputs;
  std::vector<SubAxis *> loop_axes;
  uint32_t depth = 1U;
  af::AscNodePtr node_ptr;
  std::set<std::string> from_data;
  std::vector<NodeInfo> sub_nodes_infos;
  ascir_param::ReduceNodeParams reduce_specific_params;
  af::ExecuteCondition exec_condition{af::ExecuteCondition::kNoCache};

  /**
   * Summarises the record on one line: the three name/type strings, the sizes of the
   * input/output/loop-axis/sub-node lists, every entry of `from_data`, and the
   * execution condition printed as its integer value.
   */
  std::string DebugString() const {
    std::ostringstream out;
    out << "NodeInfo {" << name << ", " << node_type << ", " << node_unit
        << ", input size=" << inputs.size() << ", output size=" << outputs.size()
        << ", loop_axes size=" << loop_axes.size() << ", from_data=";
    for (auto it = from_data.cbegin(); it != from_data.cend(); ++it) {
      out << *it << ", ";
    }
    out << "sub_nodes_infos size=" << sub_nodes_infos.size()
        << ", exec_condition = " << static_cast<int32_t>(exec_condition)
        << " }";
    return out.str();
  }
};
/**
 * Abstract base for the container kinds declared in this header (Queue, Buf, GlobalCache).
 * Derived types must implement GetBufferNum().
 */
struct Container {
  explicit Container(const std::string &name) : name(name) {}

  // Polymorphic base: the destructor is virtual so that destroying a derived object through
  // a Container pointer is well defined. Copy and move are defaulted explicitly because a
  // user-declared destructor would otherwise suppress the implicit move operations.
  virtual ~Container() = default;
  Container(const Container &) = default;
  Container &operator=(const Container &) = default;
  Container(Container &&) = default;
  Container &operator=(Container &&) = default;

  /**
   * @return a copy of `coexist_tensors`. Const because it only reads the member.
   */
  std::vector<std::vector<TensorPtr>> GetCoTensors() const
  {
    return coexist_tensors;
  }

  /**
   * @return the buffer count reported by the concrete container type.
   */
  virtual int64_t GetBufferNum() const = 0;

  std::string name;
  int32_t reuse_id{0};
  Expr align;
  std::vector<TensorPtr> allocated_tensors;
  std::vector<HardwareDef> buf_location;
  std::vector<std::vector<TensorPtr>> coexist_tensors;
  af::AllocType alloc_type{af::AllocType::kAllocTypeInvalid};
  int64_t container_id{0};
};
using ContainerPtr = std::shared_ptr<Container>;
// Container kind "Queue": GetBufferNum() reports the public `buffer_num` field (default 1).
struct Queue : Container {
  int64_t buffer_num{1L};

  explicit Queue(const std::string &name) : Container(name) {}

  int64_t GetBufferNum() const override { return buffer_num; }
};
// Container kind "Buf": GetBufferNum() reports the public `buffer_num` field (default 1).
struct Buf : Container {
  int64_t buffer_num{1L};

  explicit Buf(const std::string &name) : Container(name) {}

  int64_t GetBufferNum() const override { return buffer_num; }
};
// Container kind "GlobalCache": GetBufferNum() reports the public `buffer_num` field (default 1).
struct GlobalCache : Container {
  int64_t buffer_num{1L};

  explicit GlobalCache(const std::string &name) : Container(name) {}

  int64_t GetBufferNum() const override { return buffer_num; }
};
struct TuningSpace {
std::vector<ContainerPtr> containers;
std::vector<ContainerPtr> global_containers;
std::vector<SubAxisPtr> sub_axes;
std::vector<NodeInfo> node_infos;
std::map<int64_t, Expr> workspace_size_map;
std::vector<std::vector<SubAxis *>> block_dims;
std::map<const SubAxis *, std::set<HardwareDef>> related_scopes;
std::map<int64_t, Expr> tmp_buffer;
std::map<std::string, uint32_t> reserve_ub;
Expr builtin_tmp_buffer;
vector<CacheLineConfig> *cache_line_config{nullptr};
const TilingScheduleConfigTable *tiling_schedule_config_table{nullptr};
const af::AscGraph *asc_graph{nullptr};
};
using TuningSpacePtr = std::shared_ptr<TuningSpace>;
}
#endif