* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
* \file subgraph_to_function.cpp
* \brief
*/
#include "passes/tile_graph_pass/subgraph_to_function.h"
#include <fstream>
#include "interface/function/function.h"
#include "interface/tensor/irbuilder.h"
#include "passes/pass_utils/pass_operation_utils.h"
#include "interface/tensor/logical_tensor.h"
#include "tilefwk/tilefwk.h"
#include "interface/inner/tilefwk.h"
#include "interface/program/program.h"
#include "passes/pass_utils/parallel_tool.h"
#include "passes/pass_check/subgraph_to_function_checker.h"
#include "passes/pass_utils/graph_utils.h"
#include "passes/pass_utils/subgraph_utils.h"
#include "passes/pass_utils/pass_utils.h"
#include "passes/pass_log/pass_log.h"
#include "tilefwk/error_code.h"
#undef MODULE_NAME
#define MODULE_NAME "SubgraphToFunction"
namespace npu::tile_fwk {
namespace {
const SymbolicScalar& GetParamAddrSymbol()
{
static const SymbolicScalar kGetParamAddr = []() {
IRBuilder builder;
return builder.CreateScalarVar(AddRuntimeCoaPrefix("GET_PARAM_ADDR"));
}();
return kGetParamAddr;
}
}
void SubgraphToFunction::Init()
{
subFuncInvokeInfos.clear();
viewToCopyInMapping_.clear();
}
Status SubgraphToFunction::RunOnFunction(Function& function)
{
Init();
if (TransViewToCopyInBeforeGenSubgraph(function) != SUCCESS) {
APASS_LOG_ERROR_F(Elements::Function, "Failed to transfer view into copy in.");
return FAILED;
}
if (GetTensorDataDependencyInsert(function) != SUCCESS) {
APASS_LOG_ERROR_F(Elements::Function, "Failed to build the data dependency structure.");
return FAILED;
}
if (function.GetFunctionType() == FunctionType::STATIC) {
if (staticProcessor_.BuildGraph(function) != SUCCESS) {
APASS_LOG_ERROR_F(Elements::Function, "Failed to build graph from input function.");
return FAILED;
}
RecordIncastOutcast(function);
SetupStaticProcessor();
} else {
RecordIncastOutcast(function);
}
ConstructParamMap(function);
Function::EnableMagicLookupRecord(true, &function);
IslandToFunction(function);
Function::EnableMagicLookupRecord(false, &function);
if (GetTensorDataDependencyClear(function) != SUCCESS) {
APASS_LOG_ERROR_F(Elements::Function, "Failed to clear data dependency.");
return FAILED;
}
if (RecoverCopyInToViewAfterGenSubgraph(function) != SUCCESS) {
APASS_LOG_ERROR_F(Elements::Function, "Failed to recover copy in into view.");
return FAILED;
}
return SUCCESS;
}
std::string SubgraphToFunction::FindSymbolName(std::shared_ptr<LogicalTensor> op, int magic) const
{
if (magic < 0) {
return std::to_string(magic);
}
if (magic == 0) {
return "NULL_OPERAND";
}
MemoryType originalType = op->GetMemoryTypeOriginal();
if (originalType == MemoryType::MEM_DEVICE_DDR) {
return "Var$" + std::to_string(magic);
}
auto name = "$" + std::to_string(magic);
return name;
}
void SubgraphToFunction::RecordConnectionWithProducers(RecordInfo recordInfo, SubfuncInvokeInfoTy& iter)
{
std::vector<int> assembleRawMagic;
size_t i = recordInfo.i;
size_t j = recordInfo.j;
size_t k = recordInfo.k;
LogicalTensorPtr iOperand = recordInfo.operand;
Offset offset = recordInfo.offset;
Shape shape = recordInfo.shape;
for (auto& producer : iOperand->GetProducers()) {
auto eSgId = producer->GetSubgraphID();
std::vector<int>::iterator it = find(assembleRawMagic.begin(), assembleRawMagic.end(), iOperand->GetRawMagic());
if (it != assembleRawMagic.end()) {
continue;
}
assembleRawMagic.push_back(iOperand->GetRawMagic());
iter.RecordConnection(
eSgId, i, k, iOperand->GetRawMagic() , offset, shape, iOperand->tensor->rawshape,
iOperand->Datatype(), iOperand, nLIST[i][j]->opmagic);
}
}
bool isFromCast(LogicalTensorPtr& operand)
{
if (!(operand->GetConsumers().empty()) && !(operand->GetProducers().empty())) {
std::set<int> boundTensorIDs;
for (auto& outOp : operand->GetConsumers()) {
boundTensorIDs.insert(outOp->GetSubgraphID());
}
for (auto& inOp : operand->GetProducers()) {
boundTensorIDs.insert(inOp->GetSubgraphID());
}
if (boundTensorIDs.size() == 1) {
return false;
}
}
return true;
}
void SubgraphToFunction::RecordIncastInfo(Function& function, RecordInfo recordInfo, SubfuncInvokeInfoTy& iter)
{
size_t i = recordInfo.i;
size_t j = recordInfo.j;
size_t k = recordInfo.k;
LogicalTensorPtr iOperand = recordInfo.operand;
Offset offset = recordInfo.offset;
Shape shape = recordInfo.shape;
auto& op = *nLIST[i][j];
if (!isFromCast(iOperand)) {
APASS_LOG_INFO_F(
Elements::Tensor, "Tensor %d has consumer in same subgraph, cannot be incast.", iOperand->GetMagic());
return;
}
if (op.HasAttribute(OpAttributeKey::inplaceIdx) && !iOperand->GetProducers().empty()) {
if (!SubgraphUtils::IsBoundary(iOperand)) {
return;
}
}
if (function.IsFromInCast(iOperand) || function.IsFromOutCast(iOperand)) {
iter.RecordTensorArg(
k, iOperand->GetRawMagic(), offset, shape, iOperand->tensor->rawshape, iOperand->Datatype(), false,
iOperand, nLIST[i][j]->opmagic);
return;
}
if (!SubgraphUtils::IsBoundary(iOperand) || iOperand->GetMemoryTypeOriginal() != MemoryType::MEM_DEVICE_DDR) {
return;
}
auto producers = iOperand->GetProducers();
if (producers.size() != 0) {
RecordConnectionWithProducers(recordInfo, iter);
return;
}
if (IsCopyIn(nLIST[i][j]->GetOpcode())) {
iter.RecordConnection(
i, i, k, iOperand->GetRawMagic(), offset, shape, iOperand->tensor->rawshape, iOperand->Datatype(), iOperand,
nLIST[i][j]->opmagic);
}
}
void SubgraphToFunction::RecordEsgIncast(Function& function, size_t i, size_t j, size_t k)
{
auto& iter = subFuncInvokeInfos[i];
auto iOperand = nLIST[i][j]->GetInputOperand(k);
auto offset = iOperand->offset;
auto shape = iOperand->shape;
if (IsCopyIn(nLIST[i][j]->GetOpcode())) {
offset.clear();
std::shared_ptr<CopyOpAttribute> attr =
std::static_pointer_cast<CopyOpAttribute>(nLIST[i][j]->GetOpAttribute());
std::vector<OpImmediate> opImmList = attr->GetCopyInAttr().first;
for (auto& opImm : opImmList) {
offset.push_back(
opImm.GetSpecifiedValue().ConcreteValid() ? static_cast<int>(opImm.GetSpecifiedValue()) : -1);
}
shape = attr->GetSpecifiedShape(1);
}
RecordInfo recordInfo = {i, j, k, iOperand, shape, offset};
RecordIncastInfo(function, recordInfo, iter);
}
void SubgraphToFunction::RecordOutcastInfo(Function& function, RecordInfo recordInfo, SubfuncInvokeInfoTy& iter)
{
size_t i = recordInfo.i;
size_t j = recordInfo.j;
size_t k = recordInfo.k;
LogicalTensorPtr oOperand = recordInfo.operand;
Offset offset = recordInfo.offset;
Shape shape = recordInfo.shape;
auto& op = *nLIST[i][j];
if (!isFromCast(oOperand)) {
APASS_LOG_INFO_F(
Elements::Tensor, "Tensor %d has consumer in same subgraph, cannot be outcast.", oOperand->GetMagic());
return;
}
if (op.HasAttribute(OpAttributeKey::inplaceIdx) &&
(op.GetOpcode() != Opcode::OP_COPY_OUT && op.GetOpcode() != Opcode::OP_INDEX_PUT &&
op.GetOpcode() != Opcode::OP_INDEX_ADD)) {
return;
}
if (function.IsFromOutCast(oOperand) || function.IsFromInCast(oOperand)) {
iter.RecordTensorArg(
k, oOperand->GetRawMagic(), offset, shape, oOperand->tensor->rawshape, oOperand->Datatype(), true, oOperand,
nLIST[i][j]->opmagic);
return;
}
int refCount = 0;
typename SubfuncInvokeInfoTy::SuccessorIncastInfoTy relatedIncastList;
if (!SubgraphUtils::IsBoundary(oOperand) || oOperand->GetMemoryTypeOriginal() != MemoryType::MEM_DEVICE_DDR) {
return;
}
auto consumers = oOperand->GetConsumers();
for (auto& consumer : consumers) {
auto eSgId = consumer->GetSubgraphID();
if (eSgId == static_cast<int>(i)) {
continue;
}
refCount++;
int connectedTgtOperandIdx = oOperand->magic;
relatedIncastList.push_back(typename SubfuncInvokeInfoTy::SuccessorIncastRecTy(
eSgId, connectedTgtOperandIdx, nullptr, consumer->GetOpMagic()));
}
iter.RecordOutcast(
i, k, refCount, oOperand->GetRawMagic(), relatedIncastList, offset, shape, oOperand->tensor->rawshape,
oOperand->Datatype(), oOperand, nLIST[i][j]->opmagic);
nLIST[i][j]->outcastRefcount = refCount;
}
void SubgraphToFunction::RecordEsgOutcast(Function& function, size_t i, size_t j, size_t k)
{
auto& iter = subFuncInvokeInfos[i];
auto oOperand = nLIST[i][j]->GetOutputOperand(k);
auto offset = oOperand->offset;
auto shape = oOperand->shape;
if (IsCopyOut(nLIST[i][j]->GetOpcode())) {
offset.clear();
std::shared_ptr<CopyOpAttribute> attr =
std::static_pointer_cast<CopyOpAttribute>(nLIST[i][j]->GetOpAttribute());
std::vector<OpImmediate> opImmList = attr->GetCopyOutAttr().second;
for (auto& opImm : opImmList) {
offset.push_back(
opImm.GetSpecifiedValue().ConcreteValid() ? static_cast<int>(opImm.GetSpecifiedValue()) : -1);
}
shape = attr->GetSpecifiedShape(1);
}
RecordInfo recordInfo = {i, j, k, oOperand, shape, offset};
RecordOutcastInfo(function, recordInfo, iter);
}
void SubgraphToFunction::ConstructnList(Function& function)
{
auto list = function.Operations();
nLIST.resize(function.GetTotalSubGraphCount());
for (size_t i = 0; i < list.size(); i++) {
if (list[i].GetSubgraphID() < 0) {
continue;
}
nLIST[list[i].GetSubgraphID()].push_back(list.operations_[i]);
}
}
void SubgraphToFunction::RecordEsgIncastOutcast(Function& function)
{
for (int i = 0; i < static_cast<int>(nLIST.size()); i++) {
for (size_t j = 0; j < nLIST[i].size(); j++) {
for (size_t k = 0; k < nLIST[i][j]->iOperand.size(); k++) {
RecordEsgIncast(function, i, j, k);
}
for (size_t k = 0; k < nLIST[i][j]->GetOOperands().size(); k++) {
RecordEsgOutcast(function, i, j, k);
}
}
}
}
void SubgraphToFunction::RecordIncastOutcast(Function& function)
{
ConstructnList(function);
for (size_t i = 0; i < nLIST.size(); i++) {
subFuncInvokeInfos.push_back(SubfuncInvokeInfoTy());
}
RecordEsgIncastOutcast(function);
for (auto& item : subFuncInvokeInfos) {
item.DoFinishRecord();
}
}
void SubgraphToFunction::ConstructParamMap(Function& function)
{
if (function.GetFunctionType() == FunctionType::STATIC) {
function.topoInfo_ = staticProcessor_.ConstructSubgraphTopologyInfo(function, subFuncInvokeInfos);
}
for (size_t i = 0; i < subFuncInvokeInfos.size(); i++) {
subFuncInvokeInfos[i].ConstructActualInvokeParam(i);
}
}
void SubgraphToFunction::ProcessInputOperands(
Function& rootFunc, Operation& tileOp, SubfuncParam& pSgParamInfo, int& tParamLoc, int& iParamLoc) const
{
for (size_t k = 0; k < tileOp.GetIOperands().size(); k++) {
auto iOperand = tileOp.GetInputOperand(k);
std::string name = FindSymbolName(iOperand, iOperand->GetRawMagic());
auto offset = iOperand->offset;
auto shape = iOperand->shape;
if (tileOp.HasAttribute(OpAttributeKey::inplaceIdx) && !iOperand->GetProducers().empty()) {
if (!SubgraphUtils::IsBoundary(iOperand)) {
continue;
}
}
if (IsCopyIn(tileOp.GetOpcode())) {
ProcessCopyInOperand(tileOp, offset, shape);
}
if (rootFunc.IsFromInCast(iOperand) || rootFunc.IsFromOutCast(iOperand)) {
pSgParamInfo.AppendTensorParam(
k, iOperand->GetRawMagic(), shape, offset, name, tParamLoc, iOperand->tensor->GetSymbol(),
iOperand->tensor->GetDataType());
tileOp.inParamLocation_.push_back(tParamLoc);
tParamLoc++;
continue;
}
if (!SubgraphUtils::IsBoundary(iOperand) || iOperand->GetMemoryTypeOriginal() != MemoryType::MEM_DEVICE_DDR) {
continue;
}
pSgParamInfo.AppendIncastParam(
k, iOperand->GetRawMagic(), shape, offset, name, iParamLoc, iOperand->tensor->GetSymbol(),
iOperand->tensor->GetDataType());
tileOp.inParamLocation_.push_back(iParamLoc);
iParamLoc++;
}
}
void SubgraphToFunction::ProcessOutputOperands(
Function& rootFunc, Operation& tileOp, SubfuncParam& pSgParamInfo, int& tParamLoc, int& oParamLoc) const
{
for (size_t k = 0; k < tileOp.GetOOperands().size(); k++) {
auto oOperand = tileOp.GetOutputOperand(k);
std::string name = FindSymbolName(oOperand, oOperand->GetRawMagic());
auto offset = oOperand->offset;
auto shape = oOperand->shape;
if (tileOp.HasAttribute(OpAttributeKey::inplaceIdx) &&
(tileOp.GetOpcode() != Opcode::OP_COPY_OUT && tileOp.GetOpcode() != Opcode::OP_INDEX_PUT) &&
tileOp.GetOpcode() != Opcode::OP_INDEX_ADD) {
return;
}
if (IsCopyOut(tileOp.GetOpcode())) {
ProcessCopyOutOperand(tileOp, offset, shape);
}
if (rootFunc.IsFromOutCast(oOperand) || rootFunc.IsFromInCast(oOperand)) {
pSgParamInfo.AppendTensorParam(
k, oOperand->GetRawMagic(), shape, offset, name, tParamLoc, oOperand->tensor->GetSymbol(),
oOperand->tensor->GetDataType());
tileOp.outParamLocation_.push_back(tParamLoc);
tParamLoc++;
continue;
}
if (!SubgraphUtils::IsBoundary(oOperand) || oOperand->GetMemoryTypeOriginal() != MemoryType::MEM_DEVICE_DDR) {
continue;
}
pSgParamInfo.AppendOutcastParam(
k, oOperand->GetRawMagic(), tileOp.outcastRefcount, shape, offset, name, oParamLoc,
oOperand->tensor->GetSymbol(), oOperand->tensor->GetDataType());
tileOp.outParamLocation_.push_back(oParamLoc);
oParamLoc++;
}
}
void SubgraphToFunction::ProcessCopyInOperand(
Operation& tileOp, std::vector<int64_t>& offset, std::vector<int64_t>& shape) const
{
std::shared_ptr<CopyOpAttribute> attr = std::static_pointer_cast<CopyOpAttribute>(tileOp.GetOpAttribute());
if (!attr) {
APASS_LOG_WARN_F(
Elements::Operation,
"Invalid attribute for copyin operation %d. Please check the input graph if the attribute of the operation "
"is missing.",
tileOp.GetOpMagic());
return;
}
std::vector<OpImmediate> opImmList = attr->GetCopyInAttr().first;
shape = attr->GetSpecifiedShape(kShapePlaceholderForParameterized);
if (!opImmList.empty() && opImmList[0].IsParameter()) {
APASS_LOG_DEBUG_F(Elements::Tensor, "CopyinOperand: First operand is paramter, skip offset processiong.");
return;
}
offset.clear();
for (auto& opImm : opImmList) {
offset.push_back(opImm.GetSpecifiedValue().ConcreteValid() ? static_cast<int>(opImm.GetSpecifiedValue()) : -1);
}
}
void SubgraphToFunction::ProcessCopyOutOperand(
Operation& tileOp, std::vector<int64_t>& offset, std::vector<int64_t>& shape) const
{
std::shared_ptr<CopyOpAttribute> attr = std::static_pointer_cast<CopyOpAttribute>(tileOp.GetOpAttribute());
if (!attr) {
APASS_LOG_DEBUG_F(Elements::Tensor, "CopyOutOperand: Invalid op attribute for op %d.", tileOp.GetOpMagic());
return;
}
std::vector<OpImmediate> opImmList = attr->GetCopyOutAttr().second;
shape = attr->GetSpecifiedShape(kShapePlaceholderForParameterized);
if (!opImmList.empty() && opImmList[0].IsParameter()) {
APASS_LOG_DEBUG_F(Elements::Tensor, "CopyOutOperand: First operand is paramter, skip offset processiong.");
return;
}
offset.clear();
for (auto& opImm : opImmList) {
offset.push_back(opImm.GetSpecifiedValue().ConcreteValid() ? static_cast<int>(opImm.GetSpecifiedValue()) : -1);
}
}
void SubgraphToFunction::ProcessSymbolOfReshape(Function& function, Operation& op) const
{
if (op.GetOpcode() == Opcode::OP_RESHAPE) {
if (function.IsFromInCast(op.GetOOperands().front())) {
op.GetOOperands().front()->tensor->SetSymbol(op.GetIOperands().front()->tensor->GetSymbol());
}
}
auto nextOp = *(op.GetOOperands().front()->GetConsumers().begin());
if (nextOp != nullptr && nextOp->GetOpcode() == Opcode::OP_RESHAPE) {
if (function.IsFromOutCast(op.GetOOperands().front())) {
op.GetOOperands().front()->tensor->SetSymbol(nextOp->GetOOperands().front()->tensor->GetSymbol());
}
}
}
void SubgraphToFunction::SymbolizeEachFunction(
Function& rootFunc, std::vector<Function*>& mergedFuncList1, size_t i) const
{
int pSgId = i;
int tParamLoc = 0;
int iParamLoc = 0 | 0x10000000;
int oParamLoc = 0 | 0x20000000;
SubfuncParam pSgParamInfo;
auto& leafFunc = mergedFuncList1[i];
for (auto& tileOp : leafFunc->Operations()) {
ProcessSymbolOfReshape(rootFunc, tileOp);
ProcessInputOperands(rootFunc, tileOp, pSgParamInfo, tParamLoc, iParamLoc);
ProcessOutputOperands(rootFunc, tileOp, pSgParamInfo, tParamLoc, oParamLoc);
}
pSgParamInfo.Finalize();
mergedFuncList1[pSgId]->SetParameter(pSgParamInfo);
mergedFuncList1[pSgId]->SetProgramId(pSgId);
rootFunc.programs_.insert({pSgId, mergedFuncList1[pSgId]});
}
void SubgraphToFunction::SymbolizeFunction(Function& rootFunc, std::vector<Function*>& mergedFuncList1) const
{
for (size_t i = 0; i < mergedFuncList1.size(); i++) {
SymbolizeEachFunction(rootFunc, mergedFuncList1, i);
}
}
void SubgraphToFunction::InsertParameter(size_t i, Function& leafFunc)
{
for (auto& in : subFuncInvokeInfos[i].GetIncastTensorParamList()) {
leafFunc.AppendIncast(in.tensor, in.opMagic, in.operandIdx);
}
for (auto& out : subFuncInvokeInfos[i].GetOutcastTensorParamList()) {
leafFunc.AppendOutcast(out.tensor, out.opMagic, out.operandIdx);
}
for (auto& tensor : subFuncInvokeInfos[i].GetTensorParamList()) {
leafFunc.AddGlobalTensor(tensor.tensor);
if (tensor.isOutputToGM) {
leafFunc.AppendOutcast(tensor.tensor, tensor.opMagic, tensor.operandIdx);
continue;
}
leafFunc.AppendIncast(tensor.tensor, tensor.opMagic, tensor.operandIdx);
}
}
Status SubgraphToFunction::ProcessSubgraph(
Function& function, size_t i, size_t& programIdx, std::vector<Function*>& outputFuncList)
{
auto subgraph = nLIST[i];
auto leafName = function.GetRawName() + "_leaf" + std::to_string(i);
APASS_LOG_DEBUG_F(Elements::Graph, "Add leafFunction %s.", leafName.c_str());
Program::GetInstance().BeginFunction(leafName, FunctionType::STATIC, GraphType::BLOCK_GRAPH);
auto leafFunc = Program::GetInstance().GetCurrentFunction();
leafFunc->SetProgramOp(subgraph);
leafFunc->SetLeafFuncAttribute(std::make_shared<LeafFuncAttribute>());
InsertParameter(i, *leafFunc);
auto result = Program::GetInstance().EndFunction(leafName);
auto callOp = std::get<1>(result);
if (callOp == nullptr) {
APASS_LOG_ERROR_F(Elements::Graph, "leafname %s, program returned nullptr.", leafName.c_str());
return FAILED;
}
callOp->UpdateSubgraphID(i);
callOp->SetSubFuncInvokeInfo(subFuncInvokeInfos[i]);
SetSemanticLabel(subgraph, *callOp);
SetHashOrderInfoOnCallOp(subgraph, *callOp);
return ProcessCacheResult(result, i, programIdx, outputFuncList, *callOp);
}
Status SubgraphToFunction::ProcessCacheResult(
const std::tuple<Function*, Operation*, bool>& result, size_t i, size_t& programIdx,
std::vector<Function*>& outputFuncList, Operation& callOp)
{
const int getValue = 2;
if (std::get<getValue>(result)) {
APASS_LOG_DEBUG_F(
Elements::Operation, "LeafFunc %zu Hit Current hashValue is %lu.", i,
std::get<0>(result)->ComputeHash().GetHash());
psgToESgMap.insert({std::get<0>(result)->GetProgramId(), i});
auto callAttr = dynamic_cast<CallOpAttribute*>(callOp.GetOpAttribute().get());
if (callAttr == nullptr) {
APASS_LOG_ERROR_F(
Elements::Operation, "Failed to get CallOpAttribute for operation %zu. %s", i,
GetFormatBacktrace(callOp).c_str());
return FAILED;
}
auto cacheValue = Program::GetInstance().TryHitCahce(callAttr->GetCalleeHash());
if (!cacheValue) {
APASS_LOG_ERROR_F(
Elements::Operation, "Cache miss for callee hash %lu. %s", callAttr->GetCalleeHash().GetHash(),
GetFormatBacktrace(callOp).c_str());
return FAILED;
}
callAttr->SetCalleeMagicName(cacheValue->GetFunction()->GetMagicName());
callAttr->invokeInfo_->UpdateProgramSubgraphId(std::get<0>(result)->GetProgramId());
return SUCCESS;
}
APASS_LOG_DEBUG_F(
Elements::Operation, "LeafFunc %zu Not Hit. hashValue is %lu.", i,
std::get<0>(result)->ComputeHash().GetHash());
psgToESgMap.insert({programIdx, i});
std::get<0>(result)->SetProgramId(programIdx);
auto callAttr = dynamic_cast<CallOpAttribute*>(callOp.GetOpAttribute().get());
if (callAttr == nullptr) {
APASS_LOG_ERROR_F(
Elements::Operation, "Failed to get CallOpAttribute for operation %zu. %s", i,
GetFormatBacktrace(callOp).c_str());
return FAILED;
}
callAttr->invokeInfo_->UpdateProgramSubgraphId(programIdx);
programIdx++;
outputFuncList.push_back(std::get<0>(result));
std::get<0>(result)->UpdateBelongToThis();
return SUCCESS;
}
void SubgraphToFunction::SetSemanticLabel(const std::vector<std::shared_ptr<Operation>>& subgraph, Operation& callOp)
{
std::shared_ptr<SemanticLabel> label;
if (GetConfig("use_max_freq_label", false)) {
std::unordered_map<std::string, int> freqMap;
std::unordered_map<std::string, std::shared_ptr<SemanticLabel>> labelMap;
for (const auto& op : subgraph) {
auto str = op->GetSemanticLabelStr();
if (str.empty()) {
continue;
}
freqMap[str]++;
labelMap[str] = op->GetSemanticLabel();
}
int maxCount = 0;
for (auto& pair : freqMap) {
if (pair.second > maxCount) {
maxCount = pair.second;
label = labelMap[pair.first];
}
}
} else {
if (subgraph.size() > 0) {
label = subgraph[0]->GetSemanticLabel();
}
}
callOp.SetSemanticLabel(label);
}
void SubgraphToFunction::CopyHashOrderInfoToCallOp(
const std::vector<std::shared_ptr<Operation>>& subgraph, Operation& callOp, const std::string& hashOrderKey,
const std::string& countKey) const
{
for (const auto& op : subgraph) {
if (op == nullptr || !op->HasAttribute(hashOrderKey) || !op->HasAttribute(countKey)) {
continue;
}
auto [hashOrder, subgraphCount] = op->GetHashOrderInfo(hashOrderKey, countKey);
callOp.SetHashOrderInfo(hashOrderKey, countKey, hashOrder, subgraphCount);
return;
}
}
void SubgraphToFunction::SetHashOrderInfoOnCallOp(
const std::vector<std::shared_ptr<Operation>>& subgraph, Operation& callOp) const
{
CopyHashOrderInfoToCallOp(
subgraph, callOp, OpAttributeKey::l1ReuseHashOrder, OpAttributeKey::l1ReuseSubgraphCount);
CopyHashOrderInfoToCallOp(
subgraph, callOp, OpAttributeKey::cubeMergeHashOrder, OpAttributeKey::cubeMergeSubgraphCount);
CopyHashOrderInfoToCallOp(
subgraph, callOp, OpAttributeKey::vecMergeHashOrder, OpAttributeKey::vecMergeSubgraphCount);
}
Status SubgraphToFunction::IslandToFunction(Function& function)
{
auto rootName = Function::CreateRootRawName(function.GetRawName());
Program::GetInstance().BeginFunction(rootName, function.GetFunctionType(), GraphType::EXECUTE_GRAPH);
auto rootFunc = Program::GetInstance().GetCurrentFunction();
if (rootFunc == nullptr) {
APASS_LOG_ERROR_F(Elements::Function, "Failed to create root function.");
return FAILED;
}
InitializeRootFunction(function, *rootFunc);
size_t programIdx = 0;
for (size_t i = 0; i < nLIST.size(); i++) {
Status status = ProcessSubgraph(function, i, programIdx, mergedFuncList);
if (status != SUCCESS) {
APASS_LOG_ERROR_F(Elements::Graph, "Failed to process subgraph %zu.", i);
return status;
}
}
auto rootEndResult = Program::GetInstance().EndFunction(rootName, false);
auto resultFunc = std::get<0>(rootEndResult);
if (resultFunc != rootFunc) {
APASS_LOG_ERROR_F(Elements::Function, "Root function mismatch after finalization.");
return FAILED;
}
if (function.GetFunctionType() == FunctionType::STATIC) {
rootFunc->topoInfo_ = function.topoInfo_;
}
function.rootFunc_ = rootFunc;
if (function.GetFunctionType() == FunctionType::STATIC) {
Status readyStateStatus = staticProcessor_.HandleReadyStates(rootFunc);
if (readyStateStatus != SUCCESS) {
APASS_LOG_ERROR_F(Elements::Graph, "Failed to handle ready states.");
return readyStateStatus;
}
}
SymbolizeFunction(*rootFunc, mergedFuncList);
auto graphNum = nLIST.size();
APASS_LOG_INFO_F(Elements::Operation, "Compressed Graph %zu, total_Graph %zu.", programIdx, graphNum);
return SUCCESS;
}
void SubgraphToFunction::InitializeRootFunction(Function& function, Function& rootFunc)
{
rootFunc.SetParent(nullptr);
if (function.IsFunctionTypeAndGraphType(
FunctionType::DYNAMIC_LOOP_PATH, {GraphType::TENSOR_GRAPH, GraphType::TILE_GRAPH})) {
rootFunc.SetDynloopAttribute(function.GetDynloopAttribute());
}
for (auto& tensor : function.outCasts_) {
auto newOutcast = tensor->Clone(rootFunc);
rootFunc.outCasts_.push_back(newOutcast);
auto it = function.outIncastLinkMap.find(tensor->tensor);
if (it != function.outIncastLinkMap.end()) {
rootFunc.outIncastLinkMap[newOutcast->tensor] = it->second;
}
}
for (auto& tensor : function.inCasts_) {
auto newIncast = tensor->Clone(rootFunc);
rootFunc.inCasts_.push_back(newIncast);
for (auto it : rootFunc.outIncastLinkMap) {
if (it.second == tensor->tensor) {
rootFunc.outIncastLinkMap[it.first] = newIncast->tensor;
}
}
}
TensorSet allTensors = GraphUtils::GetAllTensors(rootFunc);
APASS_LOG_DEBUG_F(
Elements::Operation, "Root function tensor map size is %zu.", allTensors.size());
}
struct GetTensorDataOutcastDesc {
std::unordered_map<Opcode, std::vector<Operation*>> opListDict;
Operation* mark;
Operation* copyout;
std::shared_ptr<LogicalTensor> outcast;
};
static std::unordered_map<int, GetTensorDataOutcastDesc> GetTensorDataBuildOutcastDescDict(Function& function)
{
auto operationViewer = function.Operations(false);
std::unordered_map<int, GetTensorDataOutcastDesc> getTensorDataOutcastDescDict;
for (size_t i = 0; i < operationViewer.size(); i++) {
auto& op = operationViewer[i];
int index = GetTensorDataGetIndex(&op);
if (index != -1) {
getTensorDataOutcastDescDict[index].opListDict[op.GetOpcode()].push_back(&op);
}
}
for (auto& [index, desc] : getTensorDataOutcastDescDict) {
(void)index;
ASSERT(OperationErr::OP_SPECIAL_CONSTRAINT, desc.opListDict[Opcode::OP_ADDS].size() == 1)
<< "Expect the size is 1 for opListDict, but we get " << desc.opListDict[Opcode::OP_ADDS].size()
<< "OP_ADDS";
auto mark = desc.opListDict[Opcode::OP_ADDS][0];
std::shared_ptr<LogicalTensor> addsOpOut = mark->GetOOperands()[0];
auto copyout = *addsOpOut->GetConsumers().begin();
ASSERT(OperationErr::OP_SPECIAL_CONSTRAINT, copyout->GetOpcode() == Opcode::OP_COPY_OUT)
<< "Expect Opcode OP_COPY_OUT, but we get " << copyout->GetOpcodeStr() << " at operation["
<< copyout->GetOpMagic() << "].";
;
auto outcast = copyout->GetOOperands()[0];
desc.mark = mark;
desc.copyout = copyout;
desc.outcast = outcast;
}
return getTensorDataOutcastDescDict;
}
struct GetTensorDataUsageDesc {
Operation* refOp;
std::map<int, std::vector<RawSymbolicScalarPtr>> usageDict;
MemoryType subgraphMemoryType;
int subgraphID;
GetTensorDataUsageDesc(
Operation* refOp_, const std::map<int, std::vector<RawSymbolicScalarPtr>>& usageDict_,
MemoryType subgraphMemoryType_, int subgraphID_)
: refOp(refOp_), usageDict(usageDict_), subgraphMemoryType(subgraphMemoryType_), subgraphID(subgraphID_)
{}
};
std::shared_ptr<LogicalTensor> GetTensorDataSubgraphTensor(Operation& refOp)
{
std::shared_ptr<LogicalTensor> subgraphTensor;
switch (refOp.GetOpcode()) {
case Opcode::OP_COPY_IN:
subgraphTensor = refOp.GetOOperands()[0];
break;
case Opcode::OP_COPY_OUT:
subgraphTensor = refOp.GetIOperands()[0];
break;
case Opcode::OP_VEC_DUP:
subgraphTensor = refOp.GetOOperands()[0];
break;
case Opcode::OP_BIND_TENSOR:
subgraphTensor = refOp.GetOOperands()[0];
break;
case Opcode::OP_SHMEM_LOAD:
subgraphTensor = refOp.GetOOperands()[0];
break;
case Opcode::OP_SHMEM_STORE:
subgraphTensor = refOp.GetIOperands()[1];
break;
case Opcode::OP_SHMEM_GET:
subgraphTensor = refOp.GetIOperands()[1];
break;
case Opcode::OP_SHMEM_PUT:
subgraphTensor = refOp.GetIOperands()[2];
break;
case Opcode::OP_VIEW:
subgraphTensor = refOp.GetOOperands()[0];
break;
default:
break;
}
return subgraphTensor;
}
static std::vector<GetTensorDataUsageDesc> GetTensorDataBuildUsageDesc(Function& function)
{
auto operationViewer = function.Operations(false);
std::vector<GetTensorDataUsageDesc> getTensorDataUsageDescList;
for (size_t i = 0; i < operationViewer.size(); i++) {
auto& refOp = operationViewer[i];
std::vector<std::reference_wrapper<SymbolicScalar>> dynScalarList = refOp.GetDynamicAttributeList();
if (dynScalarList.size() == 0) {
continue;
}
std::map<int, std::vector<RawSymbolicScalarPtr>> usageDict = GetTensorDataDict(dynScalarList);
if (usageDict.size() == 0) {
continue;
}
std::shared_ptr<LogicalTensor> subgraphTensor = GetTensorDataSubgraphTensor(refOp);
ASSERT(TensorErr::TENSOR_NULL_POINTER, subgraphTensor != nullptr)
<< "Expect operation[" << refOp.GetOpMagic()
<< "] has valid IOperand/OOperand, but we get nullptr. Please check the operation.";
MemoryType subgraphMemoryType = subgraphTensor->GetMemoryTypeToBe();
int subgraphID = CommonUtils::GetTensorSubgraphID(subgraphTensor);
getTensorDataUsageDescList.emplace_back(&refOp, usageDict, subgraphMemoryType, subgraphID);
}
return getTensorDataUsageDescList;
}
Status SubgraphToFunction::GetTensorDataDependencyInsert(Function& function)
{
IRBuilder builder;
std::unordered_map<int, GetTensorDataOutcastDesc> getTensorDataOutcastDescDict =
GetTensorDataBuildOutcastDescDict(function);
std::vector<GetTensorDataUsageDesc> getTensorDataUsageDescList = GetTensorDataBuildUsageDesc(function);
for (auto& [refOp, usageDict, subgraphMemoryType, subgraphID] : getTensorDataUsageDescList) {
for (auto& [index, callList] : usageDict) {
std::shared_ptr<LogicalTensor> copyInTensor;
if (callList.size() == 0) {
APASS_LOG_ERROR_F(
Elements::Function,
"Call list is empty in function %s. Please check whether the input graph is complete.",
function.GetRawName().c_str());
return FAILED;
}
auto getTensorDataIOType =
callList[0]->GetExpressionOperandList()[GET_TENSOR_DATA_OPERAND_INDEX_IOTYPE]->GetImmediateValue();
auto getTensorDataIOTypeIndex = callList[0]
->GetExpressionOperandList()[GET_TENSOR_DATA_OPERAND_INDEX_IOTYPE_INDEX]
->GetImmediateValue();
std::shared_ptr<LogicalTensor> copyInSourceTensor;
std::shared_ptr<CopyOpAttribute> copyInAttr;
if (getTensorDataIOType == GET_TENSOR_DATA_OPERAND_IOTYPE_INCAST) {
copyInSourceTensor = function.GetIncast()[getTensorDataIOTypeIndex];
copyInTensor = builder.CreateTensorVar(
copyInSourceTensor->Datatype(), copyInSourceTensor->GetShape(), std::vector<SymbolicScalar>{},
copyInSourceTensor->Format());
GraphUtils::CopyDynStatus(copyInTensor, copyInSourceTensor);
std::vector<OpImmediate> copyInOffset(
OpImmediate::Specified(std::vector<int64_t>(copyInTensor->GetShape().size(), 0)));
std::vector<OpImmediate> copyInShape(OpImmediate::Specified(copyInTensor->GetShape()));
std::vector<OpImmediate> copyInRawShape(OpImmediate::Specified(copyInTensor->GetShape()));
copyInAttr =
std::make_shared<CopyOpAttribute>(copyInOffset, MemoryType::MEM_UB, copyInShape, copyInRawShape);
} else if (getTensorDataIOType == GET_TENSOR_DATA_OPERAND_IOTYPE_OUTCAST) {
if (!getTensorDataOutcastDescDict.count(index)) {
APASS_LOG_ERROR_F(
Elements::Function,
"Index %d is not found in function %s. Please check whether the input graph is complete.",
index, function.GetRawName().c_str());
return FAILED;
}
auto& outcastDesc = getTensorDataOutcastDescDict[index];
auto outcastAttr = std::static_pointer_cast<CopyOpAttribute>(outcastDesc.copyout->GetOpAttribute());
copyInSourceTensor = outcastDesc.outcast;
copyInTensor = builder.CreateTensorVar(
copyInSourceTensor->Datatype(), copyInSourceTensor->GetShape(), std::vector<SymbolicScalar>{},
copyInSourceTensor->Format());
GraphUtils::CopyDynStatus(copyInTensor, copyInSourceTensor);
copyInAttr = std::make_shared<CopyOpAttribute>(
outcastAttr->GetToOffset(), MemoryType::MEM_UB, outcastAttr->GetShape(),
outcastAttr->GetRawShape());
} else {
APASS_LOG_ERROR_F(
Elements::Function,
"The operation is neither MOVE_IN nor MOVE_OUT in function %s. Please check whether the input "
"graph is valid.",
function.GetRawName().c_str());
return FAILED;
}
copyInTensor->SetMemoryTypeBoth(subgraphMemoryType);
auto& copyInOp = PassOperationUtils::AddOperation(
function, Opcode::OP_COPY_IN, {copyInSourceTensor}, {copyInTensor},
[©InAttr](Operation& op) {
op.SetOpAttribute(copyInAttr);
});
copyInOp.UpdateSubgraphID(subgraphID);
SetEmuOpcode(©InOp, EMUOP_TENSOR_GETDATA_DEPEND);
GetTensorDataSetIndex(©InOp, index);
refOp->GetIOperands().push_back(copyInTensor);
copyInTensor->AddConsumer(refOp);
}
}
return SUCCESS;
}
Status SubgraphToFunction::GetTensorDataDependencyClear(Function& function)
{
auto root = function.GetRootFunction();
IRBuilder builder;
for (const auto& [psgId, leaf] : root->programs_) {
(void)psgId;
auto iodescDict = leaf->GetTensorDataForLeafGraph();
for (auto& op : leaf->Operations(false)) {
if (!CheckEmuOpcode(&op, EMUOP_TENSOR_GETDATA_DEPEND)) {
continue;
}
auto& copyInOp = op;
copyInOp.SetAsDeleted();
int tensorIndex = GetTensorDataGetIndex(&op);
int addrCoaIndex = GetTensorDataGetCoaIndex(&op);
if (tensorIndex == -1) {
APASS_LOG_ERROR_F(
Elements::Operation, "Atrribute op_emuop_GetTensorData_index is not found for operation[%d]. %s",
op.GetOpMagic(), GetFormatBacktrace(copyInOp).c_str());
return FAILED;
}
if (addrCoaIndex == -1) {
APASS_LOG_ERROR_F(
Elements::Operation, "Atrribute op_emuop_GetTensorData_coaIndex is not found for operation[%d]. %s",
op.GetOpMagic(), GetFormatBacktrace(copyInOp).c_str());
return FAILED;
}
auto incastIndex = leaf->GetIncastIndex(copyInOp.GetIOperands()[0]);
auto desc =
GetTensorDataIODesc(
GET_TENSOR_DATA_OPERAND_IOTYPE_INCAST, incastIndex,
GetParamAddrSymbol()(builder.CreateConstInt(-1), builder.CreateConstInt(addrCoaIndex)));
iodescDict[tensorIndex] = desc;
}
leaf->GetTensorDataRefreshIO(iodescDict);
leaf->EraseOperations(true, true);
}
return SUCCESS;
}
void SubgraphToFunction::DoHealthCheckAfter(Function& function, const std::string& folderPath)
{
std::string prefix = GetDumpFilePrefix(function);
std::string reportPath = folderPath + "/" + prefix + "_ExecuteGraph_Health_Report.json";
GenerateAndExportCombinedReport(function, psgToESgMap, nLIST, reportPath);
}
void SubgraphToFunction::GenerateAndExportCombinedReport(
Function& func, const std::multimap<int, int>& psgToESgMapParam,
const std::vector<std::vector<OperationPtr>>& subgraphGroups, const std::string& filename)
{
json report;
ExecutionGraphStatistic execAnalyzer;
report["execution_graph_analysis"] = execAnalyzer.AnalyzeExecutionGraph(func, psgToESgMapParam, subgraphGroups);
std::ofstream outfile(filename);
constexpr int JSON_INDENTATION_SPACES = 4;
outfile << report.dump(JSON_INDENTATION_SPACES);
outfile.close();
}
Status SubgraphToFunction::TransViewToCopyInBeforeGenSubgraph(Function& function)
{
for (auto& op : function.Operations(false)) {
if (op.GetOpcode() != Opcode::OP_VIEW) {
continue;
}
if (!op.HasAttribute(OpAttributeKey::inplaceIdx)) {
continue;
}
if (op.GetOOperands().size() != 1) {
APASS_LOG_ERROR_F(
Elements::Operation, "Operation[%d] is OP_VIEW. We Expect it has one OOperand but get %zu instead. %s",
op.GetOpMagic(), op.GetOOperands().size(), GetFormatBacktrace(op).c_str());
return FAILED;
}
auto oOperand = op.GetOutputOperand(0);
bool canTrans = true;
for (const auto& consumer : oOperand->GetConsumers()) {
if (OpcodeManager::Inst().IsSharedMemory(consumer->GetOpcode())) {
canTrans = false;
break;
}
if (OpcodeManager::Inst().IsCopyIn(consumer->GetOpcode())) {
return SUCCESS;
}
}
if (!canTrans) {
continue;
}
op.SetOpCode(Opcode::OP_COPY_IN);
auto viewOpAttribute = dynamic_cast<ViewOpAttribute*>(op.GetOpAttribute().get());
if (viewOpAttribute == nullptr) {
APASS_LOG_ERROR_F(
Elements::Operation, "OP Attribute is not found at Operation[%d]. %s", op.GetOpMagic(),
GetFormatBacktrace(op).c_str());
return FAILED;
}
viewToCopyInMapping_.emplace(&op, op.GetOpAttribute());
op.SetOpAttribute(std::make_shared<CopyOpAttribute>(
OpImmediate::Specified(viewOpAttribute->GetFromTensorOffset()), viewOpAttribute->GetTo(),
OpImmediate::Specified(op.oOperand.front()->shape),
OpImmediate::Specified(op.iOperand.front()->tensor->GetDynRawShape()),
OpImmediate::Specified(viewOpAttribute->GetToDynValidShape())));
}
return SUCCESS;
}
Status SubgraphToFunction::RecoverCopyInToViewAfterGenSubgraph(Function& function)
{
for (auto& program : function.rootFunc_->programs_) {
for (auto& op : program.second->Operations(false)) {
if (op.HasAttribute(OpAttributeKey::inplaceIdx) && op.GetOpcode() == Opcode::OP_COPY_IN) {
if (viewToCopyInMapping_.count(&op) <= 0) {
APASS_LOG_ERROR_F(
Elements::Operation,
"Operation[%d] is not found after SubgrahToFunction. It exists before the pass. %s",
op.GetOpMagic(), GetFormatBacktrace(op).c_str());
return FAILED;
}
op.SetOpCode(Opcode::OP_VIEW);
op.SetOpAttribute(viewToCopyInMapping_.at(&op));
}
}
}
return SUCCESS;
}
Status SubgraphToFunction::PreCheck(Function& function)
{
SubGraphToFuncChecker checker;
return checker.DoPreCheck(function);
}
Status SubgraphToFunction::PostCheck(Function& function)
{
SubGraphToFuncChecker checker;
if (function.GetFunctionType() == FunctionType::STATIC) {
checker.SetInOutGraph(staticProcessor_.inGraph, staticProcessor_.outGraph);
checker.SetColorGraph(staticProcessor_.colorInGraph, staticProcessor_.colorOutGraph);
}
return checker.DoPostCheck(function);
}
}