/**
* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
/*!
* \file function.cpp
* \brief
*/
#include "interface/function/function.h"
#include <queue>
#include <algorithm>
#include <unordered_map>
#include "interface/inner/pre_def.h"
#include "interface/cache/hash.h"
#include "interface/operation/opcode.h"
#include "interface/operation/operation.h"
#include "interface/tensor/tensor_offset.h"
#include "interface/utils/id_gen.h"
#include "tilefwk/data_type.h"
#include "tilefwk/symbolic_scalar.h"
#include "tilefwk/tilefwk.h"
#include "interface/inner/tilefwk.h"
#include "interface/program/program.h"
#include "interface/operation/attribute.h"
#include "interface/tensor/logical_tensor.h"
#include "interface/interpreter/raw_tensor_data.h"
#include "interface/tensor/raw_tensor.h"
#include "interface/configs/config_manager.h"
#include "interface/operation/operation_impl.h"
#include "interface/utils/serialization.h"
#include "interface/interpreter/flow_verifier.h"
#include "passes/pass_utils/subgraph_utils.h"
#include "passes/pass_utils/pass_utils.h"
#include "passes/pass_utils/graph_utils.h"
using namespace npu::tile_fwk;
namespace {
// File-local constants. Their consumers are not visible in this part of the file.
// Single-space string constant.
const std::string PREFIX = " ";
// Named integer constants (3 and -2).
const int SPACE_NUM_THREE = 3;
const int LAST_TWO = -2;
// Opcode set named SPECIAL_OPCODE_SET; NOTE(review): the code that queries it is outside this chunk.
const std::set<Opcode> SPECIAL_OPCODE_SET = {Opcode::OP_INDEX_OUTCAST, Opcode::OP_VIEW, Opcode::OP_ASSEMBLE,
Opcode::OP_CALL, Opcode::OP_CONVERT, Opcode::OP_COPY_IN,
Opcode::OP_COPY_OUT};
/**
 * Ordered key made of a raw-tensor magic, a shape, a static offset and a list of dynamic offsets.
 *
 * The ordering compares, in this priority: shape, offset, number of dynamic offsets, the
 * dynamic offsets element by element (by the value returned from Raw()), and finally rawMagic.
 */
struct ViewKey {
    ViewKey(
        const int magic, const std::vector<int64_t>& newShape, const std::vector<int64_t>& newOffset,
        const std::vector<SymbolicScalar>& tmpDynOffset)
        : rawMagic(magic), shape(newShape), offset(newOffset), dynOffset(tmpDynOffset)
    {}

    /** Strict ordering used when ViewKey serves as a key in ordered containers. */
    bool operator<(const ViewKey& x) const
    {
        if (shape != x.shape) {
            return shape < x.shape;
        }
        if (offset != x.offset) {
            return offset < x.offset;
        }

        const size_t lhsCount = dynOffset.size();
        const size_t rhsCount = x.dynOffset.size();
        if (lhsCount != rhsCount) {
            return lhsCount < rhsCount;
        }

        // Same length: the first differing dynamic offset decides.
        for (size_t pos = 0; pos < lhsCount; ++pos) {
            const auto& lhsRaw = dynOffset[pos].Raw();
            const auto& rhsRaw = x.dynOffset[pos].Raw();
            if (lhsRaw != rhsRaw) {
                return lhsRaw < rhsRaw;
            }
        }

        // Everything else is equal; fall back to the raw magic.
        return rawMagic < x.rawMagic;
    }

    int rawMagic;
    Shape shape;
    Offset offset;
    std::vector<SymbolicScalar> dynOffset;
};
// Dependency record for one tensor, used by LightweightOperationSorter.
struct TensorDependency {
// Indices (into the sorter's operation list) of the operations that produce this tensor.
std::vector<int> producers;
// Number of recorded consumer uses that have not been released yet.
int pendingConsumerCount{0};
};
// Working state shared by the steps of LightweightOperationSorter.
struct SortContext {
// Operation pointer -> position of that operation in the input list.
std::unordered_map<const Operation*, int> opToIndex;
// Tensor pointer -> index of its entry in tensorDeps.
std::unordered_map<const LogicalTensor*, size_t> tensorToDep;
// One dependency record per tensor seen while scanning operands.
std::vector<TensorDependency> tensorDeps;
// For each operation index, the tensorDeps indices that operation consumes.
std::vector<std::vector<size_t>> tensorDepsByConsumer;
// For each operation index, how many produced tensor dependencies still have pending consumers.
std::vector<int> outDegree;
};
// Orders the operations of one function so that producers come before their consumers.
//
// The sort works backwards: operations whose produced tensors have no pending consumers are
// emitted first, releasing a tensor's producers once all of its consumers were emitted, and the
// resulting sequence is reversed at the end.
// The sorter only stores references to its constructor arguments; they must outlive it.
class LightweightOperationSorter {
public:
LightweightOperationSorter(
const Function& function, const std::vector<std::shared_ptr<Operation>>& operations,
const LogicalTensors& inCasts, const LogicalTensors& outCasts)
: function_(function), operations_(operations), inCasts_(inCasts), outCasts_(outCasts)
{}
// Builds the dependency tables, runs the sort, validates it and returns the operations in
// producer-before-consumer order.
std::vector<std::shared_ptr<Operation>> Sort()
{
BuildOpIndex();
BuildTensorDeps();
BuildOutDegree();
auto sortedOperations = RunTopologicalSort();
CheckSortedOperations(sortedOperations);
// RunTopologicalSort emits consumers before producers; flip to the final order.
std::reverse(sortedOperations.begin(), sortedOperations.end());
return sortedOperations;
}
private:
// Maps every operation pointer to its position in operations_; asserts that no pointer repeats.
void BuildOpIndex()
{
context_.opToIndex.reserve(operations_.size());
for (size_t idx = 0; idx < operations_.size(); idx++) {
auto op = operations_[idx].get();
auto insertResult = context_.opToIndex.emplace(op, static_cast<int>(idx));
FE_ASSERT(FeError::IS_EXIST, insertResult.second) << "Duplicate operation found: " << op->Dump();
}
}
// Returns the tensorDeps index for `tensor`, creating the record on first sight.
// Only producers that belong to function_ are recorded; each must already be in opToIndex.
size_t GetOrCreateTensorDepIndex(const LogicalTensorPtr& tensor)
{
auto iter = context_.tensorToDep.find(tensor.get());
if (iter != context_.tensorToDep.end()) {
return iter->second;
}
size_t depIndex = context_.tensorDeps.size();
context_.tensorToDep.emplace(tensor.get(), depIndex);
TensorDependency dep;
dep.producers.reserve(tensor->GetProducers().size());
for (const auto& producer : tensor->GetProducers()) {
// Producers owned by another function impose no ordering inside this one.
if (producer->BelongTo() != &function_) {
continue;
}
auto producerIter = context_.opToIndex.find(producer);
FE_ASSERT(FeError::NOT_EXIST, producerIter != context_.opToIndex.end())
<< "Producer not found in opToIndex: " << producer->Dump();
dep.producers.emplace_back(producerIter->second);
}
context_.tensorDeps.emplace_back(std::move(dep));
return depIndex;
}
// Registers that operation `consumerIdx` uses `tensor`. Tensors without in-function producers
// are ignored because they cannot constrain the order.
void RecordTensorDep(int consumerIdx, const LogicalTensorPtr& tensor)
{
size_t depIndex = GetOrCreateTensorDepIndex(tensor);
auto& dep = context_.tensorDeps[depIndex];
if (dep.producers.empty()) {
return;
}
dep.pendingConsumerCount++;
context_.tensorDepsByConsumer[consumerIdx].emplace_back(depIndex);
}
// Scans every operation and records its input operands, its depend operands and, for non-call
// operations, the incast/outcast tensors referenced through its dynamic attributes.
void BuildTensorDeps()
{
context_.tensorToDep.reserve(operations_.size());
context_.tensorDepsByConsumer.resize(operations_.size());
for (size_t idx = 0; idx < operations_.size(); idx++) {
const auto& op = operations_[idx];
for (const auto& iop : op->iOperand) {
RecordTensorDep(static_cast<int>(idx), iop);
}
for (const auto& dop : op->dependOperand) {
RecordTensorDep(static_cast<int>(idx), dop);
}
if (!op->IsCall()) {
// NOTE(review): `index` is used to subscript inCasts_/outCasts_ without a bounds check.
for (auto [type, index] : GetTensorDataUsage(op->GetDynamicAttributeList())) {
if (type == GET_TENSOR_DATA_OPERAND_IOTYPE_INCAST) {
RecordTensorDep(static_cast<int>(idx), inCasts_[index]);
} else if (type == GET_TENSOR_DATA_OPERAND_IOTYPE_OUTCAST) {
RecordTensorDep(static_cast<int>(idx), outCasts_[index]);
}
}
}
}
}
// Computes, for each operation, how many tensor dependencies it produces that still have
// pending consumers.
void BuildOutDegree()
{
context_.outDegree.assign(operations_.size(), 0);
for (auto& dep : context_.tensorDeps) {
if (dep.producers.empty() || dep.pendingConsumerCount == 0) {
continue;
}
for (int producerIdx : dep.producers) {
context_.outDegree[producerIdx]++;
}
}
}
// Queue-driven sort in consumer-before-producer order: starts from operations with zero
// outDegree and releases producers as their consumers are emitted.
std::vector<std::shared_ptr<Operation>> RunTopologicalSort()
{
std::queue<int> readyOps;
for (size_t idx = 0; idx < operations_.size(); idx++) {
if (context_.outDegree[idx] == 0) {
readyOps.emplace(idx);
}
}
// A producer becomes ready once its last outstanding tensor dependency was released.
auto releasePrevOp = [this, &readyOps](int prevOpIndex) {
if (--context_.outDegree[prevOpIndex] == 0) {
readyOps.emplace(prevOpIndex);
}
};
// A tensor dependency is released when its last pending consumer has been emitted.
auto releaseTensorDep = [this, &releasePrevOp](size_t depIndex) {
auto& dep = context_.tensorDeps[depIndex];
if (--dep.pendingConsumerCount == 0) {
for (int producerIdx : dep.producers) {
releasePrevOp(producerIdx);
}
}
};
std::vector<std::shared_ptr<Operation>> sortedOperations;
sortedOperations.reserve(operations_.size());
while (!readyOps.empty()) {
int currIndex = readyOps.front();
readyOps.pop();
sortedOperations.emplace_back(operations_[currIndex]);
for (size_t depIndex : context_.tensorDepsByConsumer[currIndex]) {
releaseTensorDep(depIndex);
}
}
return sortedOperations;
}
// Asserts that the sort consumed every operation: a non-zero outDegree left behind is reported
// as a cycle, and the output size must equal the input size.
void CheckSortedOperations(const std::vector<std::shared_ptr<Operation>>& sortedOperations) const
{
for (auto& op : operations_) {
const int opIndex = context_.opToIndex.at(op.get());
FE_ASSERT(context_.outDegree[opIndex] == 0) << "cycle detected: " << op->Dump();
}
FE_ASSERT(operations_.size() == sortedOperations.size())
<< "Sorted operations size mismatch: " << sortedOperations.size() << " and original size "
<< operations_.size();
}
// Function whose operations are being sorted; used to filter out foreign producers.
const Function& function_;
// Operations to sort (not owned).
const std::vector<std::shared_ptr<Operation>>& operations_;
// Incast/outcast tensor lists used to resolve dynamic-attribute tensor references.
const LogicalTensors& inCasts_;
const LogicalTensors& outCasts_;
SortContext context_;
};
}
std::vector<Operation*> OperationsViewer::DuplicatedOpList() const
{
std::vector<Operation*> opList;
opList.reserve(operations_.size());
for (auto op : operations_) {
opList.emplace_back(op.get());
}
return opList;
}
std::string DynloopFunctionPathNode::Dump() const
{
int indent = 2;
std::ostringstream oss;
std::function<void(const DynloopFunctionPathNode*, int)> dump =
[&oss, &indent, &dump](const DynloopFunctionPathNode* node, int level) {
if (!node->cond.IsValid()) {
oss << std::setw(level * indent) << ' ' << node->root->GetRawName() << "("
<< node->root->GetFunctionHash() << ")\n";
} else {
oss << std::setw(level * indent) << ' ' << node->cond.Dump() << "\n";
if (node->branchNodeList[0] != nullptr) {
dump(node->branchNodeList[0].get(), level + 1);
}
if (node->branchNodeList[1] != nullptr) {
dump(node->branchNodeList[1].get(), level + 1);
}
}
};
dump(this, 0);
return oss.str();
}
std::shared_ptr<DynloopFunctionPathNode> DynloopFunctionAttribute::BuildPathNode()
{
std::shared_ptr<DynloopFunctionPathNode> root = std::make_shared<DynloopFunctionPathNode>();
if (pathList.size() == 1) {
root->root = pathList[0].GetRoot();
} else {
for (size_t i = 0; i < pathList.size(); i++) {
auto node = root;
for (size_t j = 0; j < pathList[i].pathCondList.size(); j++) {
auto& pathCond = pathList[i].pathCondList[j];
if (!node->cond.IsValid()) {
node->cond = pathCond.GetCond();
}
if (node->branchNodeList[pathCond.IsSat()] == nullptr) {
node->branchNodeList[pathCond.IsSat()] = std::make_shared<DynloopFunctionPathNode>();
}
node = node->branchNodeList[pathCond.IsSat()];
}
node->root = pathList[i].GetRoot();
}
}
return root;
}
std::string DynloopFunctionAttribute::DumpBranch() const
{
std::ostringstream oss;
for (size_t i = 0; i < pathList.size(); i++) {
auto& path = pathList[i];
oss << "Branch-" << i << ": "
<< "\n";
for (size_t j = 0; j < path.pathCondList.size(); j++) {
auto& cond = path.pathCondList[j];
oss << " " << cond.GetFile() << ":" << cond.GetLine() << "] " << cond.GetCond().Dump() << ":"
<< cond.IsSat() << "\n";
}
}
oss << "current:" << currIndex << "\n";
for (size_t i = 0; i < currPathCond.size(); i++) {
oss << " " << currPathCond[i].GetFile() << ":" << currPathCond[i].GetLine() << "] "
<< currPathCond[i].GetCond().Dump() << ":" << currPathCond[i].IsSat() << "\n";
}
return oss.str();
}
std::vector<DynloopFunctionPathCondition> DynloopFunctionAttribute::GenCondWithBeginEnd(
const std::vector<DynloopFunctionPathCondition>& conds) const
{
std::vector<DynloopFunctionPathCondition> resultPathCond = conds;
for (auto& cond : resultPathCond) {
if (!cond.cond_.IsExpression()) {
continue;
}
auto expr = std::static_pointer_cast<RawSymbolicExpression>(cond.cond_.Raw());
if (expr->IsLoopEndCall()) {
std::vector<RawSymbolicScalarPtr> operandList{
expr->OperandList()[0], expr->OperandList()[1],
RawSymbolicExpression::CreateBopSub(expr->OperandList()[2], originalRange.Step().Raw())};
auto newExpr = std::make_shared<RawSymbolicExpression>(SymbolicOpcode::T_MOP_CALL, operandList);
cond.cond_ = SymbolicScalar(newExpr);
}
}
return resultPathCond;
}
/**
 * Records the path that was just traced and reports whether exploration is complete.
 *
 * The current conditions (adjusted by GenCondWithBeginEnd) are appended to pathList together
 * with `pathFunc` and `operation`, and `unroll` is stored in unrollTimes.
 *
 * @return false as soon as an unsatisfied, non-constant condition is found that is not one of
 *         the skipped loop begin/end cases; true otherwise.
 */
bool DynloopFunctionAttribute::IterationEnd(int unroll, Function* pathFunc, Operation* operation)
{
    auto resultPathCond = GenCondWithBeginEnd(currPathCond);
    unrollTimes = unroll;
    pathList.emplace_back(pathFunc, resultPathCond, operation);

    for (const auto& pathCond : currPathCond) {
        if (pathCond.IsSat() || pathCond.isConst_) {
            continue;
        }
        const auto& cond = pathCond.cond_;
        if (IsLoopBeginOrEndExpr(cond)) {
            if (!cond.IsLoopBegin() && !cond.IsLoopEnd()) {
                continue;
            }
            const auto expr = std::static_pointer_cast<RawSymbolicExpression>(cond.Raw());
            if (expr->IsLoopBeginCall() && !cond.IsLoopBegin()) {
                continue;
            }
            if (expr->IsLoopEndCall() && !cond.IsLoopEnd()) {
                continue;
            }
        }
        // An unexplored branch remains.
        return false;
    }
    return true;
}
/**
 * Tries to determine the outcome of `cond` without recording a new branch.
 *
 * A condition with a valid concrete value yields that value. Otherwise the condition's dump text
 * is compared with the conditions already on the current path, and the first textual match
 * supplies its recorded outcome.
 *
 * @param result receives the outcome when the function returns true; untouched otherwise.
 * @return true when an outcome was found.
 */
bool DynloopFunctionAttribute::GuessCondResult(const SymbolicScalar& cond, bool& result)
{
    if (!cond.ConcreteValid()) {
        const auto wanted = cond.Dump();
        for (auto& known : currPathCond) {
            if (wanted == known.GetCond().Dump()) {
                result = known.IsSat();
                return true;
            }
        }
        return false;
    }
    result = cond.Concrete();
    return true;
}
/**
 * Returns the outcome to use for the next condition met on the current path and advances
 * currIndex.
 *
 * While currIndex is within currPathCond the stored outcome is returned. Past the end, a new
 * entry is appended with the outcome from GuessCondResult (false when nothing could be guessed)
 * together with the source location.
 */
bool DynloopFunctionAttribute::AppendCond(const SymbolicScalar& cond, const std::string& file, int line)
{
    bool result = false;
    const bool alreadyRecorded = currIndex < currPathCond.size();
    if (alreadyRecorded) {
        result = currPathCond[currIndex].IsSat();
    } else {
        const bool isConst = GuessCondResult(cond, result);
        currPathCond.emplace_back(result, isConst, cond, file, line);
    }
    ++currIndex;
    return result;
}
/**
 * Prepares currPathCond for the next path to explore and resets currIndex to 0.
 *
 * With no recorded path the condition list is simply cleared. Otherwise the list is scanned from
 * the back for the last unsatisfied, non-constant condition that is not a skipped loop begin/end
 * call; that condition is flipped to satisfied and everything after it is dropped.
 */
void DynloopFunctionAttribute::CreateCurrCond()
{
    if (pathList.empty()) {
        currPathCond.clear();
        currIndex = 0;
        return;
    }

    for (size_t pos = currPathCond.size(); pos-- > 0;) {
        auto& candidate = currPathCond[pos];
        if (candidate.IsSat() || candidate.isConst_) {
            continue;
        }
        const auto& cond = candidate.cond_;
        if (cond.IsExpression()) {
            const auto expr = std::static_pointer_cast<RawSymbolicExpression>(cond.Raw());
            if (expr->IsLoopBeginCall() && !cond.IsLoopBegin()) {
                continue;
            }
            if (expr->IsLoopEndCall() && !cond.IsLoopEnd()) {
                continue;
            }
        }
        candidate.IsSat() = true;
        currPathCond.erase(currPathCond.begin() + pos + 1, currPathCond.end());
        break;
    }
    currIndex = 0;
}
/**
 * Creates a function that belongs to `belongTo`.
 *
 * @param belongTo       program owning the function
 * @param funcMagicName  stored as the magic name
 * @param funcRawName    stored as the raw name
 * @param parentFunc     stored as the parent function (may be passed as nullptr by callers;
 *                       NOTE(review): nullability is not checked here)
 */
Function::Function(
    const Program& belongTo, const std::string& funcMagicName, const std::string& funcRawName, Function* parentFunc)
    : ir::Function(ir::Span::Unknown()),
      funcMagicName_(funcMagicName),
      funcRawName_(funcRawName),
      tensorMap_(*this),
      belongTo_(belongTo)
{
    // The operation seed starts at FUNCTION_MAX_INCASTS.
    opSeed_ = FUNCTION_MAX_INCASTS;
    // Every function gets a fresh id from the FUNCTION id generator.
    functionMagic_ = IdGen<IdType::FUNCTION>::Inst().NewId();
    parent_ = parentFunc;
}
/**
 * Returns a viewer over this function's operations.
 *
 * @param sorted when true and the operations have not been sorted yet, they are sorted first
 *               (the sorted_ flag is set before SortOperations runs).
 */
OperationsViewer Function::Operations(bool sorted)
{
    const bool needsSorting = sorted && !sorted_;
    if (needsSorting) {
        sorted_ = true;
        SortOperations();
    }
    return OperationsViewer(operations_, opPosition_);
}
/**
 * Reports whether any operation of this function is a cube operation: either its isCube
 * attribute is present and true, or its opcode is OP_L1_COPY_IN_CONV.
 */
bool Function::IsCube() const
{
    for (const auto& oper : OperationsViewer(operations_, opPosition_)) {
        const bool flaggedAsCube =
            oper.HasAttr(OpAttributeKey::isCube) && oper.GetBoolAttribute(OpAttributeKey::isCube);
        if (flaggedAsCube) {
            return true;
        }
        if (oper.GetOpcode() == Opcode::OP_L1_COPY_IN_CONV) {
            return true;
        }
    }
    return false;
}
/**
 * Returns the raw function name with a leading FUNCTION_PREFIX removed; names that do not start
 * with the prefix are returned unchanged.
 */
std::string Function::GetOriginalRawName() const
{
    const size_t prefixLen = FUNCTION_PREFIX.length();
    const bool hasPrefix = funcRawName_.compare(0, prefixLen, FUNCTION_PREFIX) == 0;
    return hasPrefix ? funcRawName_.substr(prefixLen) : funcRawName_;
}
// Returns a viewer over the operation sequence and positions last saved by RecordOOOSeq().
OperationsViewer Function::OperationsAfterOOO() { return OperationsViewer(operationsAfterOOO_, opPositionAfterOOO_); }
// Saves a copy of the current operation list and position table; OperationsAfterOOO() exposes the copy.
void Function::RecordOOOSeq()
{
operationsAfterOOO_ = operations_;
opPositionAfterOOO_ = opPosition_;
}
// Returns a mutable reference to the operation list. Asserts that this function is a BLOCK_GRAPH.
std::vector<OperationPtr>& Function::GetProgramOp()
{
FE_ASSERT(FeError::INVALID_TYPE, graphType_ == GraphType::BLOCK_GRAPH)
<< "Function::GetProgramOp called. Current graph type: " << static_cast<int>(graphType_);
return operations_;
}
// Replaces the operation list of a BLOCK_GRAPH function (asserted), refreshes the position table
// and marks the list as sorted, i.e. the given order is taken as final.
void Function::SetProgramOp(const std::vector<OperationPtr>& operations)
{
FE_ASSERT(FeError::INVALID_TYPE, graphType_ == GraphType::BLOCK_GRAPH)
<< "Function::SetProgramOp called. Current graph type: " << static_cast<int>(graphType_);
operations_ = operations;
RefreshOpPosition();
sorted_ = true;
}
// Points the function_ back-reference of every operation in this BLOCK_GRAPH function (asserted) at this object.
void Function::UpdateBelongToThis()
{
FE_ASSERT(FeError::INVALID_TYPE, graphType_ == GraphType::BLOCK_GRAPH)
<< "Function::UpdateBelongToThis called. Current graph type: " << static_cast<int>(graphType_);
for (auto& ele : operations_) {
ele->function_ = this;
}
}
/**
 * Returns the invoke info attached to the call operation stored at position `i`.
 *
 * Asserts that `i` is a valid position, that the operation carries a CallOpAttribute and that
 * the attribute holds invoke info, instead of indexing or dereferencing unchecked.
 *
 * @param i position in the operation list
 * @return reference to the invoke info owned by the operation's call attribute
 */
const SubfuncInvokeInfoTy& Function::GetSubFuncInvokeInfo(const size_t i) const
{
    FE_ASSERT(FeError::INVALID_VAL, i < operations_.size())
        << "Operation index " << i << " out of bounds for operation size " << operations_.size();
    auto callAttr = std::dynamic_pointer_cast<CallOpAttribute>(operations_[i]->GetOpAttribute());
    FE_ASSERT(FeError::INVALID_PTR, callAttr != nullptr)
        << "Operation at index " << i << " must have a CallOpAttribute";
    FE_ASSERT(FeError::INVALID_PTR, callAttr->invokeInfo_ != nullptr)
        << "CallOpAttribute of operation at index " << i << " has no invoke info";
    return *(callAttr->invokeInfo_);
}
/**
 * Finds the position of `rawTensor` among the explicit arguments of this function.
 *
 * The slots associated with the raw tensor are looked up in the slot scope and matched, in
 * order, against the explicit argument slots.
 *
 * @return the index of the first matching explicit argument, or -1 when there is no slot scope
 *         or no slot matches.
 */
int Function::GetParamIndex(const std::shared_ptr<RawTensor>& rawTensor)
{
    if (slotScope_ == nullptr) {
        return -1;
    }
    const auto candidateSlots = slotScope_->LoopupArgSlot(rawTensor);
    const int argCount = static_cast<int>(explicitArgSlots_.size());
    for (const auto& slot : candidateSlots) {
        int argIdx = 0;
        while (argIdx < argCount) {
            if (slot == explicitArgSlots_[argIdx]) {
                return argIdx;
            }
            ++argIdx;
        }
    }
    return -1;
}
// Returns the data address recorded for explicit argument `index` (see BeginFunction).
// Asserts that the index is in range; a negative index becomes a huge unsigned value and is rejected as well.
void* Function::GetParamAddress(int index)
{
FE_ASSERT(FeError::INVALID_VAL, explicitArgAddrs_.size() > static_cast<uint64_t>(index))
<< "The param address is not stored.";
return explicitArgAddrs_[index];
}
/**
 * Reports whether this function contains at least one operation with opcode OP_CALL.
 */
bool Function::HasCallOperation()
{
    bool foundCall = false;
    for (const auto& candidate : Operations()) {
        if (candidate.GetOpcode() == Opcode::OP_CALL) {
            foundCall = true;
            break;
        }
    }
    return foundCall;
}
// Appends to `inOrOutList` a clone of `inOrOut` created against the parent function.
// NOTE(review): parent_ is dereferenced without a null check.
void Function::CreateLeafInAndOutCast(const LogicalTensorPtr& inOrOut, LogicalTensors& inOrOutList) const
{
inOrOutList.emplace_back(inOrOut->Clone(*parent_));
}
/**
 * Resolves the outcast index that shares its source tensor with the given import operation.
 *
 * The first input tensor of `import` must have exactly two consumers; the consumer that is not
 * `import` is taken and the index of its first output among `func`'s outcasts is returned.
 *
 * @return the outcast index as reported by func->GetOutcastIndex, or INVALID_IOINDEX when the
 *         tensor does not have exactly two consumers or no other consumer exists.
 */
static int GetTensorDataLookupOutcast(Function* func, Operation* import)
{
    auto sourceTensor = import->GetIOperands()[0];
    auto users = sourceTensor->GetConsumers();
    if (users.size() != 2) {
        return INVALID_IOINDEX;
    }
    for (auto user : users) {
        if (user == import) {
            continue;
        }
        auto outcast = user->GetOOperands()[0];
        return func->GetOutcastIndex(outcast);
    }
    return INVALID_IOINDEX;
}
/**
 * Resolves the incast index feeding the given import operation.
 *
 * The first input tensor of `import` must have exactly one producer; the index of that
 * producer's first input among `func`'s incasts is returned.
 *
 * @return the incast index as reported by func->GetIncastIndex, or INVALID_IOINDEX when the
 *         tensor does not have exactly one producer.
 */
static int GetTensorDataLookupIncast(Function* func, Operation* import)
{
    auto sourceTensor = import->GetIOperands()[0];
    auto makers = sourceTensor->GetProducers();
    if (makers.size() == 1) {
        auto soleMaker = *makers.begin();
        auto incast = soleMaker->GetIOperands()[0];
        return func->GetIncastIndex(incast);
    }
    return INVALID_IOINDEX;
}
/**
 * Builds the get-tensor-data IO descriptors for a tensor-graph function.
 *
 * For every EMUOP_TENSOR_GETDATA_IMPORT operation, the import operation registered for its index
 * in the current dynamic function's usage dictionary is resolved to an outcast index, or failing
 * that to an incast index, and stored under the get-tensor-data index.
 *
 * @return the descriptor dictionary; empty when there is no current dynamic function or no
 *         import operation.
 */
GetTensorDataIODescDict Function::GetTensorDataForTensorGraph()
{
    GetTensorDataIODescDict iodescDict;
    auto currDynFunc = Program::GetInstance().GetCurrentDynamicFunction();
    if (currDynFunc == nullptr) {
        return iodescDict;
    }
    auto currDynAttr = currDynFunc->GetDyndevAttribute();
    for (auto& op : Operations(false)) {
        if (!CheckEmuOpcode(&op, EMUOP_TENSOR_GETDATA_IMPORT)) {
            continue;
        }
        int getTensorDataIndex = GetTensorDataGetIndex(&op);
        FE_ASSERT(FeError::INVALID_VAL, getTensorDataIndex != -1) << "Failed to get tensor data index for operation";
        // Checked only once an import operation needs it, so functions without imports keep
        // working when the attribute is absent.
        FE_ASSERT(FeError::INVALID_PTR, currDynAttr != nullptr)
            << "Dyndev attribute of the current dynamic function is null";

        // Single lookups via find(): no double search and no accidental insertion.
        auto usageIter = currDynAttr->getTensorDataUsageDict.find(this);
        FE_ASSERT(FeError::NOT_EXIST, usageIter != currDynAttr->getTensorDataUsageDict.end())
            << "Current function not found in getTensorDataUsageDict";
        auto& importDict = usageIter->second.importDict;
        auto importIter = importDict.find(getTensorDataIndex);
        FE_ASSERT(FeError::NOT_EXIST, importIter != importDict.end())
            << "Import index " << getTensorDataIndex << " not found in importDict";
        auto import = importIter->second;

        // Prefer an outcast match; fall back to the incast that feeds the import.
        int outcastIndex = GetTensorDataLookupOutcast(this, import);
        if (outcastIndex != INVALID_IOINDEX) {
            iodescDict[getTensorDataIndex] =
                GetTensorDataIODesc(GET_TENSOR_DATA_OPERAND_IOTYPE_OUTCAST, outcastIndex, 0);
        } else {
            int incastIndex = GetTensorDataLookupIncast(this, import);
            FE_ASSERT(FeError::INVALID_VAL, incastIndex != INVALID_IOINDEX)
                << "Both outcast and incast indices are invalid";
            iodescDict[getTensorDataIndex] = GetTensorDataIODesc(GET_TENSOR_DATA_OPERAND_IOTYPE_INCAST, incastIndex, 0);
        }
    }
    return iodescDict;
}
/**
 * Builds the get-tensor-data IO descriptors for a leaf-graph function.
 *
 * For every EMUOP_TENSOR_GETDATA_IMPORT operation whose first input is an incast of this
 * function, an INCAST descriptor is stored under the operation's get-tensor-data index.
 * Imports whose input is not an incast are left out.
 */
GetTensorDataIODescDict Function::GetTensorDataForLeafGraph()
{
    GetTensorDataIODescDict leafDescs;
    for (auto& op : Operations(false)) {
        if (CheckEmuOpcode(&op, EMUOP_TENSOR_GETDATA_IMPORT)) {
            int getTensorDataIndex = GetTensorDataGetIndex(&op);
            FE_ASSERT(FeError::INVALID_VAL, getTensorDataIndex != -1)
                << "Failed to get tensor data index for operation";
            auto sourceTensor = op.GetIOperands()[0];
            auto incastIndex = GetIncastIndex(sourceTensor);
            if (incastIndex == INVALID_IOINDEX) {
                continue;
            }
            leafDescs[getTensorDataIndex] =
                GetTensorDataIODesc(GET_TENSOR_DATA_OPERAND_IOTYPE_INCAST, incastIndex, 0);
        }
    }
    return leafDescs;
}
/**
 * Rewrites every dynamic attribute of every operation with the result of GetTensorDataFillIO
 * applied to the given descriptor dictionary.
 */
void Function::GetTensorDataRefreshIO(const GetTensorDataIODescDict& iodescDict)
{
    for (auto& op : Operations(false)) {
        for (auto& attrRef : op.GetDynamicAttributeList()) {
            auto& scalar = attrRef.get();
            scalar = GetTensorDataFillIO(iodescDict, scalar);
        }
    }
}
// Records the explicit tensor arguments of this function: for each argument, in order, a tensor
// slot created by TensorSlot::CreateTensor and the argument's data address are appended.
void Function::BeginFunction(const std::vector<std::reference_wrapper<const Tensor>>& explicitOpArgs)
{
// NOTE(review): slotManager is never used below; confirm the call has no required side effect before removing it.
auto slotManager = Program::GetInstance().GetTensorSlotManager();
for (auto& arg : explicitOpArgs) {
explicitArgSlots_.push_back(TensorSlot::CreateTensor(arg));
explicitArgAddrs_.push_back(arg.get().GetData());
}
}
/**
 * Reports whether any callee of `func` reads, through one of its incasts, a slot written by
 * outcast `outcastIdx` of `calleeFunc`.
 *
 * @param func        function whose callee list is scanned
 * @param calleeFunc  function owning the outcast
 * @param outcastIdx  position of the outcast in calleeFunc.GetOutcast(); asserted to be in range
 * @return true when an incast of some callee shares a slot with the outcast
 */
bool HasCalleeConsumer(Function& func, Function& calleeFunc, size_t outcastIdx)
{
    auto& calleeOutcasts = calleeFunc.GetOutcast();
    FE_ASSERT(FeError::INVALID_VAL, outcastIdx < calleeOutcasts.size())
        << "Outcast index " << outcastIdx << " out of bounds for outcast size " << calleeOutcasts.size();
    auto outcast = calleeOutcasts[outcastIdx];
    auto outcastSlots = calleeFunc.GetOutCastSlot(outcast);
    for (auto otherCallee : func.GetCalleeFunctionList()) {
        // Leading space keeps the function name readable in the diagnostic.
        FE_ASSERT(FeError::INVALID_PTR, otherCallee != nullptr) << func.GetRawName() << " has nullptr callee";
        for (auto& incast : otherCallee->GetIncast()) {
            auto incastSlots = otherCallee->GetInCastSlot(incast);
            if (TensorSlotManager::HasSameSlot(incastSlots, outcastSlots)) {
                return true;
            }
        }
    }
    return false;
}
// Returns the slot list of outcast `outcastIdx` of `func`. Asserts that the index is in range.
static std::vector<int> GetOutcastSlots(Function& func, size_t outcastIdx)
{
auto& outcasts = func.GetOutcast();
FE_ASSERT(FeError::INVALID_VAL, outcastIdx < outcasts.size())
<< "Outcast index " << outcastIdx << " out of bounds for outcast size " << outcasts.size();
return func.GetOutCastSlot(outcasts[outcastIdx]);
}
// Reports whether the raw tensor of outcast `outcastIdx` of `func` is a key of func.outIncastLinkMap.
// Asserts that the index is in range.
static bool IsLinkedInplaceOutcast(Function& func, size_t outcastIdx)
{
auto& outcasts = func.GetOutcast();
FE_ASSERT(FeError::INVALID_VAL, outcastIdx < outcasts.size())
<< "Outcast index " << outcastIdx << " out of bounds for outcast size " << outcasts.size();
return func.outIncastLinkMap.count(outcasts[outcastIdx]->GetRawTensor()) != 0;
}
/**
 * Reports whether outcast `outcastIdx` of `func` both
 *  1. shares a slot with one of the slot manager's assemble slots, and
 *  2. shares a slot with an outcast of any function in the program that is linked in
 *     outIncastLinkMap (see IsLinkedInplaceOutcast).
 */
static bool IsLinkedInplaceAssembleDstOutcast(Function& func, size_t outcastIdx)
{
    auto targetSlots = GetOutcastSlots(func, outcastIdx);
    auto slotMngr = Program::GetInstance().GetTensorSlotManager();

    // Step 1: is the outcast written into an assemble slot known to the slot index dictionary?
    const auto hitsAssembleSlot = [&slotMngr, &targetSlots]() {
        for (const auto& slot : slotMngr->assembleSlotSet) {
            auto found = slotMngr->slotIndexDict.find(slot);
            if (found != slotMngr->slotIndexDict.end()) {
                std::vector<int> assembleSlot = {found->second};
                if (TensorSlotManager::HasSameSlot(targetSlots, assembleSlot)) {
                    return true;
                }
            }
        }
        return false;
    };
    if (!hitsAssembleSlot()) {
        return false;
    }

    // Step 2: look for a linked outcast anywhere in the program that uses one of the same slots.
    for (auto& [funcName, funcPtr] : Program::GetInstance().GetFunctionMap()) {
        (void)funcName;
        Function* candidate = funcPtr.get();
        if (candidate != nullptr) {
            const size_t outcastCount = candidate->GetOutcast().size();
            for (size_t pos = 0; pos < outcastCount; ++pos) {
                if (IsLinkedInplaceOutcast(*candidate, pos) &&
                    TensorSlotManager::HasSameSlot(GetOutcastSlots(*candidate, pos), targetSlots)) {
                    return true;
                }
            }
        }
    }
    return false;
}
/**
 * Maps outcasts of `calleeFunc` to the outcasts of `func` they write through.
 *
 * For every callee outcast, the keys of `outcasts` (indices into func's outcasts) are scanned in
 * order; the first one that shares a slot with the callee outcast, while the callee outcast has
 * no consumer among func's callees, is stored as outcastIdx2parent[calleeIdx] = parentIdx.
 *
 * @param outcasts           parent outcast indices to consider (mapped values are ignored)
 * @param outcastIdx2parent  receives callee outcast index -> parent outcast index
 */
void CalleeSlotNoConsumer(
    Function& calleeFunc, Function& func, const std::map<size_t, size_t>& outcasts,
    std::map<size_t, size_t>& outcastIdx2parent)
{
    for (size_t calleeIdx = 0; calleeIdx < calleeFunc.GetOutcast().size(); ++calleeIdx) {
        auto calleeTensor = calleeFunc.GetOutcast()[calleeIdx];
        auto calleeSlots = calleeFunc.GetOutCastSlot(calleeTensor);
        for (const auto& parentEntry : outcasts) {
            const size_t parentIdx = parentEntry.first;
            auto parentTensor = func.GetOutcast()[parentIdx];
            auto parentSlots = func.GetOutCastSlot(parentTensor);
            if (!TensorSlotManager::HasSameSlot(calleeSlots, parentSlots)) {
                continue;
            }
            if (HasCalleeConsumer(func, calleeFunc, calleeIdx)) {
                continue;
            }
            outcastIdx2parent[calleeIdx] = parentIdx;
            break;
        }
    }
}
// Removes output operand `index` from every call operation of this function whose callee hash
// equals `calleeHash`, then erases operations via EraseOperations(true, false).
// OP_ASSEMBLE consumers of the removed output are marked as deleted first.
void Function::EraseCallOpOpnd(const FunctionHash& calleeHash, size_t index)
{
for (auto callop : GetCallopList()) {
// NOTE(review): static_pointer_cast does not verify the attribute type; GetSubFuncInvokeInfo uses dynamic_pointer_cast.
auto callopAttr = std::static_pointer_cast<CallOpAttribute>(callop->GetOpAttribute());
FE_ASSERT(FeError::INVALID_PTR, callopAttr != nullptr) << "Processing CallOp:" << callop->Dump();
if (callopAttr->GetCalleeHash() != calleeHash) {
continue;
}
FE_ASSERT(FeError::INVALID_VAL, index < callop->oOperand.size())
<< "Index " << index << " out of bounds for oOperand size " << callop->oOperand.size();
// Only the operand list is adjusted below, so per-operand metadata must still be empty.
FE_ASSERT(callop->GetOOpAttr().empty()) << "oOpAttrOffset is not empty for CallOp:" << callop->Dump();
FE_ASSERT(callopAttr->GetArgList().empty()) << "ArgList is not empty for CallOp:" << callop->Dump();
FE_ASSERT(callopAttr->GetOutCastIndexToExpr().empty())
<< "OutCastIndexToExpr is not empty for CallOp:" << callop->Dump();
for (auto& consumer : callop->oOperand[index]->GetConsumers()) {
if (consumer->GetOpcode() == Opcode::OP_ASSEMBLE) {
consumer->SetAsDeleted();
}
}
callop->oOperand.erase(callop->oOperand.begin() + index);
}
EraseOperations(true, false);
}
/**
 * Rewrites the dynamic attributes of every non-call operation through
 * UpdateGetTensorDataIOIndex(currOutcastIdx, newOutcastIdx, attribute).
 */
void Function::CheckAndUpdateGetTensorData(size_t currOutcastIdx, size_t newOutcastIdx)
{
    for (auto& op : Operations(false)) {
        if (op.IsCall()) {
            continue;
        }
        for (auto& attrRef : op.GetDynamicAttributeList()) {
            auto& scalar = attrRef.get();
            scalar = UpdateGetTensorDataIOIndex(currOutcastIdx, newOutcastIdx, scalar);
        }
    }
}
/**
 * Applies the removal plan computed by the redundant-outcast analysis.
 *
 * For every function in `removeRecord` the listed outcasts are removed from the function and
 * from the matching call operations of its parent. Afterwards the get-tensor-data outcast
 * indices recorded for that function are shifted down by the number of removed outcasts that
 * preceded them.
 *
 * @param removeRecord         function -> outcast indices to remove
 * @param getTensorDataRecord  function -> outcast indices referenced by get-tensor-data
 */
void Function::CleanRedundantOutcast(
    std::map<Function*, std::set<size_t>>& removeRecord, std::map<Function*, std::set<size_t>>& getTensorDataRecord)
{
    for (auto& [func, removeList] : removeRecord) {
        // Remove from the highest index down so the remaining indices stay valid.
        for (auto it = removeList.rbegin(); it != removeList.rend(); ++it) {
            auto outCastIdx = *it;
            func->Parent().EraseCallOpOpnd(func->GetFunctionHash(), outCastIdx);
            func->RemoveOutcast(outCastIdx);
        }

        const auto recordIter = getTensorDataRecord.find(func);
        if (recordIter == getTensorDataRecord.end()) {
            continue;
        }
        for (auto currOutcastIdx : recordIter->second) {
            // The member lower_bound is O(log n); std::lower_bound on set iterators walks linearly.
            auto firstNotBelow = removeList.lower_bound(currOutcastIdx);
            size_t newOutcastIdx = currOutcastIdx - std::distance(removeList.begin(), firstNotBelow);
            if (currOutcastIdx != newOutcastIdx) {
                func->CheckAndUpdateGetTensorData(currOutcastIdx, newOutcastIdx);
            }
        }
    }
}
// Recursively classifies callee outcasts that write through the candidate outcasts of `func`.
//
// `outcasts` holds the candidate outcast indices of `func` as keys. For each callee, the callee
// outcasts mapped to those candidates (see CalleeSlotNoConsumer) are analysed after recursing
// into the callee itself. Each mapped callee outcast is then either
//  - recorded in getTensorDataRecord (for the callee and, via its parent index, for `func`) when
//    it is a get-tensor-data outcast or was recorded as one during the recursion,
//  - kept when it is a linked in-place assemble destination, or
//  - added to removeRecord.
void RedundantOutCastCheck(
std::map<Function*, std::set<size_t>>& removeRecord, std::map<Function*, std::set<size_t>>& getTensorDataRecord,
Function* func, std::map<size_t, size_t>& outcasts)
{
for (auto calleeFunc : func->GetCalleeFunctionList()) {
// NOTE(review): the message has no space between the function name and "has".
FE_ASSERT(FeError::INVALID_PTR, calleeFunc != nullptr) << func->GetMagicName() << "has nullptr calleeFunc";
// callee outcast index -> outcast index of `func`
std::map<size_t, size_t> outcastIdx2parent;
CalleeSlotNoConsumer(*calleeFunc, *func, outcasts, outcastIdx2parent);
if (!outcastIdx2parent.empty()) {
RedundantOutCastCheck(removeRecord, getTensorDataRecord, calleeFunc, outcastIdx2parent);
}
auto& calleeOutCasts = calleeFunc->GetOutcast();
for (auto& [outCastIdx, val] : outcastIdx2parent) {
(void)val;
FE_ASSERT(FeError::INVALID_VAL, calleeOutCasts[outCastIdx].get() != nullptr)
<< "Outcast at index " << outCastIdx << " should not be null";
if (calleeOutCasts[outCastIdx]->IsGetTensorDataOutcast()) {
getTensorDataRecord[calleeFunc].insert(outCastIdx);
FE_ASSERT(FeError::INVALID_VAL, outcastIdx2parent.count(outCastIdx) > 0)
<< "Outcast index " << outCastIdx << " should be in outcastIdx2parent";
getTensorDataRecord[func].insert(outcastIdx2parent[outCastIdx]);
// operator[] below creates an empty record for the callee when none exists yet.
} else if (getTensorDataRecord[calleeFunc].count(outCastIdx) > 0) {
FE_ASSERT(FeError::INVALID_VAL, outcastIdx2parent.count(outCastIdx) > 0)
<< "Outcast index " << outCastIdx << " should be in outcastIdx2parent";
getTensorDataRecord[func].insert(outcastIdx2parent[outCastIdx]);
} else if (IsLinkedInplaceAssembleDstOutcast(*calleeFunc, outCastIdx)) {
continue;
} else {
removeRecord[calleeFunc].insert(outCastIdx);
}
}
}
}
// Finds and removes outcasts of this function (and, recursively, of its callees) that are
// neither program outputs nor consumed by another callee of the parent.
//
// Steps:
//  1. Collect the slot indices of the program outputs; when there are no output slots the input
//     slots are used instead.
//  2. Mark as candidates the outcasts that share no slot with that list and have no callee
//     consumer in the parent function.
//  3. Classify the matching callee outcasts with RedundantOutCastCheck.
//  4. For each candidate: keep it when it is referenced by get-tensor-data or is a linked
//     in-place assemble destination, otherwise schedule it for removal.
//  5. Apply the plan with CleanRedundantOutcast(removeRecord, getTensorDataRecord).
void Function::CleanRedundantOutCast()
{
auto slotMngr = Program::GetInstance().GetTensorSlotManager();
std::vector<int> outputSlots;
// NOTE(review): slotIndexDict[slot] inserts a default entry when the slot is missing;
// IsLinkedInplaceAssembleDstOutcast uses find() for the same dictionary.
for (const auto& slot : slotMngr->outputSlotList) {
outputSlots.push_back(slotMngr->slotIndexDict[slot]);
}
if (slotMngr->outputSlotList.size() == 0) {
for (const auto& slot : slotMngr->inputSlotList) {
outputSlots.push_back(slotMngr->slotIndexDict[slot]);
}
}
std::map<Function*, std::set<size_t>> removeRecord;
std::map<Function*, std::set<size_t>> getTensorDataRecord;
// NOTE(review): calleeLists is not used in this function.
auto calleeLists = GetCalleeFunctionList();
auto& calleeOutCasts = GetOutcast();
// Candidate outcast index -> placeholder value 0.
std::map<size_t, size_t> outputMap;
for (size_t outCastIdx = 0; outCastIdx < calleeOutCasts.size(); outCastIdx++) {
auto outcastSlots = GetOutCastSlot(calleeOutCasts[outCastIdx]);
if ((!TensorSlotManager::HasSameSlot(outputSlots, outcastSlots)) &&
!HasCalleeConsumer(Parent(), *this, outCastIdx)) {
outputMap[outCastIdx] = 0;
}
}
if (!outputMap.empty()) {
RedundantOutCastCheck(removeRecord, getTensorDataRecord, this, outputMap);
}
for (auto& [outCastIdx, val] : outputMap) {
(void)val;
if (getTensorDataRecord[this].count(outCastIdx) > 0) {
FE_ASSERT(FeError::NOT_EXIST, outputMap.count(outCastIdx) > 0)
<< "outputMap does not contain outCastIdx " << outCastIdx;
// NOTE(review): every outputMap value is 0, so index 0 is what gets recorded for parent_.
getTensorDataRecord[parent_].insert(outputMap[outCastIdx]);
} else {
if (!IsLinkedInplaceAssembleDstOutcast(*this, outCastIdx)) {
removeRecord[this].insert(outCastIdx);
}
}
}
CleanRedundantOutcast(removeRecord, getTensorDataRecord);
}
void Function::InferParamDirection()
{
FE_ASSERT(FeError::INVALID_PTR, dyndevAttr_ != nullptr) << "dyndevAttr_ is empty";
FE_ASSERT(FeError::INVALID_PTR, slotScope_ != nullptr) << "slotscope is empty";
std::map<int, int> slotAttr;
for (size_t i = 0; i < slotScope_->ioslot.outcastSlot.size(); i++) {
for (auto k : slotScope_->ioslot.outcastSlot[i]) {
slotAttr[k] |= (int)ParamDirection::OUT;
}
}
for (size_t i = 0; i < slotScope_->ioslot.incastSlot.size(); i++) {
for (auto k : slotScope_->ioslot.incastSlot[i]) {
slotAttr[k] |= (int)ParamDirection::IN;
}
}
auto& startArgs = dyndevAttr_->startArgsInputTensorList;
auto& directionList = dyndevAttr_->startArgsDirectionList;
for (auto& t : startArgs) {
auto attr = slotAttr[t.get().Id()];
auto direction = attr ? (ParamDirection)attr : ParamDirection::IN;
directionList.push_back(direction);
}
}
/**
 * Collects the original incast and outcast tensors of this function from `operationList`.
 *
 * Inputs: every input of a call operation is an origin incast; for other operations, an input
 * seen for the first time is an origin incast when it belongs to a different function.
 * Outputs: an output is an origin outcast when its operation is a call or its raw tensor has a
 * non-zero reference count. Both lists are kept free of duplicates, in first-seen order.
 */
void Function::FillOriginInOutCast(std::vector<Operation*>& operationList)
{
    std::unordered_set<LogicalTensorPtr> seen;
    // Appends `tensor` to `bucket` unless it is already there.
    const auto appendUnique = [](auto& tensor, auto& bucket) {
        if (std::find(bucket.begin(), bucket.end(), tensor) == bucket.end()) {
            bucket.push_back(tensor);
        }
    };

    for (auto& op : operationList) {
        for (auto& input : op->iOperand) {
            if (op->IsCall()) {
                appendUnique(input, originInCasts_);
                continue;
            }
            const bool firstVisit = seen.insert(input).second;
            if (firstVisit && &input->BelongFunction() != this) {
                appendUnique(input, originInCasts_);
            }
        }
        for (auto& output : op->oOperand) {
            if (op->IsCall() || output->tensor->GetRefCount()) {
                appendUnique(output, originOutCasts_);
            }
            seen.insert(output);
        }
    }
}
// Finalizes this function and returns the arguments needed to call it.
//
// The work depends on the graph type:
//  - TENSOR_GRAPH, or a STATIC function with TILE_GRAPH: collect origin in/outcasts, set call-op
//    slots, build incasts/outcasts from `scope`, refresh get-tensor-data IO descriptors, sort the
//    operations (grouped via AddOperationGroup) and, when a dynamic function is current, store
//    the value-depend description in its dyndev attribute.
//  - EXECUTE_GRAPH: nothing graph-specific.
//  - BLOCK_GRAPH: clone the existing in/outcasts against the parent function, raise opSeed_ above
//    every operation magic, and later compute the normalized argument list and the outcast
//    symbolic expressions.
//  - anything else: assertion failure.
// In all cases the tensor map is rebuilt and the function hash is computed before returning.
FunctionCallArgs Function::EndFunction(const std::shared_ptr<TensorSlotScope>& scope)
{
// Snapshot taken before any sorting; used for origin-cast collection and grouping.
std::vector<Operation*> operationList = Operations(false).DuplicatedOpList();
if (IsGraphType(GraphType::TENSOR_GRAPH) || IsFunctionTypeAndGraphType(FunctionType::STATIC, GraphType::TILE_GRAPH)) {
FillOriginInOutCast(operationList);
}
LogicalTensors inArgumentList, outArgumentList;
// Same condition as above, evaluated a second time.
if (IsGraphType(GraphType::TENSOR_GRAPH) || IsFunctionTypeAndGraphType(FunctionType::STATIC, GraphType::TILE_GRAPH)) {
SetCallOpSlot();
inArgumentList = MakeIncasts(scope);
outArgumentList = MakeOutcasts(scope);
auto iodescDict = GetTensorDataForTensorGraph();
GetTensorDataRefreshIO(iodescDict);
AddOperationGroup(operationList);
SortOperations();
ClearOperationGroups();
if (Program::GetInstance().GetCurrentDynamicFunction()) {
DyndevFunctionAttribute::ValueDependDesc desc = LookupValueDepend();
auto currDynFuncAttr = Program::GetInstance().GetCurrentDynamicFunction()->GetDyndevAttribute();
if (currDynFuncAttr != nullptr) {
currDynFuncAttr->valueDependDescDict[this] = desc;
}
}
} else if (graphType_ == GraphType::EXECUTE_GRAPH) {
// Intentionally empty: no graph-specific finalization for execute graphs.
} else if (graphType_ == GraphType::BLOCK_GRAPH) {
for (auto& out : outCasts_) {
CreateLeafInAndOutCast(out, outArgumentList);
}
for (auto& in : inCasts_) {
CreateLeafInAndOutCast(in, inArgumentList);
}
// Keep the seed above every operation magic already present in the block graph.
for (const auto& op : operations_) {
opSeed_ = std::max(opSeed_, op->GetOpMagic() + 1);
}
} else {
FE_ASSERT(FeError::INVALID_TYPE, false) << "Not support connecting other type of function currently";
}
std::vector<OperandAttribute> iOpAttr;
std::vector<OperandAttribute> oOpAttr;
std::map<int, SymbolicScalar> outIndexToExpr;
std::vector<std::vector<SymbolicScalar>> argList;
// Only block graphs fill the operand attributes, argument list and outcast expressions.
if (graphType_ == GraphType::BLOCK_GRAPH) {
argList = NormalizeCoa(iOpAttr, oOpAttr);
GetOutcastSymbolicExpr(outIndexToExpr);
}
BuildTensorMap();
ComputeHash();
return {std::move(inArgumentList), std::move(outArgumentList), std::move(iOpAttr),
std::move(oOpAttr), std::move(outIndexToExpr), std::move(argList)};
}
/**
 * Records the raw magic and the tensor for `tensor->magic`, logging when the same magic was
 * previously recorded with a different raw magic.
 *
 * Despite the name, a mismatch is only logged at info level; the new tensor then replaces the
 * previous entry in both maps.
 *
 * @param tensor                tensor to record
 * @param magicToRawMagic       tensor magic -> raw tensor magic seen so far
 * @param magicToLogicalTensor  tensor magic -> tensor seen so far
 */
void Function::AddWhenNotExistOrAssert(
    const std::shared_ptr<LogicalTensor>& tensor, std::map<int, int>& magicToRawMagic,
    std::map<int, std::shared_ptr<LogicalTensor>>& magicToLogicalTensor)
{
    const int rawMagic = tensor->tensor->GetRawMagic();
    if (auto it = magicToRawMagic.find(tensor->magic); it != magicToRawMagic.end() && it->second != rawMagic) {
        // The conflict is one magic mapped to two raw magics; the message now says so.
        FE_LOGI(
            "Same Magic Diff RawMagic: %d %s %d %s", it->second,
            magicToLogicalTensor[tensor->magic]->Dump().c_str(), rawMagic, tensor->Dump().c_str());
    }
    magicToRawMagic[tensor->magic] = rawMagic;
    magicToLogicalTensor[tensor->magic] = tensor;
}
/**
 * \brief Scan every input and output operand of every operation and report logical-tensor
 *        magics that are bound to more than one raw tensor.
 *
 * The per-tensor bookkeeping and reporting is delegated to AddWhenNotExistOrAssert; the two
 * tables live only for the duration of this call.
 */
void Function::TensorMagicCheck() const
{
    std::map<int, int> magicToRawMagic;
    std::map<int, std::shared_ptr<LogicalTensor>> magicToLogicalTensor;
    for (const auto& op : operations_) {
        for (const auto& tensor : op->iOperand) {
            AddWhenNotExistOrAssert(tensor, magicToRawMagic, magicToLogicalTensor);
        }
        for (const auto& tensor : op->oOperand) {
            AddWhenNotExistOrAssert(tensor, magicToRawMagic, magicToLogicalTensor);
        }
    }
}
/**
 * \brief Assert that the operation graph of this function contains no dependency cycle.
 *
 * Runs a depth-first search that follows "output operand -> consuming operation" edges. When an
 * operation is reached again while it is still on the DFS stack, the operations and tensors on
 * the cycle are logged with FE_LOGE while the recursion unwinds, and FE_ASSERT fires with
 * \p errorMsg.
 *
 * \param errorMsg Text streamed into the failing assertion.
 */
void Function::OperationLoopCheck(const std::string& errorMsg)
{
    // Tensor -> operations of this function that read it.
    std::map<LogicalTensor*, std::vector<Operation*>> consumers;
    for (auto&& op : operations_) {
        for (auto&& iop : op->GetIOperands()) {
            consumers[iop.get()].emplace_back(op.get());
        }
    }
    enum class DfsState {
        TODO = 0, // value-initialised default of a map entry, i.e. "not visited yet"
        IN_STACK,
        DONE,
    };
    std::map<int, DfsState> states; // keyed by op magic, shared by all DFS roots
    for (auto&& op : operations_) {
        // Magic of the operation that closed the cycle; -1 when no cycle is being reported.
        int dupOpMagic = -1;
        auto cycleDetection = [&states, &dupOpMagic, &consumers](Operation* curr, auto self) -> bool {
            int magic = curr->GetOpMagic();
            if (states[magic] == DfsState::DONE) {
                return false;
            }
            if (states[magic] == DfsState::IN_STACK) {
                dupOpMagic = magic;
                FE_LOGE(FeError::EINTERNAL, "[OperationLoopCheck] Cycle detected: ");
                FE_LOGE(FeError::EINTERNAL, "[OperationLoopCheck] Operation: %s", curr->Dump().c_str());
                return true;
            }
            states[magic] = DfsState::IN_STACK;
            for (auto&& oop : curr->GetOOperands()) {
                for (auto* consumer : consumers[oop.get()]) {
                    if (self(consumer, self)) {
                        // Log only the frames that are part of the cycle; stop once the
                        // operation that closed it has been printed again.
                        if (dupOpMagic != -1) {
                            FE_LOGE(FeError::EINTERNAL, "[OperationLoopCheck] Tensor: %s", oop->Dump().c_str());
                            FE_LOGE(FeError::EINTERNAL, "[OperationLoopCheck] Operation: %s", curr->Dump().c_str());
                            if (magic == dupOpMagic) {
                                dupOpMagic = -1;
                            }
                        }
                        return true;
                    }
                }
            }
            states[magic] = DfsState::DONE;
            return false;
        };
        FE_ASSERT(!cycleDetection(op.get(), cycleDetection)) << errorMsg;
    }
}
bool Function::OperationLoopCheck()
{
std::unordered_map<Operation*, int> inLinkNum;
std::unordered_set<Operation*> visitedOp;
std::vector<Operation*> visitStack;
for (std::shared_ptr<Operation> op : operations_) {
inLinkNum[op.get()] = op->ProducerOps().size();
if (inLinkNum[op.get()] == 0) {
visitStack.push_back(op.get());
}
}
while (!visitStack.empty()) {
Operation* currOp = visitStack.back();
visitStack.pop_back();
visitedOp.insert(currOp);
for (Operation* nextOp : currOp->ConsumerOps()) {
inLinkNum[nextOp] -= 1;
if (inLinkNum[nextOp] == 0) {
visitStack.push_back(nextOp);
}
if (inLinkNum[nextOp] < 0) {
FE_LOGE(FeError::EINTERNAL, "[OperationLoopCheck] Operation:%s", nextOp->Dump().c_str());
return false;
}
}
}
if (visitedOp.size() != operations_.size()) {
FE_LOGE(FeError::EINTERNAL, "[OperationLoopCheck] Loop Detected.");
return false;
}
return true;
}
// Compute the boundary tensors of one subgraph ("island").
//
// opToSubgraph : op magic -> subgraph id, for the operations that were assigned to a subgraph.
// subgraphID   : id of the island being analysed.
// operations   : operations belonging to that island.
// iOperands    : out - tensors used by the island but never produced inside it.
// oOperands    : out - remaining tensors that are not classified as "not an outcast" below.
//
// NOTE(review): the last std::set_difference reads iOperands as a sorted range, which holds
// only if iOperands is empty on entry - confirm that every caller passes an empty vector.
void Function::GetAnIslandIncastsOutcasts(
const std::map<int, int>& opToSubgraph, const int subgraphID, const std::vector<Operation*>& operations,
std::vector<std::shared_ptr<LogicalTensor>>& iOperands,
std::vector<std::shared_ptr<LogicalTensor>>& oOperands) const
{
// These sets are ordered by shared_ptr comparison; std::set_difference below relies on that.
std::set<std::shared_ptr<LogicalTensor>> allLogicalTensors;
std::set<std::shared_ptr<LogicalTensor>> notOutcasts;
std::set<std::shared_ptr<LogicalTensor>> notIncasts;
for (const auto& opPtr : operations) {
const auto& op = *opPtr;
for (auto&& operand : op.GetIOperands()) {
allLogicalTensors.insert(operand);
// An input operand stays internal unless one of its consumers (OP_CALL consumers are
// ignored) lives in a different subgraph.
bool usedbyotherfunction = false;
for (auto& consumer : operand->GetConsumers()) {
auto magic = consumer->GetOpMagic();
if (consumer->GetOpcode() == Opcode::OP_CALL) {
continue;
}
// On failure the message dumps the operation being scanned, not the consumer.
FE_ASSERT(FeError::NOT_EXIST, opToSubgraph.find(magic) != opToSubgraph.end())
<< "Consumer magic " << magic << " not found in opToSubgraph. "
<< "\n"
<< "Operation: " << op.Dump();
if (opToSubgraph.at(magic) != subgraphID) {
usedbyotherfunction = true;
break;
}
}
if (!usedbyotherfunction) {
notOutcasts.insert(operand);
}
}
// Anything produced inside the island cannot be an incast.
for (auto&& operand : op.GetOOperands()) {
allLogicalTensors.insert(operand);
notIncasts.insert(operand);
}
}
// iOperands = all tensors - tensors produced inside the island.
std::set_difference(
allLogicalTensors.begin(), allLogicalTensors.end(), notIncasts.begin(), notIncasts.end(),
std::inserter(iOperands, iOperands.begin()));
// tryOOperands = all tensors - tensors known not to leave the island.
std::set<std::shared_ptr<LogicalTensor>> tryOOperands;
std::set_difference(
allLogicalTensors.begin(), allLogicalTensors.end(), notOutcasts.begin(), notOutcasts.end(),
std::inserter(tryOOperands, tryOOperands.begin()));
// oOperands = tryOOperands - iOperands, so a tensor is never reported as both.
std::set_difference(
tryOOperands.begin(), tryOOperands.end(), iOperands.begin(), iOperands.end(),
std::inserter(oOperands, oOperands.begin()));
// Re-order both results with TensorPtrComparator instead of pointer order.
std::sort(iOperands.begin(), iOperands.end(), TensorPtrComparator());
std::sort(oOperands.begin(), oOperands.end(), TensorPtrComparator());
}
/**
 * \brief Group the operations of this function by subgraph id.
 *
 * Operations whose subgraph id is negative are logged and left out of both tables. An op magic
 * that is already present in the op -> subgraph table triggers FE_ASSERT.
 *
 * \return A pair {subgraph id -> operations, op magic -> subgraph id}.
 */
auto Function::AnnotateOperation()
{
    std::map<int, std::vector<Operation*>> subgraphs;
    std::map<int, int> opToSubgraph;
    for (auto&& op : Operations()) {
        const auto opMagic = op.GetOpMagic();
        const auto subgraphId = op.GetSubgraphID();
        FE_ASSERT(FeError::IS_EXIST, opToSubgraph.count(opMagic) == 0)
            << "Same op magic shall only appear once."
            << "\n"
            << "Duplicate OpMagic found: " << opMagic << "\n"
            << "Operation: " << op.Dump();
        if (subgraphId < 0) {
            FE_LOGD("Op magic: %d less than 0 graph: %d", opMagic, subgraphId);
            continue;
        }
        subgraphs[subgraphId].emplace_back(&op);
        opToSubgraph[opMagic] = subgraphId;
        FE_LOGD("Operation: %d Belong To subgraph: %d", opMagic, subgraphId);
    }
    // Debug dump of the resulting grouping.
    for (const auto& [groupId, members] : subgraphs) {
        FE_LOGD("Subgraph ID: %d", groupId);
        for (const auto& member : members) {
            FE_LOGD("Operation: %s", member->Dump().c_str());
        }
    }
    return std::make_pair(std::move(subgraphs), std::move(opToSubgraph));
}
/**
 * \brief Detect a dependency cycle between subgraphs.
 *
 * Operations are grouped by subgraph id (AnnotateOperation), the incast/outcast tensors of each
 * subgraph are computed (GetAnIslandIncastsOutcasts), and a depth-first search follows
 * "outcast of subgraph A -> subgraph consuming it as incast" edges. Subgraphs equal to
 * NOT_IN_SUBGRAPH are ignored.
 *
 * \return The ids of the subgraphs on the first cycle found (also logged with FE_LOGE), or an
 *         empty set when there is no cycle or totalSubGraphCount_ is zero.
 */
std::unordered_set<int> Function::LoopCheck()
{
    if (totalSubGraphCount_ == 0) {
        return {};
    }
    FE_LOGI("LoopCheck begin.");
    auto [subgraphs, opToSubgraph] = AnnotateOperation();
    // Boundary tensor -> subgraphs that read it as an incast.
    std::map<LogicalTensor*, std::vector<int>> consumers;
    std::map<int, std::vector<std::shared_ptr<LogicalTensor>>> iOperands;
    std::map<int, std::vector<std::shared_ptr<LogicalTensor>>> oOperands;
    for (auto&& [subgraphID, operations] : subgraphs) {
        if (subgraphID == NOT_IN_SUBGRAPH) {
            continue;
        }
        GetAnIslandIncastsOutcasts(opToSubgraph, subgraphID, operations, iOperands[subgraphID], oOperands[subgraphID]);
        for (auto&& iop : iOperands[subgraphID]) {
            consumers[iop.get()].push_back(subgraphID);
        }
    }
    enum class DfsState {
        TODO = 0, // value-initialised default of a map entry, i.e. "not visited yet"
        IN_STACK,
        DONE,
    };
    std::map<int, DfsState> states;
    std::unordered_set<int> subGraphInCycle;
    for (auto&& [subgraphID, operations] : subgraphs) {
        (void)operations;
        if (subgraphID == NOT_IN_SUBGRAPH) {
            continue;
        }
        // Subgraph that closed the cycle; -2 is the "nothing being reported" sentinel.
        int duplicatedSubgraphID = -2;
        auto cycleDetection = [&states, &duplicatedSubgraphID, &oOperands, &consumers, &subGraphInCycle](
                                  int currSubgraph, auto self) -> bool {
            if (states[currSubgraph] == DfsState::DONE) {
                return false;
            }
            if (states[currSubgraph] == DfsState::IN_STACK) {
                duplicatedSubgraphID = currSubgraph;
                FE_LOGE(FeError::EINTERNAL, "[Cycle Detection] Cycle detected: ");
                FE_LOGE(FeError::EINTERNAL, "[Cycle Detection] subgraph id: %d", currSubgraph);
                subGraphInCycle.emplace(currSubgraph);
                return true;
            }
            states[currSubgraph] = DfsState::IN_STACK;
            for (auto&& oop : oOperands[currSubgraph]) {
                for (int consumer : consumers[oop.get()]) {
                    if (self(consumer, self)) {
                        // While unwinding, report only the frames that lie on the cycle.
                        if (duplicatedSubgraphID != -2) {
                            FE_LOGE(FeError::EINTERNAL, "[Cycle Detection] tensor: %s", oop->Dump().c_str());
                            FE_LOGE(FeError::EINTERNAL, "[producer]=");
                            for (const auto& producer : oop->GetProducers()) {
                                FE_LOGE(FeError::EINTERNAL, "%d", producer->GetOpMagic());
                            }
                            FE_LOGE(FeError::EINTERNAL, "[Cycle Detection] subgraph id: %d", currSubgraph);
                            subGraphInCycle.emplace(currSubgraph);
                            if (currSubgraph == duplicatedSubgraphID) {
                                duplicatedSubgraphID = -2;
                            }
                        }
                        return true;
                    }
                }
            }
            states[currSubgraph] = DfsState::DONE;
            return false;
        };
        if (cycleDetection(subgraphID, cycleDetection)) {
            return subGraphInCycle;
        }
    }
    return std::unordered_set<int>{};
}
// Return the operations of this function in dependency order without modifying operations_.
//
// The sort runs backwards: operations that nothing else depends on are emitted first, each emitted
// operation releases its producers, and the list is reversed at the end so producers come first.
// Dependencies come from input operands, depend operands, the incast/outcast tensors referenced by
// the tensor-data usage of an operation, and the order inside each operation group.
// Asserts on duplicate operations, on producers missing from operations_, and on cycles.
std::vector<std::shared_ptr<Operation>> Function::GetSortedOperations() const
{
// opToIndex: operation -> its position in operations_.
std::unordered_map<const Operation*, int> opToIndex;
// usageDict: (type, index) pairs returned by GetTensorDataUsage; filled for non-call operations only.
std::unordered_map<const Operation*, std::set<std::pair<int, int>>> usageDict;
for (size_t idx = 0; idx < operations_.size(); idx++) {
auto op = operations_[idx].get();
FE_ASSERT(FeError::IS_EXIST, opToIndex.count(op) == 0) << "Duplicate operation found: " << op->Dump();
opToIndex.emplace(op, idx);
if (!op->IsCall()) {
auto attrList = op->GetDynamicAttributeList();
usageDict[op] = GetTensorDataUsage(attrList);
}
}
// outDegree[i]: number of not-yet-emitted dependents of operation i.
std::vector<int> outDegree(operations_.size(), 0);
// prevOperation[i]: index of the previous operation in the same group, or -1.
std::vector<int> prevOperation(operations_.size(), -1);
// Count one dependent on every producer of `ioperand` that belongs to this function and is not
// `operation` itself.
auto addProd = [&](auto operation, auto ioperand) {
for (const auto& prod : ioperand->GetProducers()) {
if (prod->BelongTo() != this || prod == operation) {
continue;
}
FE_ASSERT(FeError::NOT_EXIST, opToIndex.count(prod) != 0)
<< "Producer not found in opToIndex: " << prod->Dump();
outDegree[opToIndex[prod]]++;
}
};
for (auto& op : operations_) {
for (auto& iop : op->iOperand) {
addProd(op.get(), iop);
}
for (auto& dop : op->dependOperand) {
addProd(op.get(), dop);
}
// usageDict[...] default-inserts an empty set for call operations.
for (auto [type, index] : usageDict[op.get()]) {
if (type == GET_TENSOR_DATA_OPERAND_IOTYPE_INCAST) {
addProd(op.get(), inCasts_[index]);
} else if (type == GET_TENSOR_DATA_OPERAND_IOTYPE_OUTCAST) {
addProd(op.get(), outCasts_[index]);
}
}
}
// Inside a group, each operation depends on the one listed before it.
for (auto& opGroup : operationGroups_) {
for (size_t idx = 1; idx < opGroup.size(); idx++) {
prevOperation[opToIndex[opGroup[idx]]] = opToIndex[opGroup[idx - 1]];
outDegree[opToIndex[opGroup[idx - 1]]]++;
}
}
// Seed the queue with operations that have no dependents.
std::queue<int> q;
for (size_t idx = 0; idx < operations_.size(); idx++) {
if (outDegree[idx] == 0) {
q.emplace(idx);
}
}
// Mirror of addProd: release one dependent from each producer and enqueue it once it has none left.
auto visit = [&](auto operation, auto ioperand) {
for (const auto& producer : ioperand->GetProducers()) {
if (producer->BelongTo() != this || producer == operation) {
continue;
}
auto nxtOpIndex = opToIndex[producer];
if (--outDegree[nxtOpIndex] == 0) {
q.emplace(nxtOpIndex);
}
}
};
std::vector<std::shared_ptr<Operation>> sortedOperations;
while (!q.empty()) {
// `op` refers to an element of operations_, so it stays valid after q.pop().
const auto& op = operations_[q.front()];
q.pop();
sortedOperations.emplace_back(op);
int prevOpIndex = prevOperation[opToIndex[op.get()]];
if (prevOpIndex >= 0) {
if (--outDegree[prevOpIndex] == 0) {
q.emplace(prevOpIndex);
}
}
for (auto& iop : op->iOperand) {
visit(op.get(), iop);
}
for (auto& dop : op->dependOperand) {
visit(op.get(), dop);
}
for (auto [type, index] : usageDict[op.get()]) {
if (type == GET_TENSOR_DATA_OPERAND_IOTYPE_INCAST) {
visit(op.get(), inCasts_[index]);
} else if (type == GET_TENSOR_DATA_OPERAND_IOTYPE_OUTCAST) {
visit(op.get(), outCasts_[index]);
}
}
}
// Any operation that still has dependents was never released, i.e. it sits on a cycle.
for (auto& op : operations_) {
FE_ASSERT(FeError::OP_DEPENDENCY_CYCLE, outDegree[opToIndex[op.get()]] == 0)
<< "cycle detected: " << op->Dump();
}
FE_ASSERT(operations_.size() == sortedOperations.size())
<< "Sorted operations size mismatch: " << sortedOperations.size() << " and original size "
<< operations_.size();
// Emission order was dependents-first; flip it so producers precede their consumers.
std::reverse(sortedOperations.begin(), sortedOperations.end());
return sortedOperations;
}
std::vector<std::shared_ptr<Operation>> Function::GetLightweightSortedOperations() const
{
FE_ASSERT(operationGroups_.empty()) << "Lightweight sort does not support operationGroups_.";
LightweightOperationSorter sorter(*this, operations_, inCasts_, outCasts_);
return sorter.Sort();
}
/**
 * \brief Re-order operations_ into dependency order and refresh the position index.
 *
 * \param mode GENERAL uses GetSortedOperations(), LIGHTWEIGHT uses
 *             GetLightweightSortedOperations(); any other value triggers FE_ASSERT.
 *
 * Afterwards opPosition_ is rebuilt and sorted_ is set.
 */
void Function::SortOperations(SortOperationsMode mode)
{
    std::vector<std::shared_ptr<Operation>> sortedOperations;
    switch (mode) {
        case SortOperationsMode::GENERAL:
            sortedOperations = GetSortedOperations();
            break;
        case SortOperationsMode::LIGHTWEIGHT:
            sortedOperations = GetLightweightSortedOperations();
            break;
        default:
            FE_ASSERT(FeError::INVALID_VAL, false) << "Invalid sort operations mode.";
            break;
    }
    // The local list is not used again: hand it over instead of copying every shared_ptr.
    operations_ = std::move(sortedOperations);
    RefreshOpPosition();
    sorted_ = true;
}
/**
 * \brief Re-order operations_ to follow an externally supplied schedule.
 *
 * \param newList     Desired order; must have the same size as operations_ and every entry must
 *                    be present in opPosition_ (both conditions are asserted).
 * \param needRefresh When true, opPosition_ is rebuilt before it is used for the lookups.
 *
 * Afterwards opPosition_ is rebuilt for the new order and sorted_ is set.
 */
void Function::ScheduleBy(const std::vector<Operation*>& newList, bool needRefresh)
{
    if (needRefresh) {
        RefreshOpPosition();
    }
    FE_ASSERT(newList.size() == operations_.size())
        << "Size mismatch: newList size = " << newList.size() << ", operations_ size = " << operations_.size();
    std::vector<std::shared_ptr<Operation>> newOperations;
    newOperations.reserve(newList.size());
    for (auto op : newList) {
        FE_ASSERT(FeError::NOT_EXIST, opPosition_.count(op) > 0) << "Operation not found in opPosition_:" << op->Dump();
        newOperations.emplace_back(operations_[opPosition_.at(op)]);
    }
    // The local list is not used again: hand it over instead of copying every shared_ptr.
    operations_ = std::move(newOperations);
    RefreshOpPosition();
    sorted_ = true;
}
/**
 * \brief Register a new operation group and tag its members with the group's id.
 *
 * The id is the index the group gets in operationGroups_. Every member must currently be
 * ungrouped (asserted). sorted_ is cleared afterwards.
 *
 * \param operationGroup Members of the new group, in group order.
 */
void Function::AddOperationGroup(std::vector<Operation*> operationGroup)
{
    const size_t newGroupId = operationGroups_.size();
    for (Operation* member : operationGroup) {
        FE_ASSERT(FeError::IS_EXIST, member->GroupID() == NON_GROUP)
            << "Operation already in a group:" << member->Dump();
        member->SetGroupID(newGroupId);
    }
    operationGroups_.emplace_back(std::move(operationGroup));
    sorted_ = false;
}
/**
 * \brief Drop every operation group and reset the group id of all former members to NON_GROUP.
 */
void Function::ClearOperationGroups()
{
    for (const auto& group : operationGroups_) {
        for (auto* member : group) {
            member->SetGroupID(NON_GROUP);
        }
    }
    operationGroups_.clear();
}
void Function::CheckGroupValid() const
{
std::unordered_set<const Operation*> inGroupOp;
for (size_t i = 0; i < operationGroups_.size(); i++) {
for (auto& operation : operationGroups_[i]) {
FE_ASSERT(operation->GroupID() == i) << "Operation GroupID mismatch:\n"
<< "Expected: " << i << ", Actual: " << operation->GroupID() << "\n"
<< "Operation:" << operation->Dump();
FE_ASSERT(FeError::IS_EXIST, inGroupOp.count(operation) == 0)
<< "Duplicate operation in group:" << operation->Dump();
inGroupOp.emplace(operation);
}
}
for (const auto& operation : operations_) {
FE_ASSERT(FeError::IS_EXIST, inGroupOp.count(operation.get()) == (operation->GroupID() != NON_GROUP))
<< "Operation group membership mismatch:\n"
<< "Operation: " << operation->Dump() << "\n"
<< "GroupID: " << operation->GroupID();
}
}
/**
 * \brief Rebuild opPosition_ (operation -> index in operations_) from scratch.
 *
 * Asserts when the same operation appears more than once in operations_.
 */
void Function::RefreshOpPosition()
{
    opPosition_.clear();
    size_t position = 0;
    for (const auto& op : operations_) {
        auto* rawOp = op.get();
        // NOTE(review): a duplicate is reported with FeError::NOT_EXIST here, while other
        // duplicate checks in this file use IS_EXIST - confirm which code is intended.
        FE_ASSERT(FeError::NOT_EXIST, opPosition_.count(rawOp) == 0)
            << "Duplicate operation found in opPosition_:\n"
            << op->Dump();
        opPosition_.emplace(rawOp, position);
        ++position;
    }
}
// Class-wide switch read by MagicLookup: when false, producers are taken from each tensor's own
// producer set; when true, they are taken from tensorAndSubgraphToProducer_ instead.
bool Function::enableMagicLookupRecord_{false};
// Class-wide table of producer sets keyed by {tensor magic, subgraph id}; read by MagicLookup when
// enableMagicLookupRecord_ is set. Where it is filled is not visible in this part of the file.
std::map<std::pair<int, int>, std::set<Operation*, LogicalTensor::CompareOp>> Function::tensorAndSubgraphToProducer_;
// Append a textual signature of the producers of `tensor` to `ss`, then recurse into the inputs of
// each producer through MagicLookup. The text is part of the string hashed by ComputeHashOrderless,
// so the exact tokens and their order matter.
//
// function    : function whose type / graph type selects which details are emitted.
// tensor      : tensor whose producers are described.
// producers   : producer operations to describe.
// subGraphId  : only producers in this subgraph are described; INT32_MIN disables the filter.
// index       : in/out running counter used by MagicLookup to number tensors.
// magic2index : in/out tensor magic -> assigned number.
// ss          : in/out signature text.
void Function::ProducerMagicLookup(
const Function* function, const LogicalTensorPtr& tensor,
const std::set<Operation*, LogicalTensor::CompareOp>& producers, const int subGraphId, int& index,
std::unordered_map<int, int>& magic2index, std::stringstream& ss)
{
for (auto& op : producers) {
if (subGraphId != INT32_MIN && op->GetSubgraphID() != subGraphId) {
continue;
}
ss << " " << op->GetOpcodeStr(true);
// Output attribute offsets are emitted only for the output slots that are `tensor` itself ...
for (size_t idx = 0; idx < op->GetOOperands().size(); idx++) {
if (op->GetOutputOperand(idx) == tensor) {
ss << "oAttrOffset " << idx << " " << op->GetOOpAttrOffset(idx) << " ";
}
}
// ... while input attribute offsets are emitted for every input slot.
for (size_t idx = 0; idx < op->GetIOperands().size(); idx++) {
ss << "iAttrOffset " << idx << " " << op->GetIOpAttrOffset(idx) << " ";
}
// STATIC functions also emit the raw shape of the first operand of boundary ops; the loops start
// at index 1, so dimension 0 is left out.
if (function->GetFunctionType() == FunctionType::STATIC) {
if (OpcodeManager::Inst().IsBoundaryIn(op->GetOpcode())) {
for (size_t idx = 1; idx < op->iOperand[0]->tensor->rawshape.size(); idx++) {
ss << op->iOperand[0]->tensor->rawshape[idx] << " ";
}
}
if (OpcodeManager::Inst().IsBoundaryOut(op->GetOpcode())) {
for (size_t idx = 1; idx < op->oOperand[0]->tensor->rawshape.size(); idx++) {
ss << op->oOperand[0]->tensor->rawshape[idx] << " ";
}
}
}
for (const auto& attr : OpcodeManager::Inst().GetAttrs(op->GetOpcode())) {
ss << " attr: [" << attr << " : " << op->DumpAttr(attr) << "]";
}
if (function->GetGraphType() != GraphType::BLOCK_GRAPH) {
ss << op->GetTileShape().ToString();
}
if (op->GetOpAttribute() != nullptr) {
if (op->GetOpcode() == Opcode::OP_ASSEMBLE) {
if (!SubgraphUtils::IsBoundary(op->oOperand[0])) {
ss << " " << op->GetOpAttribute()->Dump();
}
} else if (function->GetGraphType() == GraphType::BLOCK_GRAPH) {
ss << " " << op->GetOpAttribute()->Dump();
// NOTE(review): this branch is reached only when the graph type is not BLOCK_GRAPH, so the
// right-hand side of the `||` is always true and the copy-in/copy-out test has no effect -
// confirm whether copy ops were meant to be excluded.
} else if (
(!IsCopyIn(op->GetOpcode()) && !IsCopyOut(op->GetOpcode())) ||
function->GetGraphType() != GraphType::BLOCK_GRAPH) {
ss << " " << op->GetOpAttribute()->Dump();
}
}
// Continue with the inputs of this producer.
MagicLookup(function, op->iOperand, subGraphId, index, magic2index, ss);
}
}
// Append a parenthesised textual signature for every tensor in `operand` to `ss`, numbering tensors
// in visiting order and recursing into their producers through ProducerMagicLookup. The text is
// part of the string hashed by ComputeHashOrderless.
//
// function    : function providing inCastsSet_ and the function type.
// operand     : tensors to describe.
// subGraphId  : forwarded to ProducerMagicLookup as the producer filter.
// index       : in/out next number to assign.
// magic2index : in/out tensor magic -> assigned number.
// ss          : in/out signature text.
void Function::MagicLookup(
const Function* function, const std::vector<LogicalTensorPtr>& operand, const int subGraphId, int& index,
std::unordered_map<int, int>& magic2index, std::stringstream& ss)
{
for (auto& t : operand) {
// Skip a tensor that already has a number, unless it is an incast of `function` or has no
// producers; those are emitted again and receive a new number.
if (magic2index.count(t->GetMagic()) && (function->inCastsSet_.count(t) == 0) &&
t->GetProducers().size() != 0) {
continue;
}
magic2index[t->GetMagic()] = index++;
ss << "("
<< " " << static_cast<int>(t->tensor->datatype) << " ";
for (const auto& dim : t->shape) {
ss << dim << " ";
}
if (function->IsFunctionType(FunctionType::STATIC)) {
for (const auto& dim : t->oriShape) {
ss << dim << " ";
}
}
// Offsets are emitted only when the original memory type is not MEM_DEVICE_DDR.
if (t->GetMemoryTypeOriginal() != MemoryType::MEM_DEVICE_DDR) {
for (const auto& dim : t->offset) {
ss << dim << " ";
}
}
// Producer source: the tensor's own producers, or - when recording is enabled - the recorded
// set for {magic, subGraphId}; with recording enabled and no recorded set, nothing is emitted.
if (!enableMagicLookupRecord_) {
ProducerMagicLookup(function, t, t->GetProducers(), subGraphId, index, magic2index, ss);
} else if (tensorAndSubgraphToProducer_.count({t->GetMagic(), subGraphId}) > 0) {
ProducerMagicLookup(
function, t, tensorAndSubgraphToProducer_[{t->GetMagic(), subGraphId}], subGraphId, index, magic2index,
ss);
}
ss << ")";
}
}
// Build a textual signature of this function and return std::hash<std::string> of it.
//
// The signature is assembled by walking from the outcasts back through producers (MagicLookup),
// so it follows the data-flow structure rather than the order of operations_. Any change to the
// emitted text changes every hash value.
unsigned long Function::ComputeHashOrderless() const
{
std::stringstream ss;
ss << std::to_string(static_cast<int>(functionType_)) << " ";
ss << std::to_string(static_cast<int>(graphType_)) << " ";
// The magic name is left out for BLOCK_GRAPH / LEAF_VF_GRAPH functions and for STATIC
// TENSOR_GRAPH functions.
if (!IsGraphType({GraphType::BLOCK_GRAPH, GraphType::LEAF_VF_GRAPH}) &&
!IsFunctionTypeAndGraphType(FunctionType::STATIC, GraphType::TENSOR_GRAPH)) {
ss << GetMagicName() << " ";
}
int index = 0;
std::unordered_map<int, int> magic2index;
// Block graphs restrict the walk to the subgraph of the last operation; other graph types pass
// INT32_MIN, which ProducerMagicLookup treats as "no subgraph filter".
if (graphType_ == GraphType::BLOCK_GRAPH) {
if (operations_.size()) {
MagicLookup(
this, GetOutcast(), operations_[operations_.size() - 1]->GetSubgraphID(), index, magic2index, ss);
}
} else {
MagicLookup(this, GetOutcast(), INT32_MIN, index, magic2index, ss);
}
// Operations without outputs cannot be reached from the outcasts, so they are added explicitly.
// NOTE(review): the subgraph filter here is that of operations_[0] for every graph type, unlike
// the outcast walk above - confirm this is intended.
for (size_t i = 0; i < operations_.size(); i++) {
if (operations_[i]->oOperand.empty()) {
ss << " " << operations_[i]->GetOpcodeStr(true);
for (const auto& attr : OpcodeManager::Inst().GetAttrs(operations_[i]->GetOpcode())) {
ss << " attr: [" << attr << " : " << operations_[i]->DumpAttr(attr) << "]";
}
ss << operations_[i]->GetTileShape().ToString();
MagicLookup(this, operations_[i]->GetIOperands(), operations_[0]->GetSubgraphID(), index, magic2index, ss);
}
}
// magic2index[...] default-inserts 0 for a cast that the walks above never numbered.
for (auto& i : inCasts_) {
ss << "(i" << magic2index[i->GetMagic()] << ")";
bool isGlobal = (globalTensors_.count(i) != 0);
if (isGlobal) {
ss << "(Global)";
}
}
for (auto& o : outCasts_) {
ss << "(o" << magic2index[o->GetMagic()] << ")";
bool isGlobal = (globalTensors_.count(o) != 0);
if (isGlobal) {
ss << "(Global)";
}
}
if (functionType_ == FunctionType::DYNAMIC_LOOP && dynloopAttr_ != nullptr) {
ss << "symbol name:[" << dynloopAttr_->iterSymbolName << "]";
ss << "loop range:[" << dynloopAttr_->loopRange.Dump() << "]";
}
if (functionType_ == FunctionType::DYNAMIC) {
ss << "dynamic unaligned:" << config::GetCodeGenOption<bool>(SUPPORT_DYNAMIC_ALIGNED);
}
if (leafFuncAttr_ != nullptr) {
if (leafFuncAttr_->mixId != LeafFuncAttribute::INVALID_MIX_ID) {
ss << " MIX_ID:" << leafFuncAttr_->mixId;
}
if (leafFuncAttr_->aivCore != AIVCore::UNSPECIFIED) {
ss << " AIV_CORE:" << static_cast<int>(leafFuncAttr_->aivCore);
}
}
std::hash<std::string> hasher;
auto result = hasher(ss.str());
FE_LOGD(
"Hash for function %d %s is %s hash value is %lu\n", functionMagic_, GetMagicName().c_str(), ss.str().c_str(),
result);
return result;
}
void Function::EraseOperations(bool eraseRelatedTensor, bool sorted)
{
std::unordered_set<LogicalTensorPtr> inOutCastSet(inCasts_.begin(), inCasts_.end());
inOutCastSet.insert(outCasts_.begin(), outCasts_.end());
std::vector<std::shared_ptr<Operation>> operations;
std::unordered_set<std::shared_ptr<LogicalTensor>> removeCandidiateTensor;
std::unordered_set<std::shared_ptr<LogicalTensor>> removeProducerTensor;
for (auto& op : operations_) {
if (!op->IsDeleted()) {
operations.emplace_back(op);
continue;
}
FE_ASSERT(op->IsDeleted()) << "Operation not marked as deleted:" << op->Dump();
for (auto& input : op->GetIOperands()) {
input->RemoveConsumer(op.get());
removeCandidiateTensor.insert(input);
}
for (auto& output : op->GetOOperands()) {
output->RemoveProducer(op.get());
removeCandidiateTensor.insert(output);
removeProducerTensor.insert(output);
}
for (auto& depend : op->GetDependOperands()) {
depend->RemoveDependOp(op.get());
removeCandidiateTensor.insert(depend);
}
}
operations_ = operations;
if (eraseRelatedTensor) {
for (auto tensorPtr : removeCandidiateTensor) {
if (inOutCastSet.count(tensorPtr) != 0) {
continue;
}
if (tensorPtr->GetProducers().empty() && tensorPtr->GetConsumers().empty()) {
GetTensorMap().Erase(tensorPtr);
} else if (removeProducerTensor.count(tensorPtr) > 0 && tensorPtr->GetProducers().empty()) {
GetTensorMap().Erase(tensorPtr);
for (auto& consumer : tensorPtr->GetConsumers()) {
if (consumer->BelongTo() == this) {
consumer->EraseInput(tensorPtr);
}
}
}
}
}
if (sorted) {
SortOperations();
}
}
/**
 * \brief Delete every operation selected by a predicate.
 *
 * Operations are sorted first when sorted_ is not set, so the predicate sees them in sorted
 * order. Each operation for which the predicate returns true is marked as deleted, then the
 * default EraseOperations() overload performs the actual removal.
 *
 * \param deleter Predicate called as deleter(op, *this).
 */
void Function::EraseOperations(const OperationDeleter& deleter)
{
    if (!sorted_) {
        SortOperations();
    }
    for (auto& candidate : operations_) {
        if (!deleter(candidate, *this)) {
            continue;
        }
        candidate->SetAsDeleted();
    }
    EraseOperations();
}
/**
 * \brief Return the hash of this function, computing it when needed.
 *
 * A non-zero cached hash is reused unless the function type is DYNAMIC_LOOP or DYNAMIC, in which
 * case the hash is always recomputed. Before computing, the current incasts are added to
 * inCastsSet_ (existing entries are kept).
 */
FunctionHash Function::ComputeHash()
{
    const bool alwaysRecompute =
        functionType_ == FunctionType::DYNAMIC_LOOP || functionType_ == FunctionType::DYNAMIC;
    const bool hasCachedHash = functionHash_.GetHash() != 0;
    if (hasCachedHash && !alwaysRecompute) {
        return functionHash_;
    }
    for (auto& incast : inCasts_) {
        inCastsSet_.emplace(incast);
    }
    functionHash_ = ComputeHashOrderless();
    return functionHash_;
}
void Function::BuildTensorMap()
{
tensorMap_.Reset();
for (auto& ele : inCasts_) {
tensorMap_.Insert(ele);
}
for (auto& ele : operations_) {
for (auto& oOp : ele->GetOOperands()) {
if (!tensorMap_.GetTensorByMagic(oOp->magic)) {
tensorMap_.Insert(oOp);
}
}
}
}
/**
 * \brief Add an operation identified by its opcode name.
 *
 * Resolves \p opName with FindOpcode and forwards to the Opcode overload.
 *
 * \param opName    Name of the opcode.
 * \param iOperands Input operands.
 * \param oOperands Output operands.
 * \return Reference to the operation returned by the Opcode overload.
 */
Operation& Function::AddOperation(const std::string& opName, LogicalTensors iOperands, const LogicalTensors& oOperands)
{
    // iOperands is already a private copy; move it on instead of copying it a second time.
    return AddOperation(FindOpcode(opName), std::move(iOperands), oOperands);
}
/**
 * \brief Add an operation, materialising pending slices of its operands as explicit ops.
 *
 * An input operand carrying a "SLICE_PARENT" attribute gets an OP_VIEW (parent -> operand) added
 * before the requested operation; an output operand carrying it gets an OP_ASSEMBLE
 * (operand -> parent) added after it. In both cases the operand's offsets are then reset to zero,
 * the attribute is removed and the operand is re-pointed to a fresh RawTensor of its own shape.
 *
 * \param opCode    Opcode of the operation to add.
 * \param iOperands Input operands.
 * \param oOperands Output operands.
 * \return Reference to the requested operation (not to the helper view/assemble operations).
 */
Operation& Function::AddOperation(const Opcode opCode, LogicalTensors iOperands, const LogicalTensors& oOperands)
{
    CheckTensorDynamicShape(iOperands, opCode);
    // Zero both the static and the symbolic offset of a tensor, keeping its rank.
    const auto resetOffsets = [](const LogicalTensorPtr& tensor) {
        const int rank = tensor->shape.size();
        tensor->offset = std::vector<int64_t>(rank, 0);
        tensor->dynOffset_ = std::vector<SymbolicScalar>(rank, SymbolicScalar(0));
    };
    for (auto& input : iOperands) {
        LogicalTensorPtr sliceParent = nullptr;
        if (!input->GetAttr("SLICE_PARENT", sliceParent)) {
            continue;
        }
        // The replacement raw tensor is built first but installed only after the view op exists.
        auto freshRaw = std::make_shared<RawTensor>(
            input->tensor->datatype, input->shape, input->tensor->format, input->tensor->symbol);
        auto& viewOp = AddRawOperation(Opcode::OP_VIEW, {sliceParent}, {input});
        viewOp.SetOpAttribute(
            std::make_shared<ViewOpAttribute>(input->offset, input->dynOffset_, input->dynValidShape_));
        resetOffsets(input);
        input->RemoveAttr("SLICE_PARENT");
        input->tensor = freshRaw;
    }
    auto& requestedOp = AddRawOperation(opCode, iOperands, oOperands);
    for (auto& output : oOperands) {
        LogicalTensorPtr sliceParent = nullptr;
        if (!output->GetAttr("SLICE_PARENT", sliceParent)) {
            continue;
        }
        auto freshRaw = std::make_shared<RawTensor>(
            output->tensor->datatype, output->shape, output->tensor->format, output->tensor->symbol);
        auto& assembleOp = AddRawOperation(Opcode::OP_ASSEMBLE, {output}, {sliceParent});
        assembleOp.SetAssembleOpAttribute(output->offset, output->dynOffset_);
        resetOffsets(output);
        output->RemoveAttr("SLICE_PARENT");
        output->tensor = freshRaw;
    }
    return requestedOp;
}
/**
 * \brief Make sure an import view exists for every tensor-data index referenced by \p op.
 *
 * Does nothing when there is no current dynamic function or it has no dyndev attribute. For each
 * index found in the dynamic attributes of \p op that has no entry in this function's import
 * table yet, a view over the described assemble tensor (shape all ones, offset all zeros) is
 * created, its producer is tagged as EMUOP_TENSOR_GETDATA_IMPORT with that index, and the
 * producer is stored in the import table.
 *
 * \param op Operation whose dynamic attribute list is inspected.
 */
void Function::UpdateTensorDataUsage(Operation& op)
{
    auto dynFunc = Program::GetInstance().GetCurrentDynamicFunction();
    if (dynFunc == nullptr) {
        return;
    }
    auto dynDevAttr = dynFunc->GetDyndevAttribute();
    if (dynDevAttr == nullptr) {
        return;
    }
    auto& descDict = dynDevAttr->getTensorDataDescDict;
    auto& importDict = dynDevAttr->getTensorDataUsageDict[this].importDict;
    auto dynAttrList = op.GetDynamicAttributeList();
    auto referencedData = GetTensorDataDict(dynAttrList);
    for (auto& [index, callList] : referencedData) {
        (void)callList;
        FE_ASSERT(FeError::INVALID_VAL, descDict.count(index)) << "Invalid index" << op.Dump();
        if (importDict.count(index)) {
            continue; // this index already has its import operation
        }
        auto assemble = descDict[index].assembleTensor;
        const auto rank = assemble->GetShape().size();
        std::vector<int64_t> importShape(rank, 1);
        std::vector<int64_t> importOffset(rank, 0);
        auto importView = View(*assemble, importShape, importOffset);
        // The operation that produces the view's storage is the import operation.
        auto importOp = *importView.GetStorage()->GetProducers().begin();
        SetEmuOpcode(importOp, EMUOP_TENSOR_GETDATA_IMPORT);
        GetTensorDataSetIndex(importOp, index);
        importDict[index] = importOp;
    }
}
/**
 * \brief Append a new operation to operations_ without any slice handling.
 *
 * sorted_ becomes true for STATIC functions with an EXECUTE_GRAPH or BLOCK_GRAPH graph type and
 * for DYNAMIC functions, and false otherwise. The operation is registered in opPosition_, gets
 * its scope from the SG_SET_SCOPE pass option (three values: full scope info; one value: scope
 * id; anything else: scope id -1) and, when \p span is known, its source span.
 *
 * \param opCode    Opcode of the new operation.
 * \param iOperands Input operands.
 * \param oOperands Output operands.
 * \param span      Source span to attach; ignored when unknown.
 * \return Reference to the newly added operation.
 */
Operation& Function::AddRawOperation(
    const Opcode opCode, const LogicalTensors& iOperands, const LogicalTensors& oOperands, ir::Span span)
{
    const bool staticLeafGraph =
        IsFunctionTypeAndGraphType(FunctionType::STATIC, {GraphType::EXECUTE_GRAPH, GraphType::BLOCK_GRAPH});
    sorted_ = staticLeafGraph || functionType_ == FunctionType::DYNAMIC;

    operations_.emplace_back(std::make_shared<Operation>(*this, opCode, iOperands, oOperands));
    Operation& added = *operations_.back();
    opPosition_.emplace(&added, operations_.size() - 1);

    auto scopeConfig = config::GetPassOption<std::vector<int64_t>>(SG_SET_SCOPE);
    switch (scopeConfig.size()) {
        case 3:
            added.SetScopeInfo(Operation::ScopeInfo::FromConfig(scopeConfig));
            break;
        case 1:
            added.SetScopeId(static_cast<int>(scopeConfig[0]));
            break;
        default:
            added.SetScopeId(-1);
            break;
    }
    if (!span.IsUnknown()) {
        added.SetSpan(span);
    }
    return added;
}
/**
 * \brief Give \p dst the memory id of \p operand and record the link between their raw tensors.
 *
 * Asserts that both tensors have the same data type. The link is stored in outIncastLinkMap as
 * raw tensor of \p dst -> raw tensor of \p operand.
 *
 * \param operand Tensor whose memory id is copied.
 * \param dst     Tensor whose raw tensor receives the memory id.
 */
void Function::SetSameMemId(const LogicalTensorPtr& operand, LogicalTensorPtr& dst)
{
    FE_ASSERT(FeError::INVALID_TYPE, operand->Datatype() == dst->Datatype()) << "Check Dtype failed!";
    const auto targetRaw = dst->GetRawTensor();
    const auto sourceRaw = operand->GetRawTensor();
    targetRaw->memoryId = sourceRaw->memoryId;
    outIncastLinkMap[targetRaw] = sourceRaw;
}
std::vector<Operation*> Function::GetAllInputOperations(const Operation& op) const
{
std::vector<Operation*> retOps;
if (op.BelongTo() != this) {
return retOps;
}
for (const LogicalTensorPtr& inTensor : op.GetIOperands()) {
if (inTensor == nullptr) {
continue;
}
for (auto& producer : inTensor->GetProducers()) {
retOps.push_back(producer);
}
}
return retOps;
}
std::vector<Operation*> Function::GetAllOutputOperations(const Operation& op) const
{
std::vector<Operation*> retOps;
if (op.BelongTo() != this) {
return retOps;
}
for (const LogicalTensorPtr& outTensor : op.GetOOperands()) {
if (outTensor == nullptr) {
continue;
}
for (auto& consumer : outTensor->GetConsumers()) {
retOps.push_back(consumer);
}
}
return retOps;
}
std::vector<Operation*> Function::GetCallopList() const
{
std::vector<Operation*> callopList;
for (auto& op : operations_) {
if (op->GetOpcode() != Opcode::OP_CALL) {
continue;
}
callopList.push_back(op.get());
}
return callopList;
}
std::vector<std::shared_ptr<CallOpAttribute>> Function::GetCallopAttrList() const
{
std::vector<Operation*> callopList = GetCallopList();
std::vector<std::shared_ptr<CallOpAttribute>> callopAttrList;
for (auto callop : callopList) {
auto callopAttr = std::static_pointer_cast<CallOpAttribute>(callop->GetOpAttribute());
callopAttrList.push_back(callopAttr);
}
return callopAttrList;
}
std::vector<Function*> Function::GetCalleeFunctionList() const
{
std::vector<Operation*> callopList = GetCallopList();
std::vector<Function*> calleeFuncList;
for (auto callop : callopList) {
auto callopAttr = std::static_pointer_cast<CallOpAttribute>(callop->GetOpAttribute());
auto calleeFunc = Program::GetInstance().GetFunctionByMagicName(callopAttr->GetCalleeMagicName());
FE_ASSERT(FeError::NOT_EXIST, calleeFunc) << callopAttr->GetCalleeMagicName() << " is not in functionmap!";
calleeFuncList.push_back(calleeFunc);
}
return calleeFuncList;
}
void Function::SubstituteIn(std::shared_ptr<LogicalTensor> oldTensor, std::shared_ptr<LogicalTensor> newTensor)
{
for (auto& operation : operations_) {
auto& cur = *operation;
for (size_t i = 0; i < cur.GetInputOperandSize(); i++) {
if (cur.GetInputOperand(i) == oldTensor) {
cur.ReplaceIOperand(i, newTensor);
}
}
}
}
void Function::SubstituteOut(std::shared_ptr<LogicalTensor> oldTensor, std::shared_ptr<LogicalTensor> newTensor)
{
for (auto& operation : operations_) {
auto& cur = *operation;
for (size_t i = 0; i < cur.GetOOperands().size(); i++) {
if (cur.GetOutputOperand(i) == oldTensor) {
cur.ReplaceOOperand(i, newTensor);
}
}
}
}
// Replace oldTensor by newTensor everywhere in this function: first in the input operands of all
// operations (SubstituteIn), then in their output operands (SubstituteOut).
void Function::Substitute(std::shared_ptr<LogicalTensor> oldTensor, std::shared_ptr<LogicalTensor> newTensor)
{
SubstituteIn(oldTensor, newTensor);
SubstituteOut(oldTensor, newTensor);
}
void Function::RemoveOriginIncastConsumer(const std::shared_ptr<LogicalTensor>& originIncast) const
{
for (const auto& producer : originIncast->GetProducers()) {
auto targetFunc = this;
while (targetFunc != producer->BelongTo()) {
if (!targetFunc->HasParent()) {
targetFunc = nullptr;
break;
}
targetFunc = &targetFunc->Parent();
}
FE_ASSERT(FeError::INVALID_PTR, targetFunc != nullptr) << "Failed to find the target function for producer:\n"
<< "Producer: " << producer->Dump();
for (auto& oOperandForProducerOp : producer->oOperand) {
auto& consumers = oOperandForProducerOp->GetConsumers();
for (auto it = consumers.begin(); it != consumers.end();) {
if ((*it)->BelongTo() == this) {
it = consumers.erase(it);
} else {
it++;
}
}
}
}
}
/**
 * \brief Re-point outIncastLinkMap from the raw tensor of one logical tensor to that of another.
 *
 * \param oriLogicalTensor Tensor whose raw tensor is currently referenced by the map.
 * \param newLogicalTensor Tensor whose raw tensor should be referenced instead.
 * \param isOutCast        true: the raw tensor is looked up as a key; if found, the entry is
 *                         re-keyed to the new raw tensor, which also inherits the memory id of
 *                         the linked raw tensor. false: every entry whose value is the original
 *                         raw tensor gets the new raw tensor as value.
 */
void Function::UpdateLinkMap(
    const std::shared_ptr<LogicalTensor>& oriLogicalTensor, const std::shared_ptr<LogicalTensor>& newLogicalTensor,
    const bool isOutCast)
{
    if (isOutCast) {
        auto it = outIncastLinkMap.find(oriLogicalTensor->tensor);
        if (it == outIncastLinkMap.end()) {
            return;
        }
        // Copy the linked raw tensor out before inserting: insertion through operator[] may
        // invalidate `it` when the map is an unordered container, so the iterator is not
        // touched again and the old entry is erased by key.
        const auto linkedRaw = it->second;
        outIncastLinkMap[newLogicalTensor->tensor] = linkedRaw;
        newLogicalTensor->tensor->memoryId = linkedRaw->memoryId;
        FE_LOGD("UpdateLinkMap memoryId to %d \n", linkedRaw->memoryId);
        outIncastLinkMap.erase(oriLogicalTensor->tensor);
        return;
    }
    for (auto& ele : outIncastLinkMap) {
        if (ele.second == oriLogicalTensor->tensor) {
            ele.second = newLogicalTensor->tensor;
        }
    }
}
/**
 * \brief Create the incast tensor of this function that corresponds to a caller-side argument.
 *
 * The new tensor copies data type, shape, dynamic raw shape and format from \p inArgument. Its
 * symbol is the argument's symbol, or "INCAST_SYMBOL<n>" (n = current number of incasts) when
 * that symbol is empty. The tensor is appended to inCasts_ and the out/incast link map is updated
 * from the argument to the new tensor.
 *
 * \param inArgument Caller-side tensor.
 * \return The newly created incast tensor.
 */
std::shared_ptr<LogicalTensor> Function::CreateIncastTensor(const std::shared_ptr<LogicalTensor>& inArgument)
{
    const auto incastIndex = inCasts_.size();
    auto symbolName = inArgument->tensor->GetSymbol();
    if (symbolName.empty()) {
        symbolName = "INCAST_SYMBOL" + std::to_string(incastIndex);
    }
    auto incast = std::make_shared<LogicalTensor>(
        *this, inArgument->tensor->datatype, inArgument->shape, inArgument->tensor->GetDynRawShape(),
        inArgument->Format(), symbolName);
    incast->tensor->UpdateDynRawShape(inArgument->tensor->GetDynRawShape());
    inCasts_.push_back(incast);
    UpdateLinkMap(inArgument, incast);
    return incast;
}
/**
 * \brief Add the OP_VIEW that reads a local tensor out of an incast.
 *
 * The view (symbol -> newIncast) is flagged as global input and carries the offset of \p origin;
 * when \p origin has no dynamic offset, one is derived from its static offset. The dynamic valid
 * shape of \p symbol must not be empty (asserted) and is used for the view attribute, for the
 * valid shape of \p newIncast and for the dynamic raw shape of its raw tensor. Finally the memory
 * type of \p origin is copied to \p newIncast.
 *
 * \param symbol    Incast tensor of this function.
 * \param newIncast Local tensor produced by the view.
 * \param origin    Tensor originally used inside the function, source of offset and memory type.
 */
void Function::CreateFromIncast(
    const std::shared_ptr<LogicalTensor>& symbol, const std::shared_ptr<LogicalTensor>& newIncast,
    const std::shared_ptr<LogicalTensor>& origin)
{
    // The span is only set for the duration of the AddOperation call.
    ir::Span::SetCurrent(ir::Span(__FILE__, __LINE__, 0));
    auto& viewOp = AddOperation(Opcode::OP_VIEW, {symbol}, {newIncast});
    ir::Span::ClearCurrent();
    viewOp.SetAttr(OpAttributeKey::isGlobalInput, true);

    auto symbolicOffset = origin->GetDynOffset();
    if (symbolicOffset.empty()) {
        symbolicOffset = SymbolicScalar::FromConcrete(origin->GetOffset());
    }
    auto symbolValidShape = symbol->GetDynValidShape();
    ASSERT(!symbolValidShape.empty());
    viewOp.SetOpAttribute(
        std::make_shared<ViewOpAttribute>(origin->GetOffset(), symbolicOffset, symbolValidShape));

    newIncast->UpdateDynValidShape(symbolValidShape);
    newIncast->GetRawTensor()->UpdateDynRawShape(symbol->GetDynValidShape());
    newIncast->CopyMemoryType(origin);
}
// Turn every original incast into (caller-side argument, incast symbol, local tensor) and rewire
// the function to use the local tensor.
//
// scope  : when non-null, receives the incast symbol -> caller argument mapping.
// return : the caller-side arguments, one per original incast, in the same order.
//
// The steps inside the loop depend on each other and must stay in this order.
LogicalTensors Function::MakeIncasts(const std::shared_ptr<TensorSlotScope>& scope)
{
LogicalTensors inArgumentList;
for (size_t idx = 0; idx < originInCasts_.size(); idx++) {
// Copy the pointer: originInCasts_[idx] is overwritten further down.
auto origin = originInCasts_[idx];
int rank = origin->shape.size();
std::vector<int64_t> zeroOffset(rank, 0);
// Caller-side view of the same raw tensor, owned by the parent function, at offset zero.
auto inArgument = std::make_shared<LogicalTensor>(Parent(), origin->tensor, zeroOffset, origin->shape);
inArgumentList.push_back(inArgument);
auto incastSymbol = CreateIncastTensor(inArgument);
if (scope) {
scope->incastToInArgumentDict[incastSymbol] = inArgument;
}
// NOTE(review): the bound is checked against oriIncastReadSlotSet only, while
// originalIocastsSlot.incastSlot is indexed with the same idx - confirm both have the same size.
if (slotScope_ && idx < slotScope_->oriIncastReadSlotSet.size()) {
slotScope_->incastReadSlotSet.push_back(slotScope_->oriIncastReadSlotSet[idx]);
slotScope_->ioslot.incastSlot.push_back(slotScope_->originalIocastsSlot.incastSlot[idx]);
}
// Local tensor that replaces `origin` inside the function; it is produced by a view of the symbol.
auto newIncast = std::make_shared<LogicalTensor>(
*this, origin->tensor->datatype, origin->shape, origin->Format(), "INCAST_LOCAL_BUF" + std::to_string(idx));
CreateFromIncast(incastSymbol, newIncast, origin);
Substitute(origin, newIncast);
originInCasts_[idx] = newIncast;
// Finally drop this function's operations from the consumer sets on the producer side.
RemoveOriginIncastConsumer(origin);
}
return inArgumentList;
}
/**
 * \brief Create the outcast symbol tensor for an out-argument and append it to outCasts_.
 *
 * The symbol reuses the out-argument's raw-tensor symbol name; when that name is empty, a name of the
 * form "OUTCAST_SYMBOL<position>" is generated from the current outcast count.
 *
 * \param outArgument Caller-side tensor the outcast corresponds to.
 * \return The newly created outcast symbol tensor.
 */
std::shared_ptr<LogicalTensor> Function::CreateOutcastTensor(const std::shared_ptr<LogicalTensor>& outArgument)
{
    auto symbolName = outArgument->tensor->GetSymbol();
    if (symbolName.empty()) {
        symbolName = "OUTCAST_SYMBOL" + std::to_string(outCasts_.size());
    }

    auto outcastSymbol = std::make_shared<LogicalTensor>(
        *this, outArgument->tensor->datatype, outArgument->shape, outArgument->tensor->GetDynRawShape(),
        outArgument->Format(), symbolName);
    outcastSymbol->tensor->UpdateDynRawShape(outArgument->tensor->GetDynRawShape());

    outCasts_.push_back(outcastSymbol);
    UpdateLinkMap(outArgument, outcastSymbol, true);
    return outcastSymbol;
}
/**
 * \brief Connect a function-local outcast buffer to its outcast symbol through an OP_ASSEMBLE.
 * \param symbol     Outcast symbol tensor; output of the assemble operation.
 * \param newOutcast Local tensor consumed by the assemble operation.
 * \param origin     Original outcast tensor that supplies the offsets and, if present, the valid shape.
 */
void Function::CreateFromOutcast(
    const LogicalTensorPtr& symbol, const LogicalTensorPtr& newOutcast, const LogicalTensorPtr& origin)
{
    // The current span is set only around the AddOperation call and cleared right after it.
    ir::Span::SetCurrent(ir::Span(__FILE__, __LINE__, 0));
    auto& assembleOp = AddOperation(Opcode::OP_ASSEMBLE, {newOutcast}, {symbol});
    ir::Span::ClearCurrent();

    // Use the origin's dynamic offset when it has one, otherwise lift its concrete offset.
    auto assembleDynOffset = origin->GetDynOffset();
    if (assembleDynOffset.empty()) {
        assembleDynOffset = SymbolicScalar::FromConcrete(origin->GetOffset());
    }

    // The origin's valid shape wins; the symbol's valid shape is the fallback.
    auto validShape = origin->GetDynValidShape();
    if (validShape.empty()) {
        validShape = symbol->GetDynValidShape();
    }
    ASSERT(!validShape.empty());

    assembleOp.SetOpAttribute(std::make_shared<AssembleOpAttribute>(origin->GetOffset(), assembleDynOffset));
    newOutcast->UpdateDynValidShape(validShape);
    // The dynamic raw shape always comes from the symbol, even when the origin supplied the valid shape.
    newOutcast->GetRawTensor()->UpdateDynRawShape(symbol->GetDynValidShape());
}
namespace {
struct OutcastProducerTraits {
bool isAssembleOut{false};
bool needRuntimeAlloc{false};
};
bool IsAssembleLikeProducer(const Operation* op)
{
return (op->GetOpcode() == Opcode::OP_ASSEMBLE && op->HasAttribute("dassemble")) ||
op->GetOpcode() == Opcode::OP_ASSEMBLE_SSA || op->GetOpcode() == Opcode::OP_ATOMIC_RMW;
}
OutcastProducerTraits ClassifyOutcastByProducers(const LogicalTensor& origin, bool isLinkedInplaceOutcast)
{
OutcastProducerTraits traits;
for (Operation* op : origin.GetProducers()) {
if (IsAssembleLikeProducer(op)) {
traits.isAssembleOut = true;
} else {
if (!isLinkedInplaceOutcast) {
traits.needRuntimeAlloc = true;
}
}
}
return traits;
}
}
/**
 * \brief Rewire every original outcast through an outcast symbol.
 *
 * For each entry of originOutCasts_ this creates an out-argument tensor on Parent() and an outcast
 * symbol. Outcasts with an assemble-like producer are substituted by the symbol directly; all others get
 * a local buffer that is assembled into the symbol.
 *
 * \param scope Optional slot scope; when set, the symbol -> out-argument mapping (and the partial-update
 *              flag for assemble outputs) is recorded in it.
 * \return The out-argument tensors, in the order of originOutCasts_.
 */
LogicalTensors Function::MakeOutcasts(const std::shared_ptr<TensorSlotScope>& scope)
{
    LogicalTensors outArguments;
    for (size_t idx = 0; idx < originOutCasts_.size(); ++idx) {
        // Keep our own reference: originOutCasts_[idx] is overwritten further down.
        auto origin = originOutCasts_[idx];

        // The out-argument covers the whole raw tensor, starting at a zero offset.
        const int rank = static_cast<int>(origin->shape.size());
        std::vector<int64_t> zeroOffset(rank, 0);
        auto outArgument = std::make_shared<LogicalTensor>(
            Parent(), origin->tensor, zeroOffset, origin->shape, origin->tensor->GetDynRawShape());
        Parent().tensorMap_.Insert(outArgument);
        outArguments.push_back(outArgument);

        // Sampled before CreateOutcastTensor runs, i.e. before it calls UpdateLinkMap.
        bool isLinkedInplaceOutcast = outIncastLinkMap.count(origin->GetRawTensor()) != 0;
        auto outSymbol = CreateOutcastTensor(outArgument);
        if (scope) {
            scope->outcastToOutArgumentDict[outSymbol] = outArgument;
        }

        // Carry the original slot information over to the active slot lists.
        const bool hasOriginalSlot = slotScope_ && idx < slotScope_->oriOutcastWriteSlotSet.size();
        if (hasOriginalSlot) {
            slotScope_->outcastWriteSlotSet.push_back(slotScope_->oriOutcastWriteSlotSet[idx]);
            slotScope_->ioslot.outcastSlot.push_back(slotScope_->originalIocastsSlot.outcastSlot[idx]);
        }

        const OutcastProducerTraits traits = ClassifyOutcastByProducers(*origin, isLinkedInplaceOutcast);
        SetOutcastNeedAlloc(outCasts_.back(), traits.needRuntimeAlloc);

        if (!traits.isAssembleOut) {
            // Regular producers write a local buffer that is assembled into the symbol.
            auto localBuffer = std::make_shared<LogicalTensor>(
                *this, origin->tensor->datatype, origin->shape, origin->Format(),
                "OUTCAST_LOCAL_BUF" + std::to_string(idx));
            CreateFromOutcast(outSymbol, localBuffer, origin);
            Substitute(origin, localBuffer);
            originOutCasts_[idx] = localBuffer;
            continue;
        }

        // Assemble-like producers write the symbol directly.
        Substitute(origin, outSymbol);
        originOutCasts_[idx] = outSymbol;
        if (scope) {
            scope->partialUpdateOutcastDict[outSymbol] = true;
        }
    }
    return outArguments;
}
bool Function::IsFlattening() const
{
return IsFunctionTypeAndGraphType(FunctionType::STATIC, {GraphType::TENSOR_GRAPH, GraphType::TILE_GRAPH});
}
FunctionType Function::GetFunctionType() const { return functionType_; }
void Function::SetFunctionType(FunctionType type) { functionType_ = type; }
std::string Function::GetFunctionTypeStr() const { return GetFunctionTypeNameDict().Find(functionType_); }
GraphType Function::GetGraphType() const { return graphType_; }
void Function::SetGraphType(GraphType type) { graphType_ = type; }
bool Function::IsFunctionType(FunctionType type) const { return functionType_ == type; }
bool Function::IsFunctionType(std::set<FunctionType> types) const { return types.count(functionType_) != 0; }
bool Function::IsGraphType(GraphType type) const { return graphType_ == type; }
bool Function::IsGraphType(std::set<GraphType> types) const { return types.count(graphType_) != 0; }
bool Function::IsFunctionTypeAndGraphType(FunctionType funcType, GraphType graphType) const
{
return functionType_ == funcType && graphType_ == graphType;
}
bool Function::IsFunctionTypeAndGraphType(FunctionType funcType, std::set<GraphType> graphTypes) const
{
return IsFunctionType(funcType) && IsGraphType(graphTypes);
}
bool Function::IsFunctionTypeAndGraphType(std::set<FunctionType> funcTypes, GraphType graphType) const
{
return IsFunctionType(funcTypes) && IsGraphType(graphType);
}
bool Function::IsFunctionTypeAndGraphType(std::set<FunctionType> funcTypes, std::set<GraphType> graphTypes) const
{
return IsFunctionType(funcTypes) && IsGraphType(graphTypes);
}
/**
 * \brief Write this function as a one-function program dump to a JSON file.
 * \param fileName Target path; when empty, "<log top folder>/<raw function name>.json" is used.
 */
void Function::DumpJsonFile(std::string fileName)
{
    // The default path is always computed first; an explicit file name then replaces it.
    auto filePath = config::LogTopFolder() + "/" + funcRawName_ + ".json";
    if (!fileName.empty()) {
        filePath = fileName;
    }

    std::ofstream file(filePath);
    CHECK(FeError::BAD_FD, file.is_open()) << "Failed to open file: " << filePath;

    // Wrap the function dump in the program-level envelope.
    Json progDump;
    progDump["version"] = T_VERSION;
    progDump["functions"].push_back(DumpJson());
    progDump["entryhash"] = this->GetFunctionHash().Data();

    file << progDump.dump(1) << std::endl;
    file.close();
}
/** \brief Orders raw tensors by ascending rawmagic. */
struct RawTensorCompare {
    bool operator()(const std::shared_ptr<RawTensor>& lhs, const std::shared_ptr<RawTensor>& rhs) const
    {
        return rhs->rawmagic > lhs->rawmagic;
    }
};
/** \brief Orders logical tensors by ascending magic. */
struct TensorCompare {
    bool operator()(const std::shared_ptr<LogicalTensor>& lhs, const std::shared_ptr<LogicalTensor>& rhs) const
    {
        return rhs->magic > lhs->magic;
    }
};
/**
 * \brief Serialize this function into a JSON object.
 *
 * \param useTable When true, incasts/outcasts are written as (magic, slot list) pairs, operations are
 *                 dumped with their flag set to false, and every referenced tensor and raw tensor is
 *                 emitted once under "tensors"/"rawtensors". When false, incasts, outcasts and operations
 *                 are dumped through their own DumpJson with the flag set to true.
 * \return The JSON object describing the function.
 */
Json Function::DumpJson(bool useTable)
{
    Json funcJson;
    funcJson[T_FIELD_KIND] = static_cast<int>(Kind::T_KIND_FUNCTION);
    funcJson["rawname"] = funcRawName_;
    funcJson["funcmagic"] = GetFuncMagic();
    if (parent_ != nullptr) {
        funcJson["parent_funcmagic"] = parent_->GetFuncMagic();
    }
    funcJson["functype"] = functionType_;
    funcJson["graphtype"] = graphType_;
    funcJson["func_magicname"] = funcMagicName_;
    funcJson["_opseed"] = opSeed_;
    funcJson["_rawid"] = IdGen<IdType::RAW_TENSOR>::Inst().CurId();
    funcJson["_funcid"] = IdGen<IdType::FUNCTION>::Inst().CurId();
    funcJson["_sg_pg_lowerbound"] = paramConfigs_.sgPgLowerBound;
    funcJson["_sg_parallel_num"] = paramConfigs_.sgParallelNum;
    funcJson["_sg_partition_algorithm"] = paramConfigs_.sgPartitionAlgorithm;
    funcJson["_sg_mg_copyin_upper_bound"] = paramConfigs_.sgMgCopyInUpperBound;
    funcJson["_mg_vec_parallel_lb"] = paramConfigs_.mgVecParallelLb;
    funcJson["_total_subgraph_count"] = totalSubGraphCount_;
    funcJson["_auto_mix_partition"] = paramConfigs_.autoMixPartition;
    funcJson["_ooo_preschedule_method"] = paramConfigs_.OoOPreScheduleMethod;
    if (!span_.IsUnknown()) {
        funcJson["file"] = span_.Filename();
        funcJson["line"] = span_.BeginLine();
    }

    if (useTable) {
        // Table form: each cast is its tensor magic plus the slot list recorded for its position
        // (left empty when no slot scope or no slot entry exists for that position).
        using CastEntry = std::pair<int, std::vector<int>>;
        std::vector<CastEntry> incasts;
        std::vector<CastEntry> outcasts;
        incasts.reserve(inCasts_.size());
        outcasts.reserve(outCasts_.size());
        for (size_t i = 0; i < inCasts_.size(); i++) {
            CastEntry incast;
            incast.first = inCasts_[i]->GetMagic();
            if (slotScope_ != nullptr && i < slotScope_->ioslot.incastSlot.size()) {
                incast.second = slotScope_->ioslot.incastSlot[i];
            }
            incasts.push_back(incast);
        }
        for (size_t i = 0; i < outCasts_.size(); i++) {
            CastEntry outcast;
            outcast.first = outCasts_[i]->GetMagic();
            if (slotScope_ != nullptr && i < slotScope_->ioslot.outcastSlot.size()) {
                outcast.second = slotScope_->ioslot.outcastSlot[i];
            }
            outcasts.push_back(outcast);
        }
        funcJson["incasts"] = incasts;
        funcJson["outcasts"] = outcasts;
    } else {
        // Inline form: each cast carries its full tensor dump.
        Json incasts = Json::array();
        Json outcasts = Json::array();
        for (auto& i : inCasts_) {
            incasts.push_back(i->DumpJson(*this, true));
        }
        for (auto& o : outCasts_) {
            outcasts.push_back(o->DumpJson(*this, true));
        }
        funcJson["incasts"] = incasts;
        funcJson["outcasts"] = outcasts;
    }

    // Global tensors are written as a sorted, de-duplicated list of magics.
    std::set<int> globalTensorSet;
    for (auto& t : globalTensors_) {
        globalTensorSet.emplace(t->GetMagic());
    }
    std::vector<int> globalTensorVec(globalTensorSet.begin(), globalTensorSet.end());
    funcJson["global_tensors"] = globalTensorVec;
    funcJson["static"]["global_tensors"] = funcJson["global_tensors"];

    // Operations are dumped with the opposite flag of useTable.
    Json operations = Json::array();
    for (const auto& op : operations_) {
        operations.push_back(op->DumpJson(!useTable));
    }
    funcJson["operations"] = operations;
    funcJson["hash"] = functionHash_.Data();

    if (leafFuncAttr_ != nullptr && leafFuncAttr_->coreType != CoreType::INVALID) {
        funcJson["leaf_func_attr"]["coretype"] = leafFuncAttr_->coreType;
    }
    if (rootFunc_ != nullptr) {
        funcJson["root_func_magic"] = rootFunc_->GetFuncMagic();
    }
    if (!programs_.empty()) {
        Json programsJson;
        for (auto& ele : programs_) {
            programsJson[ele.first] = ele.second->GetFuncMagic();
        }
        funcJson["programs"] = programsJson;
        funcJson["topo"] = topoInfo_.DumpJson();
        funcJson["static"]["topo"] = funcJson["topo"];
    }
    if (graphType_ == GraphType::BLOCK_GRAPH) {
        funcJson["subfunc_param"] = parameter_.ToJson();
        funcJson["static"]["subfunc_param"] = funcJson["subfunc_param"];
    }

    // Ready sub-graph ids are written per core type, and only when the list is non-empty.
    const auto dumpReadySubGraphIds = [&](CoreType coreType, const char* key) {
        auto it = readySubGraphIds_.find(coreType);
        if (it != readySubGraphIds_.end() && !it->second.empty()) {
            funcJson[key] = it->second;
            funcJson["static"][key] = funcJson[key];
        }
    };
    dumpReadySubGraphIds(CoreType::AIC, "aic_ready_subgraph_ids");
    dumpReadySubGraphIds(CoreType::AIV, "aiv_ready_subgraph_ids");
    dumpReadySubGraphIds(CoreType::AICPU, "aicpu_ready_subgraph_ids");

    if (useTable) {
        // Collect every tensor / raw tensor referenced by a cast or an operation exactly once.
        std::set<std::shared_ptr<RawTensor>, RawTensorCompare> rawTensorSet;
        std::set<std::shared_ptr<LogicalTensor>, TensorCompare> tensorSet;
        for (const auto& incast : inCasts_) {
            tensorSet.insert(incast);
            rawTensorSet.insert(incast->tensor);
        }
        for (const auto& outcast : outCasts_) {
            tensorSet.insert(outcast);
            rawTensorSet.insert(outcast->tensor);
        }
        for (const auto& op : operations_) {
            for (auto& i : op->GetIOperands()) {
                tensorSet.insert(i);
                rawTensorSet.insert(i->tensor);
            }
            for (auto& o : op->GetOOperands()) {
                tensorSet.insert(o);
                rawTensorSet.insert(o->tensor);
            }
        }
        std::vector<std::shared_ptr<RawTensor>> rawTensorList(rawTensorSet.begin(), rawTensorSet.end());
        std::vector<std::shared_ptr<LogicalTensor>> tensorList(tensorSet.begin(), tensorSet.end());
        // Sorted by the accessor values; the comparators take references to avoid shared_ptr copies.
        std::sort(rawTensorList.begin(), rawTensorList.end(), [](const auto& l, const auto& r) {
            return l->GetRawMagic() < r->GetRawMagic();
        });
        std::sort(tensorList.begin(), tensorList.end(), [](const auto& l, const auto& r) {
            return l->GetMagic() < r->GetMagic();
        });
        Json rawtensors = Json::array();
        Json tensors = Json::array();
        for (auto& rawTensor : rawTensorList) {
            rawtensors.push_back(rawTensor->DumpJson());
        }
        for (auto& tensor : tensorList) {
            tensors.push_back(tensor->DumpJson(*this, false));
        }
        funcJson["rawtensors"] = rawtensors;
        funcJson["tensors"] = tensors;
    }

    if (functionType_ == FunctionType::DYNAMIC_LOOP) {
        auto loopAttr = GetDynloopAttribute();
        if (loopAttr != nullptr) {
            std::string itername = loopAttr->iterSymbolName;
            funcJson["dynamic"]["itername"] = itername;

            // A loop scalar is written only when its JSON form is non-empty.
            const auto dumpLoopScalar = [&](const char* key, SymbolicScalar scalar) {
                auto scalarJson = ToJson(scalar);
                if (scalarJson.size() > 0) {
                    funcJson["dynamic"][key] = scalarJson;
                }
            };
            dumpLoopScalar("begin", loopAttr->Begin());
            dumpLoopScalar("end", loopAttr->End());
            dumpLoopScalar("step", loopAttr->Step());
            dumpLoopScalar("originalBegin", loopAttr->originalRange.Begin());
            dumpLoopScalar("originalEnd", loopAttr->originalRange.End());

            int unrollTimes = loopAttr->unrollTimes;
            funcJson["dynamic"]["unrollTimes"] = unrollTimes;

            // Each path is written as [call-op magic, [[condition, isSat], ...]]; paths without
            // conditions are omitted.
            Json loopFuncPathList = Json::array();
            for (auto& path : loopAttr->pathList) {
                Json pathJson = Json::array();
                auto opmagic = path.callop->GetOpMagic();
                for (auto& pathCond : path.pathCondList) {
                    Json pathCondJson = Json::array();
                    SymbolicScalar cond = pathCond.GetCond();
                    auto jcond = ToJson(cond);
                    pathCondJson.push_back(jcond);
                    pathCondJson.push_back(pathCond.IsSat());
                    pathJson.push_back(pathCondJson);
                }
                if (pathJson.size() > 0) {
                    loopFuncPathList.push_back({opmagic, pathJson});
                }
            }
            if (loopFuncPathList.size() > 0) {
                funcJson["dynamic"]["paths"] = loopFuncPathList;
            }
        }
    }
    return funcJson;
}
/**
 * \brief Restore tensors, casts, global tensors and operations of a function from its JSON dump.
 *
 * \param func          Function being populated.
 * \param tensorJson    JSON object of the function dump.
 * \param rawTensorDict Already loaded raw tensors, keyed by raw magic.
 * \param tensorDict    Receives every loaded logical tensor, keyed by magic.
 */
void Function::LoadTensorJson(
    const std::shared_ptr<Function>& func, const Json& tensorJson,
    const std::unordered_map<int, std::shared_ptr<RawTensor>>& rawTensorDict,
    std::unordered_map<int, std::shared_ptr<LogicalTensor>>& tensorDict)
{
    // Tensors listed under "tensors" (present in table-form dumps).
    if (tensorJson.count("tensors") != 0) {
        for (auto& tensorDump : tensorJson["tensors"]) {
            std::shared_ptr<LogicalTensor> loaded = LogicalTensor::LoadJson(*func, rawTensorDict, tensorDump);
            tensorDict[loaded->GetMagic()] = loaded;
        }
    }

    // Cast entries whose first element is not a number carry a full tensor dump themselves.
    const auto loadInlineCastTensors = [&](const char* key) {
        for (auto& castDump : tensorJson[key]) {
            if (castDump[0].is_number()) {
                continue;
            }
            std::shared_ptr<LogicalTensor> loaded = LogicalTensor::LoadJson(*func, rawTensorDict, castDump);
            tensorDict[loaded->GetMagic()] = loaded;
        }
    };
    loadInlineCastTensors("incasts");
    loadInlineCastTensors("outcasts");

    for (auto& globalDump : tensorJson["global_tensors"]) {
        int magic = globalDump.get<int>();
        auto& globalTensor = tensorDict[magic];
        func->globalTensors_.emplace(globalTensor);
    }

    // Register the casts in dump order; the magic is either the first pair element or the "magic" field.
    for (auto& incastDump : tensorJson["incasts"]) {
        int magic = incastDump[0].is_number() ? incastDump[0].get<int>() : incastDump["magic"].get<int>();
        auto& incast = tensorDict[magic];
        func->inCasts_.push_back(incast);
        func->GetTensorMap().Insert(incast);
    }
    for (auto& outcastDump : tensorJson["outcasts"]) {
        int magic = outcastDump[0].is_number() ? outcastDump[0].get<int>() : outcastDump["magic"].get<int>();
        func->outCasts_.push_back(tensorDict[magic]);
    }

    for (auto& entry : tensorDict) {
        func->GetTensorMap().Insert(entry.second, false);
    }

    for (auto& opDump : tensorJson["operations"]) {
        auto loadedOp = Operation::LoadJson(*func, tensorDict, opDump);
        func->operations_.push_back(loadedOp);
    }
}
/**
 * \brief Rebuild a Function from the JSON object produced by DumpJson.
 *
 * \param belongTo Program the function belongs to; also used to resolve referenced functions by magic.
 * \param funcJson JSON object of a single function; its kind field must be T_KIND_FUNCTION.
 * \return The reconstructed function.
 *
 * NOTE(review): DumpJson also writes "_ooo_preschedule_method", which is not read back here -- confirm
 * whether that is intentional.
 */
std::shared_ptr<Function> Function::LoadJson(Program& belongTo, const Json& funcJson)
{
    FE_ASSERT(FeError::INVALID_VAL, funcJson[T_FIELD_KIND].get<int>() == static_cast<int>(Kind::T_KIND_FUNCTION))
        << "Invalid function kind in JSON";
    int funcmagic = funcJson["funcmagic"].get<int>();
    std::string rawname = funcJson["rawname"].get<std::string>();
    std::shared_ptr<Function> func =
        std::make_shared<Function>(belongTo, rawname + "_" + std::to_string(funcmagic), rawname, nullptr);
    func->funcMagicName_ = funcJson["func_magicname"];
    func->functionMagic_ = funcmagic;
    func->functionType_ = static_cast<FunctionType>(funcJson["functype"].get<int>());
    func->graphType_ = static_cast<GraphType>(funcJson["graphtype"].get<int>());
    func->sorted_ = true;

    // Raw tensors come from the "rawtensors" table and/or are embedded in inline cast dumps.
    std::unordered_map<int, std::shared_ptr<RawTensor>> rawTensorDict;
    if (funcJson.count("rawtensors") != 0) {
        for (auto& rawTensorDump : funcJson["rawtensors"]) {
            std::shared_ptr<RawTensor> rawTensor = RawTensor::LoadJson(rawTensorDump);
            rawTensorDict[rawTensor->rawmagic] = rawTensor;
        }
    }
    const auto loadEmbeddedRawTensors = [&funcJson, &rawTensorDict](const char* key) {
        for (auto& castDump : funcJson[key]) {
            if (!castDump[0].is_number() && !castDump[T_FIELD_RAWTENSOR].is_number()) {
                std::shared_ptr<RawTensor> rawTensor = RawTensor::LoadJson(castDump[T_FIELD_RAWTENSOR]);
                rawTensorDict[rawTensor->rawmagic] = rawTensor;
            }
        }
    };
    loadEmbeddedRawTensors("incasts");
    loadEmbeddedRawTensors("outcasts");

    std::unordered_map<int, std::shared_ptr<LogicalTensor>> tensorDict;
    LoadTensorJson(func, funcJson, rawTensorDict, tensorDict);

    // Restore the id generators and the partitioning parameters.
    func->opSeed_ = funcJson["_opseed"].get<int>();
    IdGen<IdType::RAW_TENSOR>::Inst().SetId(funcJson["_rawid"].get<int>());
    int funcid = funcJson["_funcid"].get<int>();
    IdGen<IdType::FUNCTION>::Inst().SetId(funcid);
    func->paramConfigs_.sgPgLowerBound = funcJson["_sg_pg_lowerbound"].get<int>();
    func->paramConfigs_.sgParallelNum = funcJson["_sg_parallel_num"].get<int>();
    func->paramConfigs_.sgPartitionAlgorithm = funcJson["_sg_partition_algorithm"].get<std::string>();
    func->paramConfigs_.sgMgCopyInUpperBound = funcJson["_sg_mg_copyin_upper_bound"].get<int>();
    func->paramConfigs_.mgVecParallelLb = funcJson["_mg_vec_parallel_lb"].get<int>();
    func->paramConfigs_.autoMixPartition = funcJson["_auto_mix_partition"].get<int>();
    auto subGraphCount = funcJson["_total_subgraph_count"].get<size_t>();
    func->SetTotalSubGraphCount(subGraphCount);

    // Slot lists exist only for table-form cast entries (first element is the numeric magic).
    const auto collectCastSlots = [&funcJson](const char* key) {
        std::vector<std::vector<int>> castSlots;
        for (auto& castDump : funcJson[key]) {
            if (!castDump[0].is_number()) {
                continue;
            }
            std::vector<int> slots;
            for (auto& slot : castDump[1]) {
                slots.push_back(slot.get<int>());
            }
            castSlots.push_back(slots);
        }
        return castSlots;
    };
    IncastOutcastSlot ioSlot;
    ioSlot.incastSlot = collectCastSlots("incasts");
    ioSlot.outcastSlot = collectCastSlots("outcasts");
    std::shared_ptr<TensorSlotScope> tensorSlotScope = std::make_shared<TensorSlotScope>(func.get());
    tensorSlotScope->ioslot = ioSlot;
    func->slotScope_ = tensorSlotScope;

    // The stored hash is assigned after ComputeHashOrderless() has run.
    func->ComputeHashOrderless();
    func->functionHash_ = std::stoull(funcJson["hash"].get<std::string>());

    if (func->GetGraphType() == GraphType::BLOCK_GRAPH && func->GetLeafFuncAttribute() == nullptr) {
        std::shared_ptr<LeafFuncAttribute> attr = std::make_shared<LeafFuncAttribute>();
        func->SetLeafFuncAttribute(attr);
    }
    if (funcJson.count("leaf_func_attr") != 0 && funcJson["leaf_func_attr"].count("coretype") != 0) {
        std::shared_ptr<LeafFuncAttribute> attr = func->GetLeafFuncAttribute();
        // DumpJson writes "leaf_func_attr" regardless of the graph type, so the attribute may not have
        // been created above; create it on demand instead of dereferencing a null pointer.
        if (attr == nullptr) {
            attr = std::make_shared<LeafFuncAttribute>();
            func->SetLeafFuncAttribute(attr);
        }
        attr->coreType = static_cast<CoreType>(funcJson["leaf_func_attr"]["coretype"].get<int>());
    }
    if (funcJson.count("root_func_magic") != 0) {
        func->rootFunc_ = belongTo.GetFunctionByMagic(funcJson["root_func_magic"].get<int>()).get();
    }
    if (funcJson.count("programs") != 0) {
        // Programs are re-keyed by their position in the dump.
        uint64_t index = 0;
        for (auto& programMagic : funcJson["programs"]) {
            func->programs_.emplace(
                std::make_pair(index++, belongTo.GetFunctionByMagic(programMagic.get<int>()).get()));
        }
    }
    if (funcJson.count("topo") != 0) {
        func->topoInfo_.LoadJson(funcJson["topo"]);
    }
    if (funcJson.count("subfunc_param") != 0) {
        func->parameter_.FromJson(funcJson["subfunc_param"]);
    }

    const auto loadReadySubGraphIds = [&funcJson, &func](CoreType coreType, const char* key) {
        if (funcJson.count(key) != 0) {
            func->SetReadySubGraphIds(coreType, funcJson[key].get<std::vector<int>>());
        }
    };
    loadReadySubGraphIds(CoreType::AIC, "aic_ready_subgraph_ids");
    loadReadySubGraphIds(CoreType::AIV, "aiv_ready_subgraph_ids");
    loadReadySubGraphIds(CoreType::AICPU, "aicpu_ready_subgraph_ids");

    if (funcJson.count("dynamic") != 0) {
        // NOTE(review): DumpJson omits begin/end/step/originalBegin/originalEnd when their JSON form is
        // empty; confirm that the lookups below and LoadSymbolicScalar tolerate a missing key.
        auto iterName = funcJson["dynamic"]["itername"];
        auto beginJson = funcJson["dynamic"]["begin"];
        SymbolicScalar begin = LoadSymbolicScalar(beginJson);
        auto endJson = funcJson["dynamic"]["end"];
        SymbolicScalar end = LoadSymbolicScalar(endJson);
        auto stepJson = funcJson["dynamic"]["step"];
        SymbolicScalar step = LoadSymbolicScalar(stepJson);
        LoopRange range(begin, end, step);
        auto originalBeginJson = funcJson["dynamic"]["originalBegin"];
        SymbolicScalar originalBegin = LoadSymbolicScalar(originalBeginJson);
        auto originalEndJson = funcJson["dynamic"]["originalEnd"];
        SymbolicScalar originalEnd = LoadSymbolicScalar(originalEndJson);
        LoopRange originalRange(originalBegin, originalEnd);
        auto attr = std::make_shared<DynloopFunctionAttribute>(iterName, range, originalRange);
        attr->unrollTimes = funcJson["dynamic"]["unrollTimes"];
        auto dynFuncDump = funcJson["dynamic"];
        if (dynFuncDump.count("paths") != 0) {
            std::vector<DynloopFunctionPath> pathList;
            auto pathsJson = funcJson["dynamic"]["paths"];
            for (auto& pathJson : pathsJson) {
                Function* root = func.get();
                std::vector<DynloopFunctionPathCondition> pathCondList;
                // Each path is [call-op magic, [[condition, isSat], ...]]; callop stays null when no
                // operation with that magic is found.
                auto callOpMagic = pathJson[0];
                Operation* callop = nullptr;
                for (auto& op : func->Operations().DuplicatedOpList()) {
                    if (op->GetOpMagic() == callOpMagic) {
                        callop = op;
                        break;
                    }
                }
                for (auto& pathCondJson : pathJson[1]) {
                    bool isSat = static_cast<bool>(pathCondJson[1]);
                    SymbolicScalar cond = LoadSymbolicScalar(pathCondJson[0]);
                    DynloopFunctionPathCondition pathCond;
                    pathCond.isSat_ = isSat;
                    pathCond.cond_ = cond;
                    pathCondList.push_back(pathCond);
                }
                DynloopFunctionPath path(root, pathCondList, callop);
                pathList.push_back(path);
            }
            attr->pathList = pathList;
        }
        func->SetDynloopAttribute(attr);
    }
    func->RefreshOpPosition();
    return func;
}
// Runtime COA accessor symbols, each built by AddRuntimeCoaPrefix from the accessor name. The
// normalization helpers in this file use them with call syntax, e.g. RUNTIME_COA_GetParam(coaIndex).
static const SymbolicScalar RUNTIME_COA_GetOffset = AddRuntimeCoaPrefix("GET_PARAM_OFFSET");
static const SymbolicScalar RUNTIME_COA_GetValidShape = AddRuntimeCoaPrefix("GET_PARAM_VALID_SHAPE");
static const SymbolicScalar RUNTIME_COA_GetRawShape = AddRuntimeCoaPrefix("GET_PARAM_RAW_SHAPE");
static const SymbolicScalar RUNTIME_COA_GetParam = AddRuntimeCoaPrefix("GET_PARAM");
/**
 * \brief Tag a raw-tensor magic as a tensor index by setting bit 62.
 * \param magic Raw-tensor magic number.
 * \return \p magic with bit 62 set.
 */
static int64_t MakeTensorIndex(int64_t magic)
{
    // Build the flag in int64_t: `1UL << 62` is undefined where unsigned long is 32 bits wide and
    // would mix signed and unsigned operands.
    constexpr int64_t tensorIndexFlag = static_cast<int64_t>(1) << 62;
    return magic | tensorIndexFlag;
}
static void MaybeNormalizeValue(
const SymbolicScalar& coaFunc, std::vector<SymbolicScalar>& operandCoaList, int operandCoaIndex,
std::vector<OpImmediate>& opImmList, int coaIndex, bool valueToIndex)
{
for (size_t dimIndex = 0; dimIndex < opImmList.size(); dimIndex++) {
auto& opImm = opImmList[dimIndex];
SymbolicScalar scalar = opImm.GetSpecifiedValue();
auto getTensorDataDict = GetTensorDataDict(scalar);
if (getTensorDataDict.size() == 0) {
OpImmediate::NormalizeValue(
operandCoaList[operandCoaIndex + dimIndex], opImm, coaFunc(opImmList.size(), coaIndex, dimIndex),
valueToIndex);
}
}
};
static void MaybeNormalizeValue(
std::vector<SymbolicScalar>& valueCoa, SymbolicScalar& value, int coaIndex, bool valueToIndex)
{
auto getTensorDataDict = GetTensorDataDict(value);
if (getTensorDataDict.size() == 0) {
valueCoa.push_back(value);
if (valueToIndex) {
value = RUNTIME_COA_GetParam(coaIndex);
}
}
}
/**
 * \brief Normalize the dynamic valid shape of a reshape-copy operation into its own COA list.
 *
 * Only OP_RESHAPE_COPY_OUT, OP_RESHAPE_COPY_IN, OP_L0C_RESHAPE_COPY_OUT and OP_L1_RESHAPE_COPY_IN are
 * handled; any other opcode returns immediately. The two copy-out opcodes use the attribute's
 * "to" valid shape, the two copy-in opcodes its "from" valid shape.
 *
 * \param op           Operation to normalize.
 * \param coaLists     Receives one new COA list holding the normalized values.
 * \param coaIndex     Running COA index; advanced by the number of values appended.
 * \param valueToIndex Forwarded to MaybeNormalizeValue.
 */
static void NormalizeReshapeCopyDynValidShape(
    Operation* op, std::vector<std::vector<SymbolicScalar>>& coaLists, int& coaIndex, bool valueToIndex)
{
    Opcode opcode = op->GetOpcode();
    const bool useToDynValidShape =
        opcode == Opcode::OP_RESHAPE_COPY_OUT || opcode == Opcode::OP_L0C_RESHAPE_COPY_OUT;
    const bool useFromDynValidShape =
        opcode == Opcode::OP_RESHAPE_COPY_IN || opcode == Opcode::OP_L1_RESHAPE_COPY_IN;
    if (!useToDynValidShape && !useFromDynValidShape) {
        return;
    }

    auto copyAttr = std::static_pointer_cast<CopyOpAttribute>(op->GetOpAttribute());
    FE_ASSERT(FeError::INVALID_PTR, copyAttr != nullptr)
        << "Normalize reshape copy dyn valid shape failed: copyAttr is null.\n"
        << "Operation: " << op->Dump();

    const char* dynValidShapeName = useToDynValidShape ? "toDynValidShape" : "fromDynValidShape";
    auto opImmList = useToDynValidShape ? copyAttr->GetToDynValidShape() : copyAttr->GetFromDynValidShape();
    FE_ASSERT(FeError::INVALID_PTR, !opImmList.empty())
        << "Normalize reshape copy dyn valid shape failed: " << dynValidShapeName << " is empty.\n"
        << "Operation: " << op->Dump();

    // Each appended value takes the next free COA index after the ones appended so far.
    std::vector<SymbolicScalar> dynValidShape = OpImmediate::ToSpecified(opImmList);
    std::vector<SymbolicScalar> valueCoaList;
    for (auto& dimValue : dynValidShape) {
        const int nextCoaIndex = coaIndex + static_cast<int>(valueCoaList.size());
        MaybeNormalizeValue(valueCoaList, dimValue, nextCoaIndex, valueToIndex);
    }

    // Write the (possibly index-replaced) values back to the side they were read from.
    if (useToDynValidShape) {
        copyAttr->SetToDynValidShape(OpImmediate::Specified(dynValidShape));
    } else {
        copyAttr->SetFromDynValidShape(OpImmediate::Specified(dynValidShape));
    }
    coaLists.emplace_back(valueCoaList);
    coaIndex += valueCoaList.size();
}
/**
 * \brief Build the COA list of a copy-in operation and normalize its copy attribute.
 *
 * Entry 0 of the result holds the tensor index of the first input operand. Starting at
 * COA_INDEX_DIM_BASE follow the source offset, the shape and the raw shape (dim entries each) and then
 * the destination dynamic valid shape.
 *
 * \param op           Copy-in operation; its attribute is updated in place.
 * \param coaIndexBase COA index of the first entry of this operand.
 * \param valueToIndex Forwarded to the normalization helpers.
 * \return The COA list of the operand.
 */
static std::vector<SymbolicScalar> NormalizeCopyIn(Operation* op, int coaIndexBase, bool valueToIndex)
{
    auto copyAttr = std::static_pointer_cast<CopyOpAttribute>(op->GetOpAttribute());
    int dim = copyAttr->GetShape().size();
    int dimCount = dim * 0x3 + copyAttr->GetToDynValidShape().size();
    std::vector<SymbolicScalar> operandCoaList(COA_INDEX_DIM_BASE + dimCount, 0);
    operandCoaList[0] = MakeTensorIndex(op->GetIOperands()[0]->GetRawMagic());

    // localSlot indexes into operandCoaList, globalSlot is the matching absolute COA index.
    int localSlot = COA_INDEX_DIM_BASE;
    int globalSlot = coaIndexBase + COA_INDEX_DIM_BASE;

    // Section 1: source offset.
    std::vector<OpImmediate> fromOffset = copyAttr->GetFromOffset();
    MaybeNormalizeValue(RUNTIME_COA_GetOffset, operandCoaList, localSlot, fromOffset, coaIndexBase, valueToIndex);
    copyAttr->SetFromOffset(fromOffset);
    localSlot += dim;
    globalSlot += dim;

    // Section 2: shape.
    std::vector<OpImmediate> copyShape = copyAttr->GetShape();
    OpImmediate::NormalizeValue(operandCoaList, localSlot, copyShape, globalSlot, valueToIndex);
    copyAttr->SetShape(copyShape);
    localSlot += dim;
    globalSlot += dim;

    // Section 3: raw shape.
    std::vector<OpImmediate> rawShape = copyAttr->GetRawShape();
    MaybeNormalizeValue(RUNTIME_COA_GetRawShape, operandCoaList, localSlot, rawShape, coaIndexBase, valueToIndex);
    copyAttr->SetRawShape(rawShape);
    localSlot += dim;
    globalSlot += dim;

    // Section 4: destination dynamic valid shape.
    std::vector<OpImmediate> toValidShape = copyAttr->GetToDynValidShape();
    if (op->GetOpcode() != Opcode::OP_L1_COPY_IN_CONV) {
        MaybeNormalizeValue(
            RUNTIME_COA_GetValidShape, operandCoaList, localSlot, toValidShape, coaIndexBase, valueToIndex);
    } else {
        // The conv variant stores the values themselves in the trailing entries of the list.
        // NOTE(review): the loop works on copies, so an index replacement made by MaybeNormalizeValue is
        // not written back to the attribute -- confirm this is intended.
        std::vector<SymbolicScalar> valueCoaList;
        for (auto validshape : OpImmediate::ToSpecified(toValidShape)) {
            MaybeNormalizeValue(valueCoaList, validshape, globalSlot, valueToIndex);
            globalSlot += 1;
        }
        operandCoaList.erase(operandCoaList.end() - valueCoaList.size(), operandCoaList.end());
        operandCoaList.insert(operandCoaList.end(), valueCoaList.begin(), valueCoaList.end());
    }
    copyAttr->SetToDynValidShape(toValidShape);
    return operandCoaList;
}
/**
 * \brief Build the COA list of a copy-out operation and normalize its copy attribute.
 *
 * Entry 0 of the result holds the tensor index of the first output operand. Starting at
 * COA_INDEX_DIM_BASE follow the destination offset, the shape and the raw shape (dim entries each) and
 * then the source dynamic valid shape.
 *
 * \param op           Copy-out operation; its attribute is updated in place.
 * \param coaIndexBase COA index of the first entry of this operand.
 * \param valueToIndex Forwarded to the normalization helpers.
 * \return The COA list of the operand.
 */
static std::vector<SymbolicScalar> NormalizeCopyOut(Operation* op, int coaIndexBase, bool valueToIndex)
{
    auto copyAttr = std::static_pointer_cast<CopyOpAttribute>(op->GetOpAttribute());
    int dim = copyAttr->GetShape().size();
    int dimCount = dim * 0x3 + copyAttr->GetFromDynValidShape().size();
    std::vector<SymbolicScalar> operandCoaList(COA_INDEX_DIM_BASE + dimCount, 0);
    operandCoaList[0] = MakeTensorIndex(op->GetOOperands()[0]->GetRawMagic());

    // localSlot indexes into operandCoaList, globalSlot is the matching absolute COA index.
    int localSlot = COA_INDEX_DIM_BASE;
    int globalSlot = coaIndexBase + COA_INDEX_DIM_BASE;

    // Section 1: destination offset.
    std::vector<OpImmediate> toOffset = copyAttr->GetToOffset();
    MaybeNormalizeValue(RUNTIME_COA_GetOffset, operandCoaList, localSlot, toOffset, coaIndexBase, valueToIndex);
    copyAttr->SetToOffset(toOffset);
    localSlot += dim;
    globalSlot += dim;

    // Section 2: shape.
    std::vector<OpImmediate> copyShape = copyAttr->GetShape();
    OpImmediate::NormalizeValue(operandCoaList, localSlot, copyShape, globalSlot, valueToIndex);
    copyAttr->SetShape(copyShape);
    localSlot += dim;
    globalSlot += dim;

    // Section 3: raw shape.
    std::vector<OpImmediate> rawShape = copyAttr->GetRawShape();
    MaybeNormalizeValue(RUNTIME_COA_GetRawShape, operandCoaList, localSlot, rawShape, coaIndexBase, valueToIndex);
    copyAttr->SetRawShape(rawShape);
    localSlot += dim;
    globalSlot += dim;

    // Section 4: source dynamic valid shape.
    std::vector<OpImmediate> fromValidShape = copyAttr->GetFromDynValidShape();
    if (op->GetOpcode() != Opcode::OP_L0C_COPY_OUT_CONV) {
        MaybeNormalizeValue(
            RUNTIME_COA_GetValidShape, operandCoaList, localSlot, fromValidShape, coaIndexBase, valueToIndex);
    } else {
        // The conv variant stores the values themselves in the trailing entries of the list.
        // NOTE(review): the loop works on copies, so an index replacement made by MaybeNormalizeValue is
        // not written back to the attribute -- confirm this is intended.
        std::vector<SymbolicScalar> valueCoaList;
        for (auto validshape : OpImmediate::ToSpecified(fromValidShape)) {
            MaybeNormalizeValue(valueCoaList, validshape, globalSlot, valueToIndex);
            globalSlot += 1;
        }
        operandCoaList.erase(operandCoaList.end() - valueCoaList.size(), operandCoaList.end());
        operandCoaList.insert(operandCoaList.end(), valueCoaList.begin(), valueCoaList.end());
    }
    copyAttr->SetFromDynValidShape(fromValidShape);
    return operandCoaList;
}
/**
 * \brief Build the COA list of a plain tensor operand.
 *
 * Starting at COA_INDEX_DIM_BASE the list holds four sections of dim entries each: offset, shape,
 * raw shape and valid shape. Entry 0 receives the tensor index when the operand's target memory type is
 * MEM_DEVICE_DDR. Dynamic offsets and dynamic valid shapes are written back to the operand.
 *
 * \param operand      Tensor operand to describe.
 * \param coaIndexBase COA index of the first entry of this operand.
 * \param valueToIndex Forwarded to the normalization of the dynamic sections.
 * \param isNop        When true, offset, dynamic offset, shape and dynamic valid shape are replaced by
 *                     Offset/Shape values constructed from the operand's rank.
 * \return The COA list of the operand.
 */
static std::vector<SymbolicScalar> NormalizeTensor(
    LogicalTensorPtr operand, int coaIndexBase, bool valueToIndex, bool isNop = false)
{
    auto offset = OpImmediate::Specified(operand->GetOffset());
    auto dynOffset = OpImmediate::Specified(operand->GetDynOffset());
    auto shape = OpImmediate::Specified(operand->GetShape());
    auto rawshape = OpImmediate::Specified(operand->GetRawTensor()->GetRawShape());
    auto dynRawshape = OpImmediate::Specified(operand->GetRawTensor()->GetDynRawShape());
    auto dynValidShape = OpImmediate::Specified(operand->GetDynValidShape());
    if (isNop) {
        offset = OpImmediate::Specified(Offset(operand->GetShape().size()));
        dynOffset = OpImmediate::Specified(Offset(operand->GetShape().size()));
        shape = OpImmediate::Specified(Shape(operand->GetShape().size()));
        dynValidShape = OpImmediate::Specified(Shape(operand->GetShape().size()));
    }

    int dim = shape.size();
    std::vector<SymbolicScalar> operandCoaList(COA_INDEX_DIM_BASE + dim * COA_INDEX_TYPE_COUNT, 0);
    if (operand->GetMemoryTypeToBe() == MemoryType::MEM_DEVICE_DDR) {
        operandCoaList[0] = MakeTensorIndex(operand->GetRawMagic());
    }

    // localSlot indexes into operandCoaList, globalSlot is the matching absolute COA index.
    int localSlot = COA_INDEX_DIM_BASE;
    int globalSlot = coaIndexBase + COA_INDEX_DIM_BASE;

    // Section 1: offset (dynamic when available, concrete otherwise).
    if (dynOffset.empty()) {
        OpImmediate::NormalizeValue(operandCoaList, localSlot, offset, globalSlot, false);
    } else {
        MaybeNormalizeValue(RUNTIME_COA_GetOffset, operandCoaList, localSlot, dynOffset, coaIndexBase, valueToIndex);
        operand->UpdateOffset(TensorOffset{operand->GetOffset(), OpImmediate::ToSpecified(dynOffset)});
    }
    localSlot += dim;
    globalSlot += dim;

    // Section 2: shape.
    OpImmediate::NormalizeValue(operandCoaList, localSlot, shape, globalSlot, false);
    localSlot += dim;
    globalSlot += dim;

    // Section 3: raw shape (dynamic when available, concrete otherwise).
    if (dynRawshape.empty()) {
        OpImmediate::NormalizeValue(operandCoaList, localSlot, rawshape, globalSlot, false);
    } else {
        OpImmediate::NormalizeValue(operandCoaList, localSlot, dynRawshape, globalSlot, valueToIndex);
    }
    localSlot += dim;
    globalSlot += dim;

    // Section 4: valid shape; falls back to the plain shape when no dynamic valid shape exists.
    if (dynValidShape.empty()) {
        OpImmediate::NormalizeValue(operandCoaList, localSlot, shape, globalSlot, false);
    } else {
        MaybeNormalizeValue(
            RUNTIME_COA_GetValidShape, operandCoaList, localSlot, dynValidShape, coaIndexBase, valueToIndex);
        operand->UpdateDynValidShape(OpImmediate::ToSpecified(dynValidShape));
    }
    return operandCoaList;
}
/**
 * \brief Collect the bound symbolic expression of every outcast produced by a bind-tensor operation.
 *
 * Only the first producer of each outcast is inspected. An entry is written when that producer is an
 * OP_BIND_TENSOR carrying the bindTensor attribute and having exactly one output operand.
 *
 * \param tabel Receives outcast position -> symbolic expression; other entries are left untouched.
 */
void Function::GetOutcastSymbolicExpr(std::map<int, SymbolicScalar>& tabel)
{
    for (size_t idx = 0; idx < outCasts_.size(); idx++) {
        const auto& producers = outCasts_[idx]->GetProducers();
        // An outcast without any producer has nothing to report; dereferencing begin() would be invalid.
        if (producers.empty()) {
            continue;
        }
        auto op = *producers.begin();
        if (op->GetOpcode() != Opcode::OP_BIND_TENSOR) {
            continue;
        }
        if (op->HasAttr(OpAttributeKey::bindTensor) && (op->GetOOperands().size() == 1UL)) {
            tabel[idx] = op->GetSymbolicScalarAttribute(OpAttributeKey::bindTensor);
        }
    }
}
/** \brief True when the operation carries the "op_attr_atomic_add" attribute. */
static bool isAtomicOp(Operation* op)
{
    const bool hasAtomicAdd = op->HasAttr("op_attr_atomic_add");
    return hasAtomicAdd;
}
/**
 * \brief Build the COA lists of this function.
 *
 * Incasts are processed first, then outcasts, then the remaining operands and finally the special
 * information; all four stages share one running COA index that starts at COA_INDEX_BASE.
 *
 * \param iOpAttr Filled by the incast stage.
 * \param oOpAttr Filled by the outcast stage.
 * \return The COA lists in the order the stages produced them.
 */
std::vector<std::vector<SymbolicScalar>> Function::NormalizeCoa(
    std::vector<OperandAttribute>& iOpAttr, std::vector<OperandAttribute>& oOpAttr)
{
    // Lookup from operation magic to the operation, used by the cast stages.
    std::unordered_map<int, Operation*> opmagicToOp;
    opmagicToOp.reserve(operations_.size());
    for (auto& operation : operations_) {
        opmagicToOp[operation->GetOpMagic()] = operation.get();
    }

    std::unordered_map<LogicalTensorPtr, int> processedOperands;
    std::vector<std::vector<SymbolicScalar>> coaLists;
    coaLists.reserve(incastPosition.size() + outcastPosition.size());

    int coaIndex = COA_INDEX_BASE;
    NormalizeCoaForInCasts(iOpAttr, coaLists, coaIndex, processedOperands, opmagicToOp);
    NormalizeCoaForOutCasts(oOpAttr, coaLists, coaIndex, processedOperands, opmagicToOp);
    NormalizeCoaForNormalOperands(coaLists, coaIndex, processedOperands);
    NormalizeCoaForSpecialInfo(coaLists, coaIndex);
    return coaLists;
}
/**
 * @brief Check whether an OP_VIEW operation feeds no shared-memory consumer, and gather its copy-in consumers.
 *
 * @param op          Operation to inspect; anything other than OP_VIEW yields false without touching copyInList.
 * @param copyInList  Cleared for an OP_VIEW and then filled with the copy-in consumers of the first output operand.
 *                    When a shared-memory consumer is met the list keeps what was gathered up to that point.
 * @return true when op is an OP_VIEW and none of the consumers of its first output is a shared-memory operation.
 */
static bool IsInplaceIncast(Operation* op, std::vector<Operation*>& copyInList)
{
    const bool isView = (op->GetOpcode() == Opcode::OP_VIEW);
    if (!isView) {
        return false;
    }

    LogicalTensorPtr viewOutput = op->GetOOperands()[0];
    copyInList.clear();
    for (auto consumer : viewOutput->GetConsumers()) {
        const auto consumerCode = consumer->GetOpcode();
        if (OpcodeManager::Inst().IsSharedMemory(consumerCode)) {
            return false;
        }
        if (IsCopyIn(consumerCode)) {
            copyInList.push_back(consumer);
        }
    }
    return true;
}
/**
 * @brief Build the COA lists for the operands listed in incastPosition.
 *
 * Every entry of incastPosition is a pair (operation magic, input index k). Entries whose input attribute
 * offset is already different from -1 are skipped. For the remaining entries one COA list is produced,
 * the starting COA index is stored on the operation (SetIOpAtt) and in iOpAttr, and coaIndex is advanced
 * by the length of the list.
 *
 * @param iOpAttr            Receives one attribute per recorded COA index.
 * @param coaLists           Receives the generated COA lists.
 * @param coaIndex           Running COA index; advanced by the size of every appended list.
 * @param processedOperands  Operands normalized through NormalizeTensor, mapped to their COA index.
 * @param opmagicToOp        Lookup from operation magic to operation; at() is used, so the magic must exist.
 */
void Function::NormalizeCoaForInCasts(
std::vector<OperandAttribute>& iOpAttr, std::vector<std::vector<SymbolicScalar>>& coaLists, int& coaIndex,
std::unordered_map<LogicalTensorPtr, int>& processedOperands,
const std::unordered_map<int, Operation*>& opmagicToOp)
{
// Forwarded to NormalizeCopyIn; true only when the parent function is a DYNAMIC_LOOP_PATH.
bool valueToIndex = parent_->GetFunctionType() == FunctionType::DYNAMIC_LOOP_PATH;
iOpAttr.reserve(incastPosition.size());
for (auto [opmagic, k] : incastPosition) {
auto op = opmagicToOp.at(opmagic);
// An offset other than -1 means this input already has a COA index assigned.
if (op->GetIOpAttrOffset(k) != -1) {
continue;
}
std::vector<SymbolicScalar> operandCoaList;
if (IsCopyIn(op->GetOpcode()) && k == 0) {
// First input of a copy-in operation: use the copy-in specific normalization.
operandCoaList = NormalizeCopyIn(op, coaIndex, valueToIndex);
if (CheckEmuOpcode(op, EMUOP_TENSOR_GETDATA_DEPEND)) {
GetTensorDataSetCoaIndex(op, coaIndex);
}
} else {
std::vector<Operation*> copyInList;
// NOTE(review): begin() is dereferenced without checking that the consumer set is non-empty.
auto& consOp = *(op->GetOOperands()[0])->GetConsumers().begin();
if (!this->IsFromOutCast(op->GetOOperands().front()) && IsCopyIn(consOp->GetOpcode()) &&
IsInplaceIncast(op, copyInList)) {
// The COA lists are attached to the copy-in consumers gathered by IsInplaceIncast, one list per
// consumer; op itself gets no attribute on this path.
for (auto copyIn : copyInList) {
operandCoaList = NormalizeCopyIn(copyIn, coaIndex, valueToIndex);
copyIn->SetIOpAtt(k, coaIndex);
iOpAttr.emplace_back(coaIndex);
coaIndex += operandCoaList.size();
coaLists.emplace_back(std::move(operandCoaList));
}
continue;
}
auto iOperand = op->GetInputOperand(k);
auto it = processedOperands.find(iOperand);
if (it != processedOperands.end()) {
// Operand already normalized: reuse its COA index and do not emit a second list.
op->SetIOpAtt(k, it->second);
iOpAttr.emplace_back(it->second);
continue;
}
operandCoaList = NormalizeTensor(iOperand, coaIndex, false, op->GetOpcode() == Opcode::OP_NOP);
processedOperands.emplace(iOperand, coaIndex);
}
// Record the start index of the new list before advancing the running index past it.
op->SetIOpAtt(k, coaIndex);
iOpAttr.emplace_back(coaIndex);
coaIndex += operandCoaList.size();
coaLists.emplace_back(std::move(operandCoaList));
}
}
void Function::NormalizeCoaForOutCasts(
std::vector<OperandAttribute>& oOpAttr, std::vector<std::vector<SymbolicScalar>>& coaLists, int& coaIndex,
std::unordered_map<LogicalTensorPtr, int>& processedOperands,
const std::unordered_map<int, Operation*>& opmagicToOp)
{
bool valueToIndex = parent_->GetFunctionType() == FunctionType::DYNAMIC_LOOP_PATH;
oOpAttr.reserve(outcastPosition.size());
for (auto [opmagic, k] : outcastPosition) {
auto op = opmagicToOp.at(opmagic);
if (op->GetOOpAttrOffset(k) != -1) {
continue;
}
bool isAtomic = isAtomicOp(op);
std::vector<SymbolicScalar> operandCoaList;
if (IsCopyOut(op->GetOpcode()) && k == 0) {
operandCoaList = NormalizeCopyOut(op, coaIndex, valueToIndex);
} else {
auto oOperand = op->GetOutputOperand(k);
auto it = processedOperands.find(oOperand);
if (it != processedOperands.end()) {
op->SetOOpAtt(k, it->second, isAtomic);
oOpAttr.emplace_back(it->second, isAtomic);
continue;
}
operandCoaList = NormalizeTensor(oOperand, coaIndex, false);
processedOperands.emplace(oOperand, coaIndex);
}
op->SetOOpAtt(k, coaIndex, isAtomic);
oOpAttr.emplace_back(coaIndex, isAtomic);
coaIndex += operandCoaList.size();
coaLists.emplace_back(std::move(operandCoaList));
}
}
/**
 * @brief Build COA lists for operands that are neither incasts nor outcasts of this function.
 *
 * Walks every operation except OP_NOP. Operands that already have an attribute offset, or that belong to
 * inCasts_/outCasts_, are skipped. An operand found in processedOperands only gets its known COA index
 * recorded on the operation. A new COA list is generated only for operands with a non-empty dynamic offset
 * or a non-empty dynamic valid shape.
 *
 * @param coaLists           Receives the generated COA lists.
 * @param coaIndex           Running COA index; advanced by the size of every appended list.
 * @param processedOperands  Operands that already own a COA list, mapped to the index of that list.
 */
void Function::NormalizeCoaForNormalOperands(
std::vector<std::vector<SymbolicScalar>>& coaLists, int& coaIndex,
std::unordered_map<LogicalTensorPtr, int>& processedOperands)
{
// Union of the function's incasts and outcasts, used to exclude them below.
std::unordered_set<LogicalTensorPtr> inOutCasts;
inOutCasts.insert(inCasts_.begin(), inCasts_.end());
inOutCasts.insert(outCasts_.begin(), outCasts_.end());
bool valueToIndex = parent_->GetFunctionType() == FunctionType::DYNAMIC_LOOP_PATH;
for (auto& op : operations_) {
if (op->GetOpcode() == Opcode::OP_NOP) {
continue;
}
// Input operands.
for (size_t i = 0; i < op->GetInputOperandSize(); i++) {
auto iOperand = op->GetInputOperand(i);
if ((op->GetIOpAttrOffset(i) != -1) || (inOutCasts.count(iOperand) > 0)) {
continue;
}
auto it = processedOperands.find(iOperand);
if (it != processedOperands.end()) {
op->SetIOpAtt(i, it->second);
continue;
}
if (!iOperand->GetDynOffset().empty() || !iOperand->GetDynValidShape().empty()) {
// NOTE(review): unlike the output branch below, no SetIOpAtt(i, coaIndex) is issued for a freshly
// normalized input operand; confirm whether this asymmetry is intended.
auto operandCoaList = NormalizeTensor(iOperand, coaIndex, valueToIndex);
processedOperands.emplace(iOperand, coaIndex);
coaIndex += operandCoaList.size();
coaLists.emplace_back(std::move(operandCoaList));
}
}
// Output operands.
for (size_t i = 0; i < op->GetOutputOperandSize(); i++) {
auto oOperand = op->GetOutputOperand(i);
if ((op->GetOOpAttrOffset(i) != -1) || (inOutCasts.count(oOperand) > 0)) {
continue;
}
// An output without consumers is skipped unless the producing operation is a copy-in/copy-out.
if (oOperand->GetConsumers().empty() && !OpcodeManager::Inst().IsCopyInOrOut(op->GetOpcode())) {
continue;
}
auto it = processedOperands.find(oOperand);
if (it != processedOperands.end()) {
op->SetOOpAtt(i, it->second, isAtomicOp(op.get()));
continue;
}
if (!oOperand->GetDynOffset().empty() || !oOperand->GetDynValidShape().empty()) {
auto operandCoaList = NormalizeTensor(oOperand, coaIndex, valueToIndex);
processedOperands.emplace(oOperand, coaIndex);
op->SetOOpAtt(i, coaIndex, isAtomicOp(op.get()));
coaIndex += operandCoaList.size();
coaLists.emplace_back(std::move(operandCoaList));
}
}
}
}
void Function::NormalizeCoaForSpecialInfo(std::vector<std::vector<SymbolicScalar>>& coaLists, int& coaIndex)
{
bool valueToIndex = parent_->GetFunctionType() == FunctionType::DYNAMIC_LOOP_PATH;
for (auto& op : operations_) {
if (op->GetOpcode() == Opcode::OP_VEC_DUP || op->GetOpcode() == Opcode::OP_RANGE ||
op->GetOpcode() == Opcode::OP_TRIUL || op->GetOpcode() == Opcode::OP_UNIFORM) {
if (op->HasAttr(OpAttributeKey::dynScalar)) {
SymbolicScalar dynScalar = op->GetSymbolicScalarAttribute(OpAttributeKey::dynScalar);
std::vector<SymbolicScalar> valueCoaList;
MaybeNormalizeValue(valueCoaList, dynScalar, coaIndex, valueToIndex);
op->SetAttribute(OpAttributeKey::dynScalar, dynScalar);
coaLists.emplace_back(valueCoaList);
coaIndex += 1;
}
} else if (op->GetOpcode() == Opcode::OP_BIND_TENSOR) {
if (op->HasAttr(OpAttributeKey::bindTensor) && (op->GetOOperands().size() == 1UL)) {
SymbolicScalar bindTensor = op->GetSymbolicScalarAttribute(OpAttributeKey::bindTensor);
std::vector<SymbolicScalar> valueCoaList;
MaybeNormalizeValue(valueCoaList, bindTensor, coaIndex, valueToIndex);
coaLists.emplace_back(valueCoaList);
coaIndex += 1;
}
} else if (
op->GetOpcode() == Opcode::OP_NCHW2NC1HWC0 || op->GetOpcode() == Opcode::OP_NCHW2Fractal_Z ||
op->GetOpcode() == Opcode::OP_NC1HWC02NCHW || op->GetOpcode() == Opcode::OP_NCDHW2NDC1HWC0 ||
op->GetOpcode() == Opcode::OP_NCDHW2FRACTAL_Z_3D || op->GetOpcode() == Opcode::OP_NDC1HWC02NCDHW) {
if (op->HasAttr(OpAttributeKey::transDataOffset)) {
std::vector<SymbolicScalar> offsets;
op->GetAttr(OpAttributeKey::transDataOffset, offsets);
for (auto& offset : offsets) {
std::vector<SymbolicScalar> valueCoaList;
MaybeNormalizeValue(valueCoaList, offset, coaIndex, valueToIndex);
coaLists.emplace_back(valueCoaList);
coaIndex += 1;
}
op->SetAttribute(OpAttributeKey::transDataOffset, offsets);
}
}
NormalizeReshapeCopyDynValidShape(op.get(), coaLists, coaIndex, valueToIndex);
}
}
void Function::DumpTopoFile(const std::string& fileName) const
{
Json totalTopoJson;
for (const auto& topo : topoInfo_.GetTopology()) {
Json sJson;
sJson["taskId"] = topo.esgId;
sJson["successors"] = Json::array();
for (const auto& successor : topo.outGraph) {
sJson["successors"].push_back(successor);
}
int id = operations_[topo.esgId]->GetProgramId();
if (static_cast<size_t>(id) >= calleeMagicNameList_.size()) {
continue;
}
sJson["funcName"] = calleeMagicNameList_[id];
sJson["semanticLabel"] = operations_[topo.esgId]->GetSemanticLabelStr();
totalTopoJson.push_back(sJson);
}
std::ofstream ofs(fileName);
ofs << totalTopoJson.dump(1) << std::endl;
ofs.close();
}
/**
 * @brief Compose the title line used by the SSA dump.
 * @return "<magic name>[<function magic>] <hash> <function type name> <graph type name>".
 */
std::string Function::DumpSSATitle() const
{
    std::stringstream title;
    title << GetMagicName() << "[" << functionMagic_ << "]";
    title << " " << GetFunctionHash();
    title << " " << GetFunctionTypeNameDict().Find(GetFunctionType());
    title << " " << GetGraphTypeNameDict().Find(GetGraphType());
    return title.str();
}
std::string Function::DumpSSARawTensor(int indent) const
{
std::string prefix(indent, ' ');
std::unordered_set<int> dumpedRawTensor;
auto dumped = [&dumpedRawTensor](const std::shared_ptr<LogicalTensor>& tensor) -> bool {
if (dumpedRawTensor.count(tensor->GetRawTensor()->GetRawMagic()) == 0) {
dumpedRawTensor.insert(tensor->GetRawTensor()->GetRawMagic());
return false;
} else {
return true;
}
};
std::stringstream ss;
int rawIndex = 0;
for (size_t i = 0; i < inCasts_.size(); ++i) {
if (!dumped(inCasts_[i])) {
ss << prefix << "RAWTENSOR[" << std::setw(SPACE_NUM_THREE) << std::setfill(' ') << rawIndex++ << "] "
<< inCasts_[i]->GetRawTensor()->DumpSSA() << "\n";
}
}
for (size_t i = 0; i < outCasts_.size(); ++i) {
if (!dumped(outCasts_[i])) {
ss << prefix << "RAWTENSOR[" << std::setw(SPACE_NUM_THREE) << std::setfill(' ') << rawIndex++ << "] "
<< outCasts_[i]->GetRawTensor()->DumpSSA() << "\n";
}
}
for (size_t i = 0; i < operations_.size(); ++i) {
for (auto& input : operations_[i]->GetIOperands()) {
if (!dumped(input)) {
ss << prefix << "RAWTENSOR[" << std::setw(SPACE_NUM_THREE) << std::setfill(' ') << rawIndex++ << "] "
<< input->GetRawTensor()->DumpSSA() << "\n";
}
}
for (auto& output : operations_[i]->GetOOperands()) {
if (!dumped(output)) {
ss << prefix << "RAWTENSOR[" << std::setw(SPACE_NUM_THREE) << std::setfill(' ') << rawIndex++ << "] "
<< output->GetRawTensor()->DumpSSA() << "\n";
}
}
}
return ss.str();
}
/**
 * @brief Dump one "INCAST[n]" line per incast, followed by its slot list when a slot scope provides one.
 * @param indent Number of spaces put in front of every line.
 * @return The dump text.
 */
std::string Function::DumpSSAIncast(int indent) const
{
    std::string prefix(indent, ' ');
    std::stringstream out;
    for (size_t castIdx = 0; castIdx < inCasts_.size(); ++castIdx) {
        out << prefix << "INCAST[" << std::setw(SPACE_NUM_THREE) << std::setfill(' ') << castIdx << "] "
            << inCasts_[castIdx]->DumpSSA(false, false, true);

        const bool hasSlots = slotScope_ && castIdx < slotScope_->ioslot.incastSlot.size();
        if (hasSlots) {
            out << " fromSlot[";
            const char* separator = "";
            for (const auto& slot : slotScope_->ioslot.incastSlot[castIdx]) {
                out << separator << slot;
                separator = ", ";
            }
            out << "]";
        }
        out << "\n";
    }
    return out.str();
}
/**
 * @brief Dump one "OUTCAST[n]" line per outcast, followed by its slot list when a slot scope provides one.
 * @param indent Number of spaces put in front of every line.
 * @return The dump text.
 */
std::string Function::DumpSSAOutcast(int indent) const
{
    std::string prefix(indent, ' ');
    std::stringstream out;
    for (size_t castIdx = 0; castIdx < outCasts_.size(); ++castIdx) {
        out << prefix << "OUTCAST[" << std::setw(SPACE_NUM_THREE) << std::setfill(' ') << castIdx << "] "
            << outCasts_[castIdx]->DumpSSA(false, false, true);

        const bool hasSlots = slotScope_ && castIdx < slotScope_->ioslot.outcastSlot.size();
        if (hasSlots) {
            out << " toSlot[";
            const char* separator = "";
            for (const auto& slot : slotScope_->ioslot.outcastSlot[castIdx]) {
                out << separator << slot;
                separator = ", ";
            }
            out << "]";
        }
        out << "\n";
    }
    return out.str();
}
/**
 * @brief Dump the dynamic-loop attribute (iteration symbol, begin, end, step) of this function.
 * @param indent Number of spaces put in front of every line.
 * @return The dump text; empty when IsDynloop() is false.
 */
std::string Function::DumpSSAAttribute(int indent) const
{
    std::string prefix(indent, ' ');
    std::stringstream out;
    if (!IsDynloop()) {
        return out.str();
    }

    auto loopAttr = GetDynloopAttribute();
    out << prefix << "LOOP SYMBOL " << loopAttr->iterSymbolName << "\n";
    out << prefix << "LOOP BEGIN " << loopAttr->Begin().Dump() << "\n";
    out << prefix << "LOOP END " << loopAttr->End().Dump() << "\n";
    out << prefix << "LOOP STEP " << loopAttr->Step().Dump() << "\n";
    return out.str();
}
constexpr int INDENT_TWO = 2;
std::string Function::DumpSSA() const
{
std::stringstream ss;
ss << "\n-------------\n";
ss << "Function " << DumpSSATitle() << " {\n";
ss << DumpSSARawTensor(INDENT_TWO) << "\n";
ss << DumpSSAIncast(INDENT_TWO) << "\n";
ss << DumpSSAOutcast(INDENT_TWO) << "\n";
ss << DumpSSAAttribute(INDENT_TWO) << "\n";
for (size_t i = 0; i < operations_.size(); ++i) {
auto op = operations_[i];
ss << op->DumpSSA(PREFIX);
}
ss << "}\n";
return ss.str();
}
/** @brief Textual dump of this function; forwards to DumpSSA(). */
std::string Function::Dump() const
{
    return DumpSSA();
}
/**
 * @brief Write the textual dump of this function to a file.
 * @param filePath Path of the file to write; a CHECK with FeError::BAD_FD verifies that it could be opened.
 */
void Function::DumpFile(const std::string& filePath) const
{
    std::ofstream fout(filePath);
    CHECK(FeError::BAD_FD, fout.is_open()) << "Failed to open file: " << filePath;

    const std::string dumpText = Dump();
    fout << dumpText;
    fout.close();
}
/**
 * @brief Rewire the first input and first output tensor of an operation so that the operation can be bypassed.
 *
 * Nothing happens unless op has at least one input and one output operand. Only operand 0 on each side is
 * considered.
 *  - If the output tensor is one of outCasts_, or keepOutTensor is set, the output tensor is kept: the
 *    producers and the remaining consumers of the input tensor are moved onto the output tensor.
 *  - Otherwise the input tensor is kept: the consumers of the output tensor are moved onto the input tensor.
 *
 * @param op             Operation that is about to be bypassed.
 * @param keepOutTensor  Forces the output tensor to be the surviving tensor.
 */
void Function::UpdateOperandBeforeRemoveOp(Operation& op, const bool keepOutTensor)
{
if (!op.GetIOperands().empty() && !op.GetOOperands().empty()) {
LogicalTensorPtr inputTensor = op.GetIOperands().at(0);
LogicalTensorPtr outputTensor = op.GetOOperands().at(0);
bool isOutCast = std::find(outCasts_.begin(), outCasts_.end(), outputTensor) != outCasts_.end();
if (isOutCast || keepOutTensor) {
// Keep the output tensor: detach op from it and let the producers of the input write to it instead.
outputTensor->RemoveProducer(op);
// NOTE(review): ReplaceOutputOperand is called while iterating inputTensor->GetProducers(); confirm it
// does not modify that container.
for (auto& producer : inputTensor->GetProducers()) {
outputTensor->AddProducer(*producer);
// Carry over the dynamic valid shape of the input tensor when it has one.
if (!inputTensor->GetDynValidShape().empty()) {
outputTensor->UpdateDynValidShape(inputTensor->GetDynValidShape());
}
producer->ReplaceOutputOperand(inputTensor, outputTensor);
}
inputTensor->GetProducers().clear();
// op is removed first so that only the other consumers are redirected to the output tensor.
inputTensor->RemoveConsumer(op);
for (auto& consumer : inputTensor->GetConsumers()) {
outputTensor->AddConsumer(*consumer);
consumer->ReplaceInputOperand(inputTensor, outputTensor);
}
inputTensor->GetConsumers().clear();
} else {
// Keep the input tensor: the consumers of the output tensor read from the input tensor instead.
inputTensor->RemoveConsumer(op);
for (const auto& consumer : outputTensor->GetConsumers()) {
inputTensor->AddConsumer(consumer);
consumer->ReplaceInputOperand(outputTensor, inputTensor);
}
outputTensor->GetConsumers().clear();
}
}
}
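/**
 * @brief Handle the input and output control edges of an operation.
 * All input control edges shall be moved to the output operations of the current op;
 * all output control edges shall be moved to the input operations of the current op.
 * @param op the operation whose control edges are handled
 */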
void Function::HandleControlOps(Operation& op, std::vector<Operation*>& toRemoveOps) const
{
    // Work on a snapshot of the incoming control operations: RemoveInCtrlOperation() below edits the
    // in-control set of op, and erasing from a container while a range-for walks that same container
    // invalidates the loop iterator.
    const auto inputCtrlOpSet = op.GetInCtrlOperations();
    if (!inputCtrlOpSet.empty()) {
        auto outputOps = GetAllOutputOperations(op);
        for (auto peerCtrlOp : inputCtrlOpSet) {
            if (peerCtrlOp == nullptr) {
                continue;
            }
            if (peerCtrlOp->OnlyHasCtrlEdgeToOp(op)) {
                // The peer is connected to op by this control edge only: drop the edge and queue the peer
                // in toRemoveOps for the caller.
                op.RemoveInCtrlOperation(*peerCtrlOp);
                toRemoveOps.push_back(peerCtrlOp);
            } else {
                // Otherwise re-attach the incoming control edge to every output operation of op.
                for (auto& outputOp : outputOps) {
                    outputOp->AddInCtrlOperation(*peerCtrlOp);
                }
            }
        }
        op.ClearInCtrlOperations();
    }

    // Outgoing control edges are re-attached to every input operation of op. The snapshot keeps this loop
    // independent of any bookkeeping AddOutCtrlOperation() may perform.
    const auto outputCtrlOpSet = op.GetOutCtrlOperations();
    if (!outputCtrlOpSet.empty()) {
        auto inputOps = GetAllInputOperations(op);
        for (auto& inputOp : inputOps) {
            for (auto outCtrlOp : outputCtrlOpSet) {
                inputOp->AddOutCtrlOperation(*outCtrlOp);
            }
        }
        op.ClearOutCtrlOperations();
    }
}
/**
 * @brief Find the operation with the given magic number by scanning operations_.
 * @param opMagic Magic number to look for.
 * @return The first matching operation, or nullptr when none matches.
 */
Operation* Function::GetOpByOpMagic(const int opMagic) const
{
    const auto match = std::find_if(operations_.begin(), operations_.end(), [opMagic](const auto& candidate) {
        return candidate->GetOpMagic() == opMagic;
    });
    return (match == operations_.end()) ? nullptr : match->get();
}
/**
 * @brief Make dstTensor reuse the raw tensor of srcTensor.
 *
 * When both raw shapes are equal, dstTensor simply shares srcTensor's raw tensor. Otherwise dstTensor keeps its
 * own raw tensor and its actualRawmagic is set to the raw magic srcTensor resolves to (srcTensor's
 * actualRawmagic, or its rawmagic when actualRawmagic is -1).
 *
 * @param dstTensor Tensor that will reuse storage.
 * @param srcTensor Tensor whose storage is reused.
 * @return false when either tensor is null or when data type / raw shape size differ, true otherwise.
 */
bool Function::TensorReuse(const LogicalTensorPtr& dstTensor, const LogicalTensorPtr& srcTensor)
{
    if (!dstTensor || !srcTensor) {
        return false;
    }

    const bool compatible = dstTensor->Datatype() == srcTensor->Datatype() &&
                            dstTensor->tensor->GetRawShapeSize() == srcTensor->tensor->GetRawShapeSize();
    if (!compatible) {
        FE_LOGI("Data type or raw shape size of src and dst tensor is not same.");
        return false;
    }

    if (dstTensor->tensor->rawshape == srcTensor->tensor->rawshape) {
        dstTensor->tensor = srcTensor->tensor;
        return true;
    }

    if (srcTensor->tensor->actualRawmagic == -1) {
        dstTensor->tensor->actualRawmagic = srcTensor->tensor->rawmagic;
    } else {
        dstTensor->tensor->actualRawmagic = srcTensor->tensor->actualRawmagic;
    }
    return true;
}
/**
 * @brief Check whether the tensor shares its raw magic with one of the incasts.
 * @param tensor Tensor to test.
 * @return true when some incast has the same raw magic as tensor.
 */
bool Function::IsFromInCast(const std::shared_ptr<LogicalTensor>& tensor)
{
    return std::any_of(inCasts_.begin(), inCasts_.end(), [&tensor](const auto& incast) {
        return incast->GetRawMagic() == tensor->GetRawMagic();
    });
}
/**
 * @brief Check whether the tensor shares its raw magic with one of the outcasts.
 * @param tensor Tensor to test.
 * @return true when some outcast has the same raw magic as tensor.
 */
bool Function::IsFromOutCast(const std::shared_ptr<LogicalTensor>& tensor)
{
    return std::any_of(outCasts_.begin(), outCasts_.end(), [&tensor](const auto& outcast) {
        return outcast->GetRawMagic() == tensor->GetRawMagic();
    });
}
/**
 * @brief Check whether some outcast's raw tensor has the given raw magic.
 * @param rawMagic Raw magic to look for.
 * @return true when an outcast with tensor->rawmagic == rawMagic exists.
 */
bool Function::IsFromDummyOutCast(int rawMagic)
{
    return std::any_of(outCasts_.begin(), outCasts_.end(), [rawMagic](const auto& outcast) {
        return outcast->tensor->rawmagic == rawMagic;
    });
}
/**
 * @brief Position of a tensor within the incast list.
 * @param tensor Tensor to look up (compared by pointer identity).
 * @return Index of the first matching incast, or INVALID_IOINDEX when the tensor is not an incast.
 */
int Function::GetIncastIndex(std::shared_ptr<LogicalTensor>& tensor) const
{
    const auto pos = std::find(inCasts_.begin(), inCasts_.end(), tensor);
    if (pos == inCasts_.end()) {
        return INVALID_IOINDEX;
    }
    return static_cast<int>(pos - inCasts_.begin());
}
/**
 * @brief Position of a tensor within the outcast list.
 * @param tensor Tensor to look up (compared by pointer identity).
 * @return Index of the first matching outcast, or INVALID_IOINDEX when the tensor is not an outcast.
 */
int Function::GetOutcastIndex(std::shared_ptr<LogicalTensor>& tensor) const
{
    const auto pos = std::find(outCasts_.begin(), outCasts_.end(), tensor);
    if (pos == outCasts_.end()) {
        return INVALID_IOINDEX;
    }
    return static_cast<int>(pos - outCasts_.begin());
}
/**
 * @brief Split the operation list into view, assemble and call operations and gather the tensors around each call.
 *
 * OP_VIEW and OP_ASSEMBLE operations are collected into sets; every other operation must be an OP_CALL. For each
 * call, the first input of the first producer of every input operand is taken as an incast, and the first output
 * of the first consumer of every output operand as an outcast. On return operations_ holds only the call
 * operations.
 *
 * @return Tuple of (incasts per call, outcasts per call, view ops, assemble ops, call inputs, call outputs).
 */
TensorGraphInfo Function::GetGraphInfo()
{
    std::vector<LogicalTensors> inCastsPerCall;
    std::vector<LogicalTensors> outCastsPerCall;
    std::set<std::shared_ptr<Operation>> viewOps;
    std::set<std::shared_ptr<Operation>> assembleOps;
    std::set<std::shared_ptr<LogicalTensor>> callInputs;
    std::set<std::shared_ptr<LogicalTensor>> callOutputs;
    std::vector<std::shared_ptr<Operation>> callOps;

    for (auto& op : operations_) {
        if (op->GetOpcode() == Opcode::OP_VIEW) {
            viewOps.emplace(op);
            continue;
        }
        if (op->GetOpcode() == Opcode::OP_ASSEMBLE) {
            assembleOps.emplace(op);
            continue;
        }
        FE_ASSERT(FeError::INVALID_VAL, op->GetOpcode() == Opcode::OP_CALL)
            << "Invalid operation code: " << static_cast<int>(op->GetOpcode()) << "\n"
            << "Operation: " << op->Dump();
        callOps.emplace_back(op);

        // Inputs: step back through the first producer to reach the tensor it reads.
        LogicalTensors sources;
        for (auto& callInput : op->GetIOperands()) {
            auto& feedingOp = *callInput->GetProducers().begin();
            auto& source = feedingOp->GetIOperands()[0];
            callInputs.emplace(callInput);
            sources.push_back(source);
        }

        // Outputs: step forward through the first consumer to reach the tensor it writes.
        LogicalTensors sinks;
        for (auto& callOutput : op->GetOOperands()) {
            auto& drainingOp = *callOutput->GetConsumers().begin();
            auto& sink = drainingOp->GetOOperands()[0];
            callOutputs.emplace(callOutput);
            sinks.push_back(sink);
        }

        inCastsPerCall.emplace_back(std::move(sources));
        outCastsPerCall.emplace_back(std::move(sinks));
    }

    operations_ = std::move(callOps);
    return std::make_tuple(
        std::move(inCastsPerCall), std::move(outCastsPerCall), std::move(viewOps), std::move(assembleOps),
        std::move(callInputs), std::move(callOutputs));
}
/**
 * @brief Drop the producer/consumer links and operand lists described by graphInfo.
 *
 * Clears both link sets of every call input/output tensor, both operand lists of every view and assemble
 * operation, the consumers of every incast, the producers of every outcast and finally both operand lists of
 * every operation left in operations_.
 *
 * @param graphInfo Tuple produced by GetGraphInfo().
 */
void Function::ClearUselessLink(TensorGraphInfo& graphInfo)
{
    auto& [callopInCasts, callopOutCasts, viewOpSet, assembleOpSet, iOperandSet, oOperandSet] = graphInfo;

    for (const auto& callInput : iOperandSet) {
        callInput->GetProducers().clear();
        callInput->GetConsumers().clear();
    }
    for (const auto& callOutput : oOperandSet) {
        callOutput->GetProducers().clear();
        callOutput->GetConsumers().clear();
    }
    for (const auto& view : viewOpSet) {
        view->GetIOperands().clear();
        view->GetOOperands().clear();
    }
    for (const auto& assemble : assembleOpSet) {
        assemble->GetIOperands().clear();
        assemble->GetOOperands().clear();
    }
    for (const auto& incastGroup : callopInCasts) {
        for (const auto& incast : incastGroup) {
            incast->GetConsumers().clear();
        }
    }
    for (const auto& outcastGroup : callopOutCasts) {
        for (const auto& outcast : outcastGroup) {
            outcast->GetProducers().clear();
        }
    }
    for (const auto& callOp : operations_) {
        callOp->GetIOperands().clear();
        callOp->GetOOperands().clear();
    }
}
/**
 * @brief Connect every operation in operations_ directly to its incasts and outcasts.
 *
 * Entry idx of both lists belongs to operations_[idx]; the lists are indexed with operations_.size(), so they
 * must be at least that long.
 *
 * @param callopInCasts   Per-operation incasts; each becomes an input operand of, and is consumed by, its operation.
 * @param callopOutCasts  Per-operation outcasts; each becomes an output operand of, and is produced by, its operation.
 */
void Function::LinkIoWithCallOp(std::vector<LogicalTensors>& callopInCasts, std::vector<LogicalTensors>& callopOutCasts)
{
    const size_t opCount = operations_.size();

    // First pass: inputs of all operations.
    for (size_t opIdx = 0; opIdx < opCount; ++opIdx) {
        auto& callOp = operations_[opIdx];
        for (const auto& incast : callopInCasts[opIdx]) {
            incast->AddConsumer(*callOp);
            callOp->iOperand.emplace_back(incast);
        }
    }

    // Second pass: outputs of all operations.
    for (size_t opIdx = 0; opIdx < opCount; ++opIdx) {
        auto& callOp = operations_[opIdx];
        for (const auto& outcast : callopOutCasts[opIdx]) {
            outcast->AddProducer(*callOp);
            callOp->oOperand.emplace_back(outcast);
        }
    }
}
/**
 * @brief Remove the view/assemble operations around the call operations and link the calls to their casts.
 *
 * Gathers the graph description, clears the links it lists, then attaches the collected incasts and outcasts
 * directly to the remaining operations.
 */
void Function::RemoveCallOpViewAssemble()
{
    auto graphInfo = GetGraphInfo();
    ClearUselessLink(graphInfo);

    auto& inCastsPerCall = std::get<0>(graphInfo);
    auto& outCastsPerCall = std::get<1>(graphInfo);
    LinkIoWithCallOp(inCastsPerCall, outCastsPerCall);
}
void Function::UpdateOriIocastSlot(const std::shared_ptr<TensorSlotScope> scope)
{
FE_ASSERT(FeError::INVALID_PTR, slotScope_ != nullptr) << "slotScope_ is null";
auto& incastDst = slotScope_->oriIncastReadSlotSet;
incastDst.insert(incastDst.end(), scope->incastReadSlotSet.begin(), scope->incastReadSlotSet.end());
auto& outcastDst = slotScope_->oriOutcastWriteSlotSet;
outcastDst.insert(outcastDst.end(), scope->outcastWriteSlotSet.begin(), scope->outcastWriteSlotSet.end());
auto& iSlot = slotScope_->originalIocastsSlot.incastSlot;
auto& oSlot = slotScope_->originalIocastsSlot.outcastSlot;
iSlot.insert(iSlot.end(), scope->ioslot.incastSlot.begin(), scope->ioslot.incastSlot.end());
oSlot.insert(oSlot.end(), scope->ioslot.outcastSlot.begin(), scope->ioslot.outcastSlot.end());
}
void Function::SetCallOpSlot()
{
bool isAllCallOp = std::all_of(
operations_.begin(), operations_.end(), [](const auto& op) { return op->GetOpcode() == Opcode::OP_CALL; });
if (!isAllCallOp) {
return;
}
std::vector<Function*> calleeList = GetCalleeFunctionList();
for (auto callee : calleeList) {
if (callee == nullptr) {
continue;
}
const std::shared_ptr<TensorSlotScope> calleeScope = callee->GetSlotScope();
UpdateOriIocastSlot(calleeScope);
}
return;
}
/**
 * @brief Slot list recorded for an incast.
 * @param incast Tensor to look up (compared by pointer identity).
 * @return The incast slot list at the tensor's position; empty when the tensor is not an incast. If the tensor
 *         occurs more than once, the list of the last occurrence is returned.
 */
std::vector<int> Function::GetInCastSlot(const std::shared_ptr<LogicalTensor>& incast)
{
    std::vector<int> slots;
    size_t position = 0;
    for (const auto& candidate : inCasts_) {
        const size_t idx = position++;
        if (candidate != incast) {
            continue;
        }
        auto& scope = GetSlotScope();
        FE_ASSERT(FeError::INVALID_PTR, scope != nullptr) << "SlotScope is null";
        slots = scope->ioslot.incastSlot[idx];
    }
    return slots;
}
/**
 * @brief Slot list recorded for an outcast.
 * @param outcast Tensor to look up (compared by pointer identity).
 * @return The outcast slot list at the tensor's position; empty when the tensor is not an outcast. If the tensor
 *         occurs more than once, the list of the last occurrence is returned.
 */
std::vector<int> Function::GetOutCastSlot(const std::shared_ptr<LogicalTensor>& outcast)
{
    std::vector<int> slots;
    size_t position = 0;
    for (const auto& candidate : outCasts_) {
        const size_t idx = position++;
        if (candidate != outcast) {
            continue;
        }
        auto& scope = GetSlotScope();
        FE_ASSERT(FeError::INVALID_PTR, scope != nullptr) << "SlotScope is null";
        slots = scope->ioslot.outcastSlot[idx];
    }
    return slots;
}
void Function::ResetOperations()
{
operations_.clear();
opPosition_.clear();
sorted_ = false;
tensorMap_.Reset();
for (auto& t : inCasts_) {
tensorMap_.Insert(t);
t->GetConsumers().clear();
}
for (auto& t : outCasts_) {
t->GetProducers().clear();
}
}
std::set<Operation*, LogicalTensor::CompareOp> Function::FindConsumers(const Operation& op) const
{
std::set<Operation*, LogicalTensor::CompareOp> consumers;
for (const auto& output : op.oOperand) {
for (auto& consumer : output->GetConsumers()) {
if (consumer->BelongTo() == this) {
consumers.emplace(consumer);
}
}
}
return consumers;
}
std::set<Operation*, LogicalTensor::CompareOp> Function::FindProducers(const Operation& op) const
{
std::set<Operation*, LogicalTensor::CompareOp> producers;
for (const auto& input : op.iOperand) {
for (auto& producer : input->GetProducers()) {
if (producer->BelongTo() == this) {
producers.emplace(producer);
}
}
}
return producers;
}
std::vector<OriArgInfo> Function::GetOpOriginArgsInfo()
{
std::map<int, OriArgInfo> args;
int maxSubscript = 0;
for (const auto& incast : inCasts_) {
auto subscript = GetParamIndex(incast->GetRawTensor());
if (subscript == -1) {
continue;
}
maxSubscript = std::max(maxSubscript, subscript);
OriArgInfo info{
reinterpret_cast<uint64_t>(GetParamAddress(subscript)), incast->MemorySize(),
incast->GetCachePolicy(CachePolicy::PREFETCH)};
if (args.count(subscript) > 0) {
FE_ASSERT(args.at(subscript) == info)
<< "args.at(subscript): " << args.at(subscript).Dump() << ", info: " << info.Dump();
} else {
args.emplace(subscript, info);
}
}
for (const auto& outcast : outCasts_) {
auto subscript = GetParamIndex(outcast->GetRawTensor());
if (subscript == -1) {
continue;
}
maxSubscript = std::max(maxSubscript, subscript);
OriArgInfo info{
reinterpret_cast<uint64_t>(GetParamAddress(subscript)), outcast->MemorySize(),
outcast->GetCachePolicy(CachePolicy::PREFETCH)};
if (args.count(subscript) > 0) {
FE_ASSERT(args.at(subscript) == info)
<< "args.at(subscript): " << args.at(subscript).Dump() << ", info: " << info.Dump();
} else {
args.emplace(subscript, info);
}
}
std::vector<OriArgInfo> argsInfo(maxSubscript + 1);
for (int idx = 0; idx <= maxSubscript; idx++) {
if (args.count(idx) > 0) {
argsInfo[idx] = args.at(idx);
} else {
argsInfo[idx] = OriArgInfo{0, 0, false};
}
}
return argsInfo;
}
/**
 * @brief Validate the structural invariants of one operation of this function.
 *
 * Checks, through assertions:
 *  - operand counts, attribute presence and offset sizes of OP_VIEW and OP_ASSEMBLE;
 *  - that operations outside SPECIAL_OPCODE_SET carry no operation attribute;
 *  - that the operation magic lies in [0, opSeed_);
 *  - that a non-call operation has at most one output operand;
 *  - for every operand: shape and offset have the same rank, the operand belongs to this function, it is
 *    registered in the tensor map, and it lists op as producer (outputs) or consumer (inputs);
 *  - for every producer of an input operand: it belongs to this function and has a magic in range.
 *
 * @param op Operation to validate.
 */
void Function::OpValidCheck(Operation& op) const
{
    // NOTE(review): opMap is local to this call and starts empty, so the producer lookup near the end of this
    // function cannot find any entry; confirm whether the set was meant to be shared across the calls made
    // by ValidCheck().
    std::unordered_set<const Operation*> opMap;
    std::unordered_set<std::shared_ptr<LogicalTensor>> incasts(GetIncast().begin(), GetIncast().end());
    if (SPECIAL_OPCODE_SET.count(op.GetOpcode()) != 0) {
        if (op.GetOpcode() == Opcode::OP_VIEW) {
            FE_ASSERT(FeError::OUT_OF_RANGE, op.GetIOperands().size() == 1)
                << "OP_VIEW expects 1 input operand, but got " << op.GetIOperands().size();
            FE_ASSERT(FeError::OUT_OF_RANGE, op.GetOOperands().size() <= 1)
                << "OP_VIEW expects at most 1 output operand, but got " << op.GetOOperands().size();
            auto opAttr = std::dynamic_pointer_cast<ViewOpAttribute>(op.GetOpAttribute());
            FE_ASSERT(FeError::INVALID_PTR, opAttr != nullptr)
                << "OP_VIEW should have a ViewOpAttribute, but it is null";
            ASSERT(FeError::INVALID_VAL, op.GetIOperands()[0]->GetOffset().size() == opAttr->GetFromOffset().size())
                << "OP_VIEW input operand offset size does not match attribute from offset size";
            if (!op.GetOOperands().empty()) {
                ASSERT(FeError::INVALID_VAL, op.GetOOperands()[0]->GetOffset().size() == opAttr->GetFromOffset().size())
                    << "OP_VIEW output operand offset size does not match attribute from offset size";
            }
        }
        if (op.GetOpcode() == Opcode::OP_ASSEMBLE) {
            FE_ASSERT(FeError::OUT_OF_RANGE, op.GetIOperands().size() == 1)
                << "OP_ASSEMBLE should have exactly 1 input operand, but has " << op.GetIOperands().size();
            FE_ASSERT(FeError::OUT_OF_RANGE, op.GetOOperands().size() <= 1)
                << "OP_ASSEMBLE should have at most 1 output operand, but has " << op.GetOOperands().size();
            auto opAttr = std::dynamic_pointer_cast<AssembleOpAttribute>(op.GetOpAttribute());
            FE_ASSERT(FeError::INVALID_PTR, opAttr != nullptr)
                << "OP_ASSEMBLE should have an AssembleOpAttribute, but it is null";
            if (!op.GetIOperands().empty()) {
                ASSERT(FeError::INVALID_VAL, op.GetIOperands()[0]->GetOffset().size() == opAttr->GetToOffset().size())
                    << "OP_ASSEMBLE input operand offset size does not match attribute to offset size";
            }
            // The output operand is optional (at most one is allowed above), so index it only when present,
            // exactly as the OP_VIEW branch does.
            if (!op.GetOOperands().empty()) {
                ASSERT(FeError::INVALID_VAL, op.GetOOperands()[0]->GetOffset().size() == opAttr->GetToOffset().size())
                    << "OP_ASSEMBLE output operand offset size does not match attribute to offset size";
            }
        }
    } else {
        FE_ASSERT(FeError::INVALID_PTR, op.GetOpAttribute() == nullptr)
            << "Non-special operation should not have an operation attribute";
    }
    FE_ASSERT(FeError::OUT_OF_RANGE, op.GetOpMagic() >= 0 && op.GetOpMagic() < opSeed_)
        << "Operation magic number is out of bounds: " << op.GetOpMagic() << ", function opSeed_ is: " << opSeed_;
    if (!op.IsCall()) {
        FE_ASSERT(FeError::OUT_OF_RANGE, op.GetOOperands().size() <= 1)
            << "Non-call operation should have at most 1 output operand, but has " << op.GetOOperands().size();
    }
    // Output operands: must belong to this function, be registered in the tensor map and list op as producer.
    for (auto& oOperand : op.GetOOperands()) {
        FE_ASSERT(FeError::INVALID_VAL, oOperand->GetShape().size() == oOperand->GetOffset().size())
            << "Output operand shape size does not match offset size";
        FE_ASSERT(&oOperand->BelongFunction() == this) << "Output operand does not belong to current function";
        auto tmp = GraphUtils::GetTensorByMagic(const_cast<Function&>(*this), oOperand->magic);
        FE_ASSERT(FeError::NOT_EXIST, tmp == oOperand) << "Tensor map does not match output operand";
        FE_ASSERT(oOperand->HasProducer(op))
            << "Output operand does not have current operation as a producer, opmagic: " << op.GetOpMagic()
            << ", operand magic: " << oOperand->magic;
    }
    // Input operands: same checks from the consumer side, plus validation of every producer.
    for (auto& iOperand : op.GetIOperands()) {
        FE_ASSERT(FeError::INVALID_VAL, iOperand->GetShape().size() == iOperand->GetOffset().size())
            << "Input operand shape size does not match offset size";
        FE_ASSERT(&iOperand->BelongFunction() == this) << "Input operand does not belong to current function";
        // The tensor-map lookup is only done for operands that have a producer or are incasts.
        if (!iOperand->GetProducers().empty() || incasts.count(iOperand) != 0) {
            auto tmp = GraphUtils::GetTensorByMagic(const_cast<Function&>(*this), iOperand->magic);
            FE_ASSERT(FeError::NOT_EXIST, tmp == iOperand) << "Tensor map does not match input operand";
        }
        FE_ASSERT(iOperand->HasConsumer(op)) << "Input operand does not have current operation as a consumer";
        for (const auto& producer : iOperand->GetProducers()) {
            FE_ASSERT(producer->BelongTo() == this) << "Producer does not belong to current function";
            FE_ASSERT(FeError::OUT_OF_RANGE, producer->GetOpMagic() >= 0 && producer->GetOpMagic() < opSeed_)
                << "Producer magic number is out of bounds: " << producer->GetOpMagic()
                << ", function opSeed_ is: " << opSeed_ << ", producer in tensor(" << iOperand->magic << ","
                << iOperand->tensor->rawmagic << ")";
            if (producer->IsDeleted()) {
                continue;
            }
            FE_ASSERT(FeError::NOT_EXIST, opMap.find(producer) != opMap.end()) << "Producer not found in operation map";
        }
    }
    FE_ASSERT(FeError::IS_EXIST, opMap.count(&op) == 0) << "Operation is already in the operation map";
    opMap.emplace(&op);
}
/**
 * @brief Count the value-dependent calls found in the symbolic expressions of this function.
 *
 * Each expression is searched for T_MOP_CALL nodes. A call whose first operand is a symbol is counted as
 * "get input data" or "get tensor data" according to the symbol name. For a DYNAMIC_LOOP function the loop
 * begin/end/step and all path conditions are searched; otherwise the dynamic attributes of every operation
 * returned by Operations(false) are searched.
 *
 * @return The accumulated counters.
 */
DyndevFunctionAttribute::ValueDependDesc Function::LookupValueDepend()
{
    DyndevFunctionAttribute::ValueDependDesc desc;

    // Adds the calls found in one expression to desc.
    auto tally = [&desc](const SymbolicScalar& attr) {
        std::vector<RawSymbolicScalarPtr> callList =
            LookupExpressionByOpcode(attr.Raw(), SymbolicOpcode::T_MOP_CALL);
        for (auto& call : callList) {
            auto callee = call->GetExpressionOperandList()[0];
            if (!callee->IsSymbol()) {
                continue;
            }
            std::string calleeName = callee->GetSymbolName();
            if (CallIsGetInputData(calleeName)) {
                desc.getInputDataCount++;
            } else if (CallIsGetTensorData(calleeName)) {
                desc.getTensorDataCount++;
            }
        }
    };

    if (GetFunctionType() != FunctionType::DYNAMIC_LOOP) {
        for (auto& op : Operations(false)) {
            std::vector<std::reference_wrapper<SymbolicScalar>> dynamicAttrs = op.GetDynamicAttributeList();
            for (auto& attrRef : dynamicAttrs) {
                tally(attrRef.get());
            }
        }
        return desc;
    }

    auto loopAttr = GetDynloopAttribute();
    tally(loopAttr->Begin());
    tally(loopAttr->End());
    tally(loopAttr->Step());
    for (auto& path : loopAttr->GetPathList()) {
        for (auto& cond : path.GetPathCondList()) {
            tally(cond.GetCond());
        }
    }
    return desc;
}
void Function::ValidCheck() const
{
int opMagic = -1000000;
for (auto& op : const_cast<Function&>(*this).Operations()) {
opMagic = std::max(opMagic, op.GetOpMagic());
}
FE_ASSERT(FeError::OUT_OF_RANGE, opMagic + 1 <= opSeed_)
<< "Invalid opMagic range: max opMagic is " << opMagic << ", function opSeed_ is: " << opSeed_;
TensorMagicCheck();
std::unordered_map<std::shared_ptr<LogicalTensor>, std::vector<Operation*>> used;
for (auto& op : const_cast<Function&>(*this).Operations()) {
if (op.IsDeleted()) {
continue;
}
for (const auto& operand : op.GetOOperands()) {
if (used.count(operand) > 0) {
for (auto innerOp : used.at(operand)) {
FE_ASSERT(FeError::IS_EXIST, innerOp->ComputeHash() != op.ComputeHash())
<< "Duplicate operation detected with same hash: " << op.ComputeHash();
}
}
used[operand].emplace_back(&op);
}
}
for (auto& op : const_cast<Function&>(*this).Operations()) {
if (op.IsDeleted()) {
continue;
}
OpValidCheck(op);
}
}
std::shared_ptr<OpAttribute> Function::CreateCallOpAttribute(
const std::vector<std::vector<SymbolicScalar>>& argList, const std::map<int, SymbolicScalar>& outIndexToExpr)
{
FunctionHash hash;
if (rootFunc_ != nullptr) {
hash = rootFunc_->ComputeHash();
} else {
hash = ComputeHash();
}
auto opAttribute = std::make_shared<CallOpAttribute>(hash, argList, GetMagicName(), outIndexToExpr);
return opAttribute;
}
// Maps a slot list to the positions of all casts that carry exactly that slot list.
using SameSlotSetIndex = std::map<std::vector<int>, std::vector<int>>;

/**
 * @brief Erase the elements of data located at the given positions.
 *
 * Positions are processed from the largest to the smallest so that earlier erasures do not shift the ones
 * still pending. Positions outside [0, data.size()) are ignored, and a position listed more than once is
 * erased only once.
 *
 * @tparam T         Element type of the vector.
 * @param data       Vector to erase from.
 * @param indexList  Positions to erase; taken by value because it is sorted and deduplicated locally.
 */
template <typename T>
void RemoveDupIndices(std::vector<T>& data, std::vector<int> indexList)
{
    std::sort(indexList.begin(), indexList.end(), std::greater<int>());
    // Without deduplication a repeated position would erase the element that slid into that position.
    indexList.erase(std::unique(indexList.begin(), indexList.end()), indexList.end());
    for (int index : indexList) {
        if (index >= 0 && index < static_cast<int>(data.size())) {
            data.erase(data.begin() + index);
        }
    }
}
/**
 * @brief Group the positions of vec by content and keep only the groups with more than one member.
 * @param vec Slot lists, one per cast.
 * @return Map from slot list to the ascending positions where it occurs, restricted to lists that occur at
 *         least twice.
 */
SameSlotSetIndex ClassifyIocasts(const std::vector<std::vector<int>>& vec)
{
    SameSlotSetIndex groups;
    size_t position = 0;
    for (const auto& slots : vec) {
        groups[slots].push_back(position);
        ++position;
    }

    // Drop the slot lists that occur exactly once.
    auto cursor = groups.begin();
    while (cursor != groups.end()) {
        if (cursor->second.size() == 1) {
            cursor = groups.erase(cursor);
        } else {
            ++cursor;
        }
    }
    return groups;
}
/**
 * @brief Merge incasts that carry identical slot lists into a single incast.
 *
 * For every group of incasts with the same slot list a new incast is created from the first member of the
 * group. The first consumer of each member has its input operand 0 replaced by the new incast, the new incast
 * takes the position of the first member, and the remaining members are removed from inCasts_ and from the
 * matching entries of the slot scope.
 */
void Function::DoMergeFunctionDupIncast()
{
    auto duplicatedGroups = ClassifyIocasts(GetSlotScope()->ioslot.incastSlot);
    std::vector<int> removeIdx;
    for (auto& group : duplicatedGroups) {
        auto& slotSetIndex = group.second;
        FE_ASSERT(FeError::INVALID_VAL, slotSetIndex.size() > 1) << "Slot set index should have more than one element";
        // Every member except the first one is dropped at the end.
        removeIdx.insert(removeIdx.end(), slotSetIndex.begin() + 1, slotSetIndex.end());

        auto oriIncast = inCasts_[slotSetIndex[0]];
        auto newIncast = std::make_shared<LogicalTensor>(
            *this, oriIncast->tensor->datatype, oriIncast->shape, oriIncast->tensor->GetDynRawShape(),
            oriIncast->Format(), oriIncast->tensor->GetSymbol());
        newIncast->tensor->UpdateDynRawShape(oriIncast->tensor->GetDynRawShape());

        for (auto incastIdx : slotSetIndex) {
            FE_ASSERT(FeError::NOT_EXIST, inCasts_[incastIdx]->GetConsumers().size() > 0)
                << "Incast at index " << incastIdx << " should have at least one consumer";
            auto firstConsumer = *inCasts_[incastIdx]->GetConsumers().begin();
            firstConsumer->ReplaceIOperand(0, newIncast);
            tensorMap_.Insert(newIncast);
        }
        inCasts_[slotSetIndex[0]] = newIncast;
    }

    RemoveDupIndices(inCasts_, removeIdx);
    RemoveDupIndices(GetSlotScope()->incastReadSlotSet, removeIdx);
    RemoveDupIndices(GetSlotScope()->ioslot.incastSlot, removeIdx);
}
/**
 * \brief Collapse outcasts whose entries in ioslot.outcastSlot are identical into a single outcast.
 *
 * For every group of duplicates a fresh LogicalTensor is created from the first outcast of the
 * group, the first producer of each duplicate gets its output operand 0 replaced by that tensor,
 * and the fresh tensor takes the first outcast's place. The remaining positions of the group are
 * then removed from outCasts_, outcastWriteSlotSet and ioslot.outcastSlot.
 */
void Function::DoMergeFunctionDupOutcast()
{
    auto sameSlotSetIndex = ClassifyIocasts(GetSlotScope()->ioslot.outcastSlot);
    std::vector<int> removeIdx;
    for (auto& pair : sameSlotSetIndex) {
        auto& slotSetIndex = pair.second;
        FE_ASSERT(FeError::INVALID_VAL, slotSetIndex.size() > 1) << "Slot set index should have more than one element";
        // The first position survives; all later ones are scheduled for removal.
        removeIdx.insert(removeIdx.end(), slotSetIndex.begin() + 1, slotSetIndex.end());
        auto oriOutcast = outCasts_[slotSetIndex[0]];
        auto newOutcast = std::make_shared<LogicalTensor>(
            *this, oriOutcast->tensor->datatype, oriOutcast->shape, oriOutcast->tensor->GetDynRawShape(),
            oriOutcast->Format(), oriOutcast->tensor->GetSymbol());
        newOutcast->tensor->UpdateDynRawShape(oriOutcast->tensor->GetDynRawShape());
        for (auto outcastIdx : slotSetIndex) {
            FE_ASSERT(FeError::NOT_EXIST, outCasts_[outcastIdx]->GetProducers().size() > 0)
                << "Outcast at index " << outcastIdx << " should have at least one producer";
            // Copy the producer handle (as the incast variant does) instead of binding a reference
            // into the producer container: ReplaceOOperand presumably updates that container, which
            // would leave such a reference dangling.
            auto op = *outCasts_[outcastIdx]->GetProducers().begin();
            op->ReplaceOOperand(0, newOutcast);
            tensorMap_.Insert(newOutcast);
        }
        outCasts_[slotSetIndex[0]] = newOutcast;
    }
    RemoveDupIndices(outCasts_, removeIdx);
    RemoveDupIndices(GetSlotScope()->outcastWriteSlotSet, removeIdx);
    RemoveDupIndices(GetSlotScope()->ioslot.outcastSlot, removeIdx);
}
// Merges duplicated incasts first, then duplicated outcasts, by delegating to the two helpers below.
void Function::MergeFunctionDupIocast()
{
DoMergeFunctionDupIncast();
DoMergeFunctionDupOutcast();
}
/**
 * \brief Record \p idxName as a loop index name of this function.
 *
 * When this function has a parent, the names already recorded on the parent's parent are copied
 * into this function as well.
 *
 * \param idxName loop index name to record
 * \return false (and nothing is recorded) when the parent's parent is a DYNAMIC_LOOP function
 *         that already holds \p idxName; true otherwise
 */
bool Function::InsertLoopIdxNameList(const std::string& idxName)
{
    // No parent: there is nothing to check against or to inherit from.
    if (parent_ == nullptr) {
        loopIdxNameList_.insert(idxName);
        return true;
    }

    auto grandParent = parent_->parent_;
    FE_ASSERT(FeError::INVALID_VAL, grandParent != nullptr)
        << "Current function name: " << GetRawName() << " has no parent function, frontend kernel is not JIT decorated";

    const bool enclosedByDynamicLoop = grandParent->GetFunctionType() == FunctionType::DYNAMIC_LOOP;
    if (enclosedByDynamicLoop && grandParent->LoopIdxNameList().count(idxName) != 0) {
        return false;
    }

    loopIdxNameList_.insert(idxName);
    for (const auto& inheritedName : grandParent->LoopIdxNameList()) {
        loopIdxNameList_.insert(inheritedName);
    }
    return true;
}
// Marks this object as recording and sets the name of the Program singleton to `name`.
DefineProg::DefineProg(const std::string& name) : isRecording_(true) { Program::GetInstance().SetName(name); }
// Logs the end of the program definition, but only while this object is still recording.
DefineProg::~DefineProg()
{
    if (!isRecording_) {
        return;
    }
    FE_LOGI("prog.end: name=%s", Program::GetInstance().Name().c_str());
}