/**
 * Copyright (c) 2025 Huawei Technologies Co., Ltd.
 * This program is free software, you can redistribute it and/or modify it under the terms and conditions of
 * CANN Open Software License Agreement Version 2.0 (the "License").
 * Please refer to the License for details. You may not use this file except in compliance with the License.
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
 * See LICENSE in the root of the software repository for the full text of the License.
 */
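/*!
 * \file dev_encode_function.h
 * \brief Layout and accessors of the encoded device function (DevAscendFunction) and its encode-time parameters.
 */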
#pragma once
#include "machine/utils/dynamic/dev_encode_types.h"
#include "machine/utils/dynamic/dev_encode_tensor.h"
#include "machine/utils/dynamic/dev_encode_operation.h"
// Forward declarations only: these npu::tile_fwk types are defined elsewhere and are named in this header
// through pointers, references or template arguments.
namespace npu::tile_fwk {
struct L2Info;
struct CceCodeInfo;
class RawTensor;
class LogicalTensor;
class Operation;
class Function;
class IncastOutcastLink;
class IncastOutcastSlot;
class SymbolicSymbolTable;
class SymbolicExpressionTable;
}
namespace npu::tile_fwk::dynamic {
// Sentinel returned by the slot lookup helpers (LookupIncastBySlotIndex / LookupOutcastBySlotIndex)
// when no entry matches the requested slot.
constexpr int INVALID_INDEX = -1;
// Plain counter bundle stored in DevAscendFunction::predInfo_ and exposed through GetPredInfo().
// NOTE(review): judging by the names these are presumably counts of operations without predecessors,
// in total and split by execution unit (AIV / AIC / hub / AICPU) -- confirm against the encoder.
// The members have no default initializers, so a default-initialized instance holds indeterminate values.
struct DevAscendFunctionPredInfo {
uint64_t totalZeroPred;
uint64_t totalZeroPredAIV;
uint64_t totalZeroPredAIC;
uint64_t totalZeroPredHub;
uint64_t totalZeroPredAicpu;
};
/**
 * Bundle of inputs handed to the DevAscendFunction encoding helpers
 * (see InitRawTensorAndMemoryRequirement / InitIncastOutcast, which take it by const reference).
 *
 * All pointer members are non-owning and default to nullptr, so a default-constructed instance never
 * carries indeterminate pointers. Aggregate initialization in declaration order keeps working.
 */
struct EncodeDevAscendFunctionParam {
    std::unordered_map<uint64_t, int> calleeHashIndexDict;
    std::vector<CceCodeInfo> cceCodeInfoList;
    const SymbolicSymbolTable* symbolTable{nullptr};
    const IncastOutcastLink* inoutLink{nullptr};
    const SymbolicExpressionTable* expressionTable{nullptr};
    const IncastOutcastSlot* slot{nullptr};
    Function* devRoot{nullptr};
    std::vector<RuntimeSlotDesc> outcastDescList;
    std::vector<int> assembleSlotList;
};
// Defined elsewhere; DevAscendFunction::GetDuppedData() reinterprets the duppedData_ bytes as this type.
struct DevAscendFunctionDuppedData;
struct DevAscendFunction {
// funcKey is the value returned (converted to int) by GetFuncKey() and GetRootIndex().
uint64_t rootHash;
uint64_t funcKey;
// Returned by GetSource(); null unless set.
DevAscendFunction* sourceFunc{nullptr};
// Exposed by value and by reference through GetRuntimeWorkspace().
uintdevptr_t runtimeWorkspace;
uintdevptr_t opAttrs;
// Exposed by reference through GetFuncidx().
int funcidx;
int stackWorkSpaceSize;
// HasValueDepend() is derived from these two counters.
uint32_t getInputDataCount;
uint32_t getTensorDataCount;
uint64_t hubOpCount_{0};
// DevLocalVector members are resolved through At(), which adds the vector's element offset to the
// address of this object.
DevLocalVector<AddressDescriptor> incastAddressList;
DevLocalVector<AddressDescriptor> outcastAddressList;
DevLocalVector<uint64_t> expressionList;
// GetAllocateSize() returns the end offset of the field named by this macro.
#define allocateLastField expressionList
DevAscendFunctionPredInfo predInfo_;
uint64_t duppedDataAllocSize_;
uint64_t duppedDataCopySize_;
// Raw bytes that GetDuppedData() reinterprets as DevAscendFunctionDuppedData.
DevLocalVector<uint8_t> duppedData_;
public:
uint64_t wrapIdNum_{0};
/** Address of element 0 of opWrapList_. */
int* GetOpWrapListAddr()
{
    int& firstWrapEntry = At(opWrapList_, 0);
    return &firstWrapEntry;
}
uint64_t rootInnerTensorWsMemoryRequirement{0};
uint64_t exclusiveOutcastWsMemoryRequirement{0};
/** Current value of maxC_. */
uint32_t GetMaxC() const
{
    return maxC_;
}
/** Current value of maxV_. */
uint32_t GetMaxV() const
{
    return maxV_;
}
/** Stores both limits: maxC into maxC_ and maxV into maxV_. */
void SetMaxCV(uint32_t maxC, uint32_t maxV)
{
    maxC_ = maxC;
    maxV_ = maxV;
}
private:
// Offset-addressed vectors; every element access goes through At().
DevLocalVector<int> opWrapList_;
DevLocalVector<DevAscendRawTensor> rawTensorList_;
DevLocalVector<DevRawTensorDesc> rawTensorDescList_;
DevLocalVector<DevAscendTensor> tensorList_;
DevLocalVector<int> noPredOpList_;
DevLocalVector<int> noSuccOpList_;
DevLocalVector<DevAscendOperation> operationList_;
DevLocalVector<DevAscendOperationOperandInfo> operationOperandInfoList_;
// Flat attribute storage; GetTensorOffset() and GetSymoffset() index into it directly.
DevLocalVector<SymInt> operationAttrList_;
DevLocalVector<int> opAttrOffsetList_;
DevLocalVector<int> opCalleeList_;
DevLocalVector<int> operationSuccList_;
DevLocalVector<int> operationCopyOutResolveSuccIndexList_;
DevLocalVector<DevAscendFunctionIncast> incastList;
DevLocalVector<DevAscendFunctionOutcast> outcastList;
DevLocalVector<int> slotList;
DevLocalVector<int> redaccAssembleSlotList_;
DevLocalVector<DevAscendFunctionCallOperandUse> useList;
DevLocalVector<DevAscendFunctionCallOperandUse> stitchPolicyFullCoverProducerList_;
DevLocalVector<uint32_t> stitchPolicyFullCoverOpList_;
DevLocalVector<uint32_t> cellMatchRuntimeFullUpdateTableList;
// Bitmaps with one bit per operation index, 64 bits per word (see IsDeadEndHub / IsTailTask).
DevLocalVector<uint64_t> deadEndHubBitmap_;
DevLocalVector<uint64_t> tailTaskBitmap_;
// GetRawName() returns the address of the first character.
DevLocalVector<char> rawName_;
// GetSize() returns the end offset of the field named by this macro.
#define sharedLastField rawName_
uint32_t maxC_{0};
uint32_t maxV_{0};
public:
// Zero-length array (a compiler extension, not ISO C++).
// NOTE(review): presumably marks the start of the variable-length payload that the DevLocalVector
// offsets point into -- confirm against the encoder.
uint8_t data[0];
/*
 * Duplicated:
 *      AddressDescriptor incastAddressListData;
 *      AddressDescriptor outcastAddressListData;
 *      DevAscendOperationDynamicField opDynamicFieldListData[];
 * Allocated:
 *      uint64_t expressionListData[];
 *
 * Shared:
 *      DevRawTensorDesc rawTensorDescListData[];
 *      DevAscendRawTensor rawTensorListData[];
 *      DevAscendTensor tensorListData[];
 *      int noPredOpListData[];
 *      int noSuccOpListData[];
 *      DevAscendOperation operationListData[];
 *      DevAscendOperationOperandInfo operationOperandListData[];
 *      SymInt operationAttrListData[];
 *      int operationSuccListData[];
 *      int operationCopyOutResolveSuccIndexData[];
 *      DevAscendFunctionIncast incastListData[];
 *      DevAscendFunctionOutcast outcastListData[];
 *      int slotListData[];
 *      int outputOutcastSlotList[];
 *      int assembleOutcastSlotList[];
 *      int offsetIdxListData[];
 *      int shapeIdxListData[];
 *      int producerConsumerListData[];
 *      char rawNameData[];
 *      uint8_t duppedData[];
 */
/**
 * Read-only element access for an offset-addressed vector: the element lives at
 * (address of this object) + localvec.Offset(index).
 */
template <typename T>
const T& At(const DevLocalVector<T>& localvec, int index) const
{
    const uint64_t selfAddr = reinterpret_cast<uint64_t>(this);
    T* element = reinterpret_cast<T*>(selfAddr + localvec.Offset(index));
    return *element;
}
/** Mutable counterpart of the offset-addressed access above. */
template <typename T>
T& At(const DevLocalVector<T>& localvec, int index)
{
    const uint64_t selfAddr = reinterpret_cast<uint64_t>(this);
    T* element = reinterpret_cast<T*>(selfAddr + localvec.Offset(index));
    return *element;
}
/** Read-only element access for a DevRelocVector: plain subscript on the vector itself. */
template <typename T>
const T& At(const DevRelocVector<T>& localvec, int index) const
{
    const T& element = localvec[index];
    return element;
}
/** Mutable element access for a DevRelocVector: plain subscript on the vector itself. */
template <typename T>
T& At(DevRelocVector<T>& localvec, int index)
{
    T& element = localvec[index];
    return element;
}
private:
// Private string-dump helpers; declarations only, the definitions are not in this header.
// The optional runtimeExpressionList / address-list arguments default to "not provided".
std::string DumpTensor(int tensorIndex) const;
std::string DumpOperationAttr(
int operationIndex, uint64_t* runtimeExpressionList = nullptr, bool dumpIndex = false) const;
// totalAttrStartIdx is taken by non-const reference, so the callee may update it.
std::string DumpOperation(
int operationIndex, int& totalAttrStartIdx, const std::vector<uintdevptr_t>& ooperandAddrList = {},
const std::vector<uintdevptr_t>& ioperandAddrList = {}, uint64_t* runtimeExpressionList = nullptr) const;
std::string DumpRawTensor(int rawIndex, uintdevptr_t addr = 0) const;
std::string DumpIncast(
int incastIndex, const std::string& indent, uint64_t* runtimeExpressionList = nullptr,
const std::vector<uintdevptr_t>& slotAddrList = {}) const;
std::string DumpOutcast(
int outcastIndex, const std::string& indent, uint64_t* runtimeExpressionList = nullptr,
const std::vector<uintdevptr_t>& slotAddrList = {}) const;
public:
// Public entry point for dumping this function as text; definition is not in this header.
std::string Dump(int indent = 0) const;
/**
 * True when getInputDataCount or getTensorDataCount is non-zero.
 * Each counter is tested on its own: adding the two uint32_t values first could wrap around to 0 and
 * wrongly report false.
 */
bool HasValueDepend() const
{
    return getInputDataCount != 0 || getTensorDataCount != 0;
}
/**
 * Builds the schema::coa record for one operation from its attribute list.
 *
 * Each attribute becomes one schema::TextType entry:
 *  - non-expression attribute: its Value() as decimal text;
 *  - expression attribute with a runtimeExpressionList: runtimeExpressionList[Value()] as decimal text;
 *  - expression attribute without one: "?" followed by Value().
 *
 * @param operationIndex         index of the operation whose attributes are rendered
 * @param runtimeExpressionList  optional table used to resolve expression attributes (not bounds-checked here)
 * @param dumpIndex              forwarded unchanged to schema::coaType
 */
schema::coa SchemaGetCoa(
    int operationIndex, uint64_t* runtimeExpressionList = nullptr, bool dumpIndex = false) const
{
    std::vector<schema::TextType> coaDataList;
    const size_t attrCount = GetOperationAttrSize(operationIndex);
    for (size_t attrIdx = 0; attrIdx < attrCount; ++attrIdx) {
        const SymInt& attr = GetOperationAttr(operationIndex, attrIdx);
        std::string textData;
        if (!attr.IsExpression()) {
            textData = std::to_string(attr.Value());
        } else if (runtimeExpressionList == nullptr) {
            textData = "?" + std::to_string(attr.Value());
        } else {
            textData = std::to_string(runtimeExpressionList[attr.Value()]);
        }
        coaDataList.push_back(schema::TextType(textData));
    }
    return schema::coa(schema::coaType(coaDataList, dumpIndex));
}
/** End offset of an offset-addressed vector, exactly as reported by localvec.End(). */
template <typename T>
uint64_t GetEndOffset(const DevLocalVector<T>& localvec) const
{
    const uint64_t endOffset = localvec.End();
    return endOffset;
}
/** No-op: both arguments are ignored. */
void Reloc(intptr_t, bool)
{
}
/**
 * funcKey converted to int.
 * NOTE(review): funcKey is declared uint64_t, so values that do not fit in int are narrowed here.
 */
int GetFuncKey() const
{
    return static_cast<int>(funcKey);
}
/** The sourceFunc pointer (read-only view). */
const DevAscendFunction* GetSource() const
{
    return sourceFunc;
}
/** The sourceFunc pointer. */
DevAscendFunction* GetSource()
{
    return sourceFunc;
}
/** Same value as GetFuncKey(): funcKey converted to int. */
int GetRootIndex() const
{
    return static_cast<int>(funcKey);
}
/** Read-only reference to funcidx. */
const int& GetFuncidx() const
{
    return funcidx;
}
/** Writable reference to funcidx. */
int& GetFuncidx()
{
    return funcidx;
}
/** Read-only reference to predInfo_. */
const DevAscendFunctionPredInfo& GetPredInfo() const
{
    return predInfo_;
}
/** Value of duppedDataAllocSize_. */
uint64_t GetDuppedDataAllocSize() const
{
    return duppedDataAllocSize_;
}
/** Value of duppedDataCopySize_. */
uint64_t GetDuppedDataCopySize() const
{
    return duppedDataCopySize_;
}
/** First byte of duppedData_, viewed as a mutable DevAscendFunctionDuppedData (constness is cast away). */
DevAscendFunctionDuppedData* GetDuppedData() const
{
    const uint8_t* firstByte = &At(duppedData_, 0);
    return reinterpret_cast<DevAscendFunctionDuppedData*>(const_cast<uint8_t*>(firstByte));
}
/** Address of element 0 of opAttrOffsetList_. */
int32_t* GetOpAttrOffsetAddr()
{
    return &At(opAttrOffsetList_, 0);
}
/** Element count of opAttrOffsetList_. */
int32_t GetOpAttrOffsetSize()
{
    return opAttrOffsetList_.size();
}
/** Address of element 0 of opCalleeList_. */
int* GetCalleeIndexAddr()
{
    return &At(opCalleeList_, 0);
}
/** Address of element 0 of expressionList. */
uint64_t* GetExpressionAddr()
{
    return &At(expressionList, 0);
}
/** End offset of the field named by allocateLastField. */
uint64_t GetAllocateSize() const
{
    return GetEndOffset(allocateLastField);
}
/** End offset of the field named by sharedLastField. */
uint64_t GetSize() const
{
    return GetEndOffset(sharedLastField);
}
/** Value of runtimeWorkspace. */
uintdevptr_t GetRuntimeWorkspace() const
{
    return runtimeWorkspace;
}
/** Writable reference to runtimeWorkspace. */
uintdevptr_t& GetRuntimeWorkspace()
{
    return runtimeWorkspace;
}
inline AddressDescriptor GetIncastAddress(int index) const { return At(incastAddressList, index); }
inline AddressDescriptor& GetIncastAddress(int index) { return At(incastAddressList, index); }
inline AddressDescriptor GetOutcastAddress(int index) const { return At(outcastAddressList, index); }
inline AddressDescriptor& GetOutcastAddress(int index) { return At(outcastAddressList, index); }
inline uint64_t GetExpression(int tableIndex) const { return At(expressionList, tableIndex); }
inline uint64_t& GetExpression(int tableIndex) { return At(expressionList, tableIndex); }
inline uint64_t GetExpressionSize() const { return expressionList.size(); }
inline uint64_t GetRawTensorSize() const { return rawTensorList_.size(); }
inline const DevAscendRawTensor* GetRawTensor(const DevAscendTensor* tensor) const
{
int rawTensorIndex = tensor->rawIndex;
return &At(rawTensorList_, rawTensorIndex);
}
inline const DevAscendRawTensor* GetRawTensor(int rawIndex) const { return &At(rawTensorList_, rawIndex); }
inline DevAscendRawTensor* GetRawTensor(int rawIndex) { return &At(rawTensorList_, rawIndex); }
inline const DevRawTensorDesc* GetRawTensorDesc(int rawIndex) const { return &At(rawTensorDescList_, rawIndex); }
inline DevRawTensorDesc* GetRawTensorDesc(int rawIndex) { return &At(rawTensorDescList_, rawIndex); }
inline size_t GetRawTensorDescSize() const { return rawTensorDescList_.size(); }
inline uint64_t GetTensorSize() const { return tensorList_.size(); }
inline const DevAscendTensor* GetTensor(int index) const { return &At(tensorList_, index); }
inline DevAscendTensor* GetTensor(int index) { return &At(tensorList_, index); }
inline size_t GetNoPredOpSize() const { return noPredOpList_.size(); }
inline int GetNoPredOpIdx(size_t idx) const { return At(noPredOpList_, idx); }
inline size_t GetNoSuccOpSize() const { return noSuccOpList_.size(); }
inline int GetNoSuccOpIdx(size_t idx) const { return At(noSuccOpList_, idx); }
inline size_t GetOperationSize() const { return operationList_.size(); }
/**
 * Reports whether the dead-end-hub bit of operation opIndex is set.
 *
 * Bits are packed 64 per uint64_t word in deadEndHubBitmap_. An empty bitmap, or an opIndex whose word
 * lies past the end of the bitmap, yields false instead of reading outside the bitmap.
 */
inline bool IsDeadEndHub(uint32_t opIndex) const
{
    const uint32_t wordIdx = opIndex / 64;
    // One range check covers both the empty bitmap and an out-of-range opIndex.
    if (static_cast<uint64_t>(wordIdx) >= static_cast<uint64_t>(deadEndHubBitmap_.size())) {
        return false;
    }
    return (At(deadEndHubBitmap_, wordIdx) & (1ULL << (opIndex % 64))) != 0;
}
/**
 * Reports whether the tail-task bit of operation opIndex is set.
 *
 * Bits are packed 64 per uint64_t word in tailTaskBitmap_. An empty bitmap, or an opIndex whose word
 * lies past the end of the bitmap, yields false instead of reading outside the bitmap.
 */
inline bool IsTailTask(uint32_t opIndex) const
{
    const uint32_t wordIdx = opIndex / 64;
    // One range check covers both the empty bitmap and an out-of-range opIndex.
    if (static_cast<uint64_t>(wordIdx) >= static_cast<uint64_t>(tailTaskBitmap_.size())) {
        return false;
    }
    return (At(tailTaskBitmap_, wordIdx) & (1ULL << (opIndex % 64))) != 0;
}
/**
 * Clears the tail-task bit of operation opIndex.
 *
 * Does nothing when tailTaskBitmap_ is empty or when opIndex maps to a word past the end of the bitmap,
 * so an out-of-range index can no longer write outside the bitmap.
 */
inline void ClearTailTask(uint32_t opIndex)
{
    const uint32_t wordIdx = opIndex / 64;
    // One range check covers both the empty bitmap and an out-of-range opIndex.
    if (static_cast<uint64_t>(wordIdx) >= static_cast<uint64_t>(tailTaskBitmap_.size())) {
        return;
    }
    At(tailTaskBitmap_, wordIdx) &= ~(1ULL << (opIndex % 64));
}
/**
 * Clears the dead-end-hub bit of operation opIndex.
 *
 * @return true when the bit was set before the call; false when it was already clear, when
 *         deadEndHubBitmap_ is empty, or when opIndex maps to a word past the end of the bitmap
 *         (in which case nothing is modified).
 */
inline bool ClearDeadEndHub(uint32_t opIndex)
{
    const uint32_t wordIdx = opIndex / 64;
    // One range check covers both the empty bitmap and an out-of-range opIndex.
    if (static_cast<uint64_t>(wordIdx) >= static_cast<uint64_t>(deadEndHubBitmap_.size())) {
        return false;
    }
    const uint64_t bit = 1ULL << (opIndex % 64);
    auto& word = At(deadEndHubBitmap_, wordIdx);
    const bool wasSet = (word & bit) != 0;
    word &= ~bit;
    return wasSet;
}
/**
 * Propagates a cleared dead-end-hub mark backwards through the dependency graph.
 *
 * Starting from clearedOpIdx, every operation that lists the current target among its dep-graph
 * successors gets its tail-task bit cleared and its dead-end-hub bit cleared. Operations whose
 * dead-end-hub bit was actually set become new targets.
 *
 * Each processed target costs one full scan over all operations and their successor lists.
 *
 * NOTE(review): the worklist is a fixed 128-entry array. When it is full, the matching operation still
 * has its bits cleared but is not queued, so propagation past that operation is silently dropped.
 */
inline void PropagateDeadHubClear(uint32_t clearedOpIdx)
{
constexpr size_t kMaxStack = 128;
uint32_t stack[kMaxStack];
size_t stackSize = 0;
stack[stackSize++] = clearedOpIdx;
size_t opCount = GetOperationSize();
while (stackSize > 0) {
uint32_t target = stack[--stackSize];
for (size_t op = 0; op < opCount; op++) {
size_t succSize;
const int* succList = GetOperationDepGraphSuccAddr(static_cast<int>(op), succSize);
for (size_t j = 0; j < succSize; j++) {
if (static_cast<uint32_t>(succList[j]) != target) continue;
// op is a predecessor of target.
ClearTailTask(static_cast<uint32_t>(op));
// ClearDeadEndHub runs first (left operand of &&), so the bit is cleared even when the stack is full.
if (ClearDeadEndHub(static_cast<uint32_t>(op)) && stackSize < kMaxStack) {
stack[stackSize++] = static_cast<uint32_t>(op);
}
// One match per operation is enough; stop scanning this successor list.
break;
}
}
}
}
/** Size in bytes of deadEndHubBitmap_ (element count times sizeof(uint64_t)). */
size_t GetBitmapByteSize() const
{
    const size_t wordCount = deadEndHubBitmap_.size();
    return wordCount * sizeof(uint64_t);
}
/**
 * Copies byteSize bytes of deadEndHubBitmap_ into deadEndBuf and byteSize bytes of tailTaskBitmap_ into
 * tailBuf. Nothing is copied when byteSize is 0.
 *
 * NOTE(review): the same byteSize is used for both bitmaps and the memcpy_s results are not checked;
 * presumably callers pass GetBitmapByteSize() and both bitmaps have equal length -- confirm.
 */
void BackupBitmapTo(uint64_t* deadEndBuf, uint64_t* tailBuf, size_t byteSize) const
{
    if (byteSize != 0) {
        memcpy_s(deadEndBuf, byteSize, &At(deadEndHubBitmap_, 0), byteSize);
        memcpy_s(tailBuf, byteSize, &At(tailTaskBitmap_, 0), byteSize);
    }
}
/**
 * Inverse of BackupBitmapTo: copies byteSize bytes from deadEndBuf into deadEndHubBitmap_ and from
 * tailBuf into tailTaskBitmap_. Nothing is copied when byteSize is 0.
 *
 * NOTE(review): same assumptions as BackupBitmapTo; the memcpy_s results are not checked.
 */
void RestoreBitmapFrom(const uint64_t* deadEndBuf, const uint64_t* tailBuf, size_t byteSize)
{
    if (byteSize != 0) {
        memcpy_s(&At(deadEndHubBitmap_, 0), byteSize, deadEndBuf, byteSize);
        memcpy_s(&At(tailTaskBitmap_, 0), byteSize, tailBuf, byteSize);
    }
}
inline uint32_t GetOperationStitchIndex(int operationIndex) const
{
return At(operationList_, operationIndex).stitchIndex;
}
inline uint32_t GetOperationDebugOpmagic(int operationIndex) const
{
return At(operationList_, operationIndex).debugOpmagic;
}
inline size_t GetOperationIOperandSize(int operationIndex) const
{
return At(operationList_, operationIndex).ioperandList.size();
}
inline size_t GetOperationOOperandSize(int operationIndex) const
{
return At(operationList_, operationIndex).ooperandList.size();
}
inline const DevAscendOperationOperandInfo& GetOperationIOperandInfo(int operationIndex, int operandIndex) const
{
return At(At(operationList_, operationIndex).ioperandList, operandIndex);
}
inline const DevAscendTensor* GetOperationIOperand(int operationIndex, int operandIndex) const
{
int tensorIndex = GetOperationIOperandInfo(operationIndex, operandIndex).tensorIndex;
return GetTensor(tensorIndex);
}
inline const DevAscendOperationOperandInfo& GetOperationOOperandInfo(int operationIndex, int operandIndex) const
{
return At(At(operationList_, operationIndex).ooperandList, operandIndex);
}
inline const DevAscendTensor* GetOperationOOperand(int operationIndex, int operandIndex) const
{
int tensorIndex = GetOperationOOperandInfo(operationIndex, operandIndex).tensorIndex;
return GetTensor(tensorIndex);
}
inline const DevAscendOperationOperandInfo& GetOperationOperandInfo(
int operationIndex, int operandIndex, bool isIOperand = true) const
{
if (isIOperand) {
return GetOperationIOperandInfo(operationIndex, operandIndex);
} else {
return GetOperationOOperandInfo(operationIndex, operandIndex);
}
}
inline size_t GetOperationAttrSize(int operationIndex) const
{
return At(operationList_, operationIndex).attrList.size();
}
inline const SymInt& GetOperationAttr(int operationIndex, int attrIndex) const
{
return At(At(operationList_, operationIndex).attrList, attrIndex);
}
inline int GetOperationAttrCalleeIndex(int operationIndex) const
{
return GetOperationAttr(operationIndex, 0).Value();
}
inline int GetOpAttrSize() { return operationAttrList_.size(); }
inline void FillOpAttrs(DevCceBinary* cceInfo) { (void)cceInfo; }
inline const uint32_t& GetOperationDepGraphPredCount(int operationIndex) const
{
return At(operationList_, operationIndex).depGraphPredCount;
}
inline uint32_t& GetOperationDepGraphPredCount(int operationIndex)
{
return At(operationList_, operationIndex).depGraphPredCount;
}
inline const DevLocalVector<int>& GetOperationDepGraphSuccList(int operationIndex) const
{
return At(operationList_, operationIndex).depGraphSuccList;
}
inline const DevLocalVector<int>& GetOperationDepGraphCopyOutResolveSuccIndexList(int operationIndex) const
{
return At(operationList_, operationIndex).depGraphCopyOutResolveSuccIndexList;
}
inline const int* GetOperationDepGraphSuccAddr(int operationIndex, size_t& size) const
{
auto& succList = At(operationList_, operationIndex).depGraphSuccList;
size = succList.size();
return &At(succList, 0);
}
inline const int* GetOperationDepGraphCopyOutResolveSuccIndexAddr(int operationIndex, size_t& size) const
{
auto& succIndexList = At(operationList_, operationIndex).depGraphCopyOutResolveSuccIndexList;
size = succIndexList.size();
return &At(succIndexList, 0);
}
/** Element count of incastList. */
size_t GetIncastSize() const
{
    return incastList.size();
}
/** Read-only reference to incastList[index]. */
const struct DevAscendFunctionIncast& GetIncast(int index) const
{
    return At(incastList, index);
}
/** Writable reference to incastList[index]. */
struct DevAscendFunctionIncast& GetIncast(int index)
{
    return At(incastList, index);
}
/** Raw tensor behind the tensor referenced by incastList[index].tensorIndex. */
const DevAscendRawTensor* GetIncastRawTensor(int index) const
{
    const DevAscendTensor* incastTensor = GetTensor(GetIncast(index).tensorIndex);
    return GetRawTensor(incastTensor);
}
/** Element count of outcastList. */
size_t GetOutcastSize() const
{
    return outcastList.size();
}
/** Read-only reference to outcastList[index]. */
const struct DevAscendFunctionOutcast& GetOutcast(int index) const
{
    return At(outcastList, index);
}
/** Writable reference to outcastList[index]. */
struct DevAscendFunctionOutcast& GetOutcast(int index)
{
    return At(outcastList, index);
}
/** Element count of redaccAssembleSlotList_. */
size_t GetRedaccAssembleSlotListSize() const
{
    return redaccAssembleSlotList_.size();
}
/** Read-only reference to redaccAssembleSlotList_[index]. */
const int& GetRedaccAssembleSlotList(int index) const
{
    return At(redaccAssembleSlotList_, index);
}
/** Writable reference to redaccAssembleSlotList_[index]. */
int& GetRedaccAssembleSlotList(int index)
{
    return At(redaccAssembleSlotList_, index);
}
int LookupIncastBySlotIndex(int slotIndex) const
{
for (size_t incastIndex = 0; incastIndex < GetIncastSize(); incastIndex++) {
const DevAscendFunctionIncast& incast = GetIncast(incastIndex);
for (size_t fromIndex = 0; fromIndex < incast.fromSlotList.size(); fromIndex++) {
int slot = At(incast.fromSlotList, fromIndex);
if (slot == slotIndex) {
return static_cast<int>(incastIndex);
}
}
}
return INVALID_INDEX;
}
/**
 * Element-wise LookupIncastBySlotIndex: result[i] is the incast index for slotIndexList[i],
 * or INVALID_INDEX when that slot has no incast.
 */
std::vector<int> LookupIncastBySlotIndexList(const std::vector<int>& slotIndexList) const
{
    std::vector<int> resultList;
    resultList.reserve(slotIndexList.size());
    for (const int slotIndex : slotIndexList) {
        resultList.push_back(LookupIncastBySlotIndex(slotIndex));
    }
    return resultList;
}
int LookupOutcastBySlotIndex(int slotIndex) const
{
for (size_t outcastIndex = 0; outcastIndex < GetOutcastSize(); outcastIndex++) {
const DevAscendFunctionOutcast& outcast = GetOutcast(outcastIndex);
for (size_t toIndex = 0; toIndex < outcast.toSlotList.size(); toIndex++) {
int slot = At(outcast.toSlotList, toIndex);
if (slot == slotIndex) {
return static_cast<int>(outcastIndex);
}
}
}
return INVALID_INDEX;
}
/**
 * Element-wise LookupOutcastBySlotIndex: result[i] is the outcast index for slotIndexList[i],
 * or INVALID_INDEX when that slot has no outcast.
 */
std::vector<int> LookupOutcastBySlotIndexList(const std::vector<int>& slotIndexList) const
{
    std::vector<int> resultList;
    resultList.reserve(slotIndexList.size());
    for (const int slotIndex : slotIndexList) {
        resultList.push_back(LookupOutcastBySlotIndex(slotIndex));
    }
    return resultList;
}
/**
 * Lists every slot through which an outcast of func feeds an incast of this function.
 *
 * For each (incast of this, from-slot) pair, all outcasts of func are scanned, and one entry is emitted
 * per to-slot equal to the from-slot. Entries appear in iteration order: incast, from-slot, outcast,
 * to-slot.
 *
 * @param func  producer function whose outcasts are matched; nullptr yields an empty result
 * @return tuples of (outcast index in func, incast index in this, shared slot index)
 */
std::vector<std::tuple<int, int, int>> LookupConnectionSlotIndexFrom(const DevAscendFunction* func) const
{
    std::vector<std::tuple<int, int, int>> connectionList;
    if (func == nullptr) {
        return connectionList;
    }
    // Neither function is modified while scanning, so the list sizes are loop-invariant.
    const size_t incastCount = GetIncastSize();
    const size_t outcastCount = func->GetOutcastSize();
    for (size_t incastIndex = 0; incastIndex < incastCount; incastIndex++) {
        const DevAscendFunctionIncast& incast = GetIncast(static_cast<int>(incastIndex));
        for (size_t fromIndex = 0; fromIndex < incast.fromSlotList.size(); fromIndex++) {
            const int fromSlot = At(incast.fromSlotList, static_cast<int>(fromIndex));
            for (size_t outcastIndex = 0; outcastIndex < outcastCount; outcastIndex++) {
                const DevAscendFunctionOutcast& outcast = func->GetOutcast(static_cast<int>(outcastIndex));
                for (size_t toIndex = 0; toIndex < outcast.toSlotList.size(); toIndex++) {
                    // The to-slot list belongs to func, so it is resolved relative to func, not this.
                    const int toSlot = func->At(outcast.toSlotList, static_cast<int>(toIndex));
                    if (fromSlot == toSlot) {
                        connectionList.emplace_back(
                            static_cast<int>(outcastIndex), static_cast<int>(incastIndex), fromSlot);
                    }
                }
            }
        }
    }
    return connectionList;
}
/** Raw tensor behind the tensor referenced by outcastList[index].tensorIndex. */
const DevAscendRawTensor* GetOutcastRawTensor(int index) const
{
    const DevAscendTensor* outcastTensor = GetTensor(GetOutcast(index).tensorIndex);
    return GetRawTensor(outcastTensor);
}
/**
 * Resolves the per-dimension offsets of an operand into offset[].
 *
 * Reads rawTensor->GetDim() consecutive SymInt entries of operationAttrList_, starting at
 * operandInfo.staticOffsetAttrBeginIndex. An expression entry is replaced by
 * expressionList[entry.Value()]; any other entry contributes its Value() directly.
 *
 * @param offset       output array; entries [0, GetDim()) are written
 * @param rawTensor    supplies the number of dimensions
 * @param operandInfo  supplies the first attribute index of the offset list
 */
void GetTensorOffset(
    uint64_t offset[DEV_SHAPE_DIM_MAX], const DevAscendRawTensor* rawTensor,
    const DevAscendOperationOperandInfo& operandInfo) const
{
    const SymInt* symOffsets = &At(operationAttrList_, operandInfo.staticOffsetAttrBeginIndex);
    int dim = 0;
    while (dim < rawTensor->GetDim()) {
        const SymInt& sym = symOffsets[dim];
        offset[dim] = sym.IsExpression() ? At(expressionList, sym.Value()) : sym.Value();
        ++dim;
    }
}
inline const SymInt* GetSymoffset(int offset) const { return &At(operationAttrList_, offset); }
struct SymIntPair {
const SymInt* offsetSymList;
const SymInt* shapeSymList;
};
inline SymIntPair GetTensorOffsetShapeSymList(int operationIndex, int offsetAttrIndex, int shapeAttrIndex) const
{
const SymInt* offsetSymList = &GetOperationAttr(operationIndex, offsetAttrIndex);
const SymInt* shapeSymList = &GetOperationAttr(operationIndex, shapeAttrIndex);
return SymIntPair{offsetSymList, shapeSymList};
}
inline const char* GetRawName() const { return &At(rawName_, 0); }
private:
// Private encode-time helpers. Only declarations appear here; the definitions are outside this header.
// EncodeDevAscendFunctionInfo is granted access to them (and to all other private members).
// Most Init* helpers take `initOffset` by non-const reference and a `fillContent` flag.
// NOTE(review): presumably initOffset is a running layout offset advanced by each helper and fillContent
// selects between a sizing pass and a content-writing pass -- confirm against the implementation.
friend struct EncodeDevAscendFunctionInfo;
void InitIncastOutcastAttr(
uintdevptr_t& initOffset, const std::vector<std::shared_ptr<LogicalTensor>>& iList,
const std::vector<std::shared_ptr<LogicalTensor>>& oList, bool fillContent);
void InitOperationDynamicField(
uintdevptr_t& initOffset, DevAscendFunctionPredInfo predInfo, uint32_t stitchCount,
const std::unordered_map<uint64_t, int>& calleeHashIndexDict, const SymbolicExpressionTable* expressionTable,
const OrderedSet<Operation*>& callList, const std::vector<std::shared_ptr<LogicalTensor>>& incastTensorList,
const std::vector<std::shared_ptr<LogicalTensor>>& outcastTensorList,
const std::unordered_map<Operation*, OrderedSet<Operation*>>& callOpSuccDict, bool fillContent);
// Writes into the caller-provided isExclusiveOutcastSlotMarks vector (non-const reference).
void FillExclusiveOutcastSlotMark(
const IncastOutcastLink* inoutLink, std::vector<bool>& isExclusiveOutcastSlotMarks);
void InitRawTensorAndMemoryRequirement(
uintdevptr_t& initOffset, const OrderedSet<std::shared_ptr<RawTensor>>& incastRawList,
const OrderedSet<std::shared_ptr<RawTensor>>& outcastRawList,
const OrderedSet<std::shared_ptr<RawTensor>>& rawList,
const std::unordered_map<int, std::shared_ptr<RawTensor>>& rawMagicToRawTensor,
const std::vector<EncodeRawTensorAttr>& rawAttrs, const EncodeDevAscendFunctionParam& param,
const SymbolicExpressionTable* expressionTable, bool fillContent);
void UpdateRawTensorDesc(
const std::shared_ptr<RawTensor>& rawTensor, size_t i, size_t incastRawListSize, DevAscendRawTensor& encoded);
void InitTensor(
uintdevptr_t& initOffset, const OrderedSet<std::shared_ptr<LogicalTensor>>& tlist,
const OrderedSet<std::shared_ptr<RawTensor>>& rawList, bool fillContent);
void InitOperation(
uintdevptr_t& initOffset, const SymbolicExpressionTable* expressionTable,
const OrderedSet<Operation*>& callList, const OrderedSet<std::shared_ptr<LogicalTensor>>& tlist,
const OrderedSet<std::shared_ptr<RawTensor>>& rawList,
const std::unordered_map<Operation*, uint64_t>& callOpPredDict,
const std::unordered_map<Operation*, OrderedSet<Operation*>>& callOpSuccDict,
const std::unordered_map<uint64_t, int>& calleeHashIndexDict,
const std::vector<int32_t>& stitchIndexList, const std::vector<int>& noPredOpList,
const std::vector<int>& noSuccOpList,
const std::unordered_map<Operation*, std::vector<int>>& copyOutResolveSuccIndexListDict, bool fillContent);
void InitOperationNoPredNoSuccIndices(
uintdevptr_t& initOffset, const OrderedSet<Operation*>& callList,
const std::unordered_map<Operation*, uint64_t>& callOpPredDict,
const std::unordered_map<Operation*, OrderedSet<Operation*>>& callOpSuccDict,
const std::vector<int>& noPredOpList, const std::vector<int>& noSuccOpList, bool fillContent);
// Unlike the other Init* helpers this one has no fillContent parameter.
void InitOperationBufferLayouts(
uintdevptr_t& initOffset, const OrderedSet<Operation*>& callList,
const std::unordered_map<Operation*, OrderedSet<Operation*>>& callOpSuccDict,
const std::unordered_map<Operation*, std::vector<int>>& copyOutResolveSuccIndexListDict);
void FillOperationEncodedContent(
const SymbolicExpressionTable* expressionTable, const OrderedSet<Operation*>& callList,
const OrderedSet<std::shared_ptr<LogicalTensor>>& tlist, const OrderedSet<std::shared_ptr<RawTensor>>& rawList,
const std::unordered_map<Operation*, uint64_t>& callOpPredDict,
const std::unordered_map<Operation*, OrderedSet<Operation*>>& callOpSuccDict,
const std::unordered_map<uint64_t, int>& calleeHashIndexDict,
const std::vector<int32_t>& stitchIndexList,
const std::unordered_map<Operation*, std::vector<int>>& copyOutResolveSuccIndexListDict, bool fillContent);
void PopulateOperationEncodedContent(
const SymbolicExpressionTable* expressionTable, const OrderedSet<Operation*>& callList,
const OrderedSet<std::shared_ptr<LogicalTensor>>& tlist, const OrderedSet<std::shared_ptr<RawTensor>>& rawList,
const std::unordered_map<Operation*, OrderedSet<Operation*>>& callOpSuccDict,
const std::unordered_map<uint64_t, int>& calleeHashIndexDict,
const std::vector<int32_t>& stitchIndexList,
const std::unordered_map<Operation*, std::vector<int>>& copyOutResolveSuccIndexListDict,
DevAscendFunctionDuppedData* dupData);
// The two PopulateOneEncodedOp* helpers take running counters by non-const reference.
void PopulateOneEncodedOpOperandsAndAttrs(
size_t index, int& operanSize, int& staticAttributeSize, const SymbolicExpressionTable* expressionTable,
const OrderedSet<Operation*>& callList, const OrderedSet<std::shared_ptr<LogicalTensor>>& tlist,
const OrderedSet<std::shared_ptr<RawTensor>>& rawList,
const std::unordered_map<uint64_t, int>& calleeHashIndexDict,
const std::vector<int32_t>& stitchIndexList);
void PopulateOneEncodedOpGraphEdges(
size_t index, int& sucSize, int& copyOutResolveSuccIdxSize, const OrderedSet<Operation*>& callList,
const std::unordered_map<Operation*, OrderedSet<Operation*>>& callOpSuccDict,
const std::unordered_map<Operation*, std::vector<int>>& copyOutResolveSuccIndexListDict,
DevAscendFunctionDuppedData* dupData);
void VerifyOperationEncodedContent(
const OrderedSet<Operation*>& callList, const std::unordered_map<Operation*, uint64_t>& callOpPredDict,
DevAscendFunctionDuppedData* dupData);
void InitWrapInfo(uintdevptr_t& initOffset, const OrderedSet<Operation*>& callList, bool fillContent);
void InitIncastOutcast(
uintdevptr_t& initOffset, const std::vector<std::shared_ptr<LogicalTensor>>& incastTensorList,
const std::vector<std::shared_ptr<LogicalTensor>>& outcastTensorList,
const OrderedSet<std::shared_ptr<LogicalTensor>>& tlist,
const std::unordered_map<std::shared_ptr<LogicalTensor>, InoutOperationAttr>& incastOpAttrDict,
const std::unordered_map<std::shared_ptr<LogicalTensor>, InoutOperationAttr>& outcastOpAttrDict,
const EncodeDevAscendFunctionParam& param, const std::string& initRawName, bool fillContent);
};
}