* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
* \file buffer.h
* \brief
*/
#ifndef UTIL_BUFFER_H_
#define UTIL_BUFFER_H_
namespace Ops {
namespace Base {
// Memory strategy level consumed by the Alloc* helpers in this file.
// As used there: LEVEL_0 is the only level at which a request may fall back to the
// PongMte3 pool (and, for AllocMte3, to the Tmp/Mte2 pools); LEVEL_2 additionally
// stops AllocMte2 / AllocTempBuffer from borrowing from each other's pool.
enum class MemLevel : uint8_t {
    LEVEL_0,  // == 0
    LEVEL_1,  // == 1
    LEVEL_2,  // == 2
};
// Buffer type flags: one bit each, all inside the 5-bit BUF_COMBINE_MASK.
static constexpr uint32_t BUF_TYPE_MTE2 = 1U << 0;
static constexpr uint32_t BUF_TYPE_MTE3 = 1U << 1;
static constexpr uint32_t BUF_TYPE_TEMP = 1U << 2;
static constexpr uint32_t BUF_TYPE_PLACEHOLDER = 1U << 3;
static constexpr uint32_t BUF_TYPE_SCALAR = 1U << 4;

// Ping / pong side flags.
static constexpr uint32_t BUF_PING = 1U << 0;
static constexpr uint32_t BUF_PONG = 1U << 1;

// Packing parameters used when several wrappers are combined into one id/type/ping-pong word:
// every wrapper occupies one BUF_COMBINE_SHIFT-bit slot, selected with BUF_COMBINE_MASK.
static constexpr uint8_t BUF_COMBINE_SHIFT = 5;
static constexpr uint32_t BUF_COMBINE_MASK = (1U << 5) - 1U;  // 0x1F
// Largest number of wrappers that can be combined; the count itself is stored in the slot
// above the BUF_COMBINED_MAX data slots.
static constexpr uint8_t BUF_COMBINED_MAX = 5;

// NOTE(review): the two constants below are not referenced in this part of the file;
// their exact meaning should be confirmed against their users.
static constexpr uint8_t BUF_PING_PONG = 2;
static constexpr uint8_t BUF_MAX_COUNT = 32;
// Position of each free-buffer pool inside the "BufListList" type list.
enum class BufPosInList : uint8_t {
    PERSIST_MTE2 = 0,
    MTE2 = 1,
    PERSIST_MTE3 = 2,
    MTE3 = 3,
    PERSIST_TEMP = 4,
    TEMP = 5,
    PONG_MTE3 = 6,
    MAX_POS = 7,  // number of pools; also the index of the extra trailing element
};

// Index of the trailing element appended after the pools:
// the Alloc* helpers store the wrapper they just allocated there ...
static constexpr int BUF_ALLOCATED_IDX = static_cast<int>(BufPosInList::MAX_POS);
// ... and ReleaseAndUpdateEs stores the updated to-release list there.
static constexpr int BUF_TO_RELEASE_IDX = static_cast<int>(BufPosInList::MAX_POS);
/**
 * Buffer wrapper template
 * Template Args:
 * ID: buffer id
 * T: buffer type (one of the BUF_TYPE_* flags)
 * Where: ping/pong side, BUF_PING (default) or BUF_PONG
 */
template <int ID, uint32_t T, uint32_t Where = BUF_PING>
struct BufferWrapper {
    // Compile-time copies of the template arguments, read by the packing / decoding helpers.
    static constexpr int BufferId = ID;
    static constexpr uint32_t BufferType = T;
    static constexpr uint32_t PingPong = Where;
};
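/**
 * Template used to generate buffer wrappers.
 * Template Args:
 * N: max count (number of wrappers generated; their ids run from Offset to Offset + N - 1)
 * T: buffer type
 * Offset: buffer id offset
 * Where: ping/pong side, BUF_PING (default) or BUF_PONG
 * Es: existing buffer wrappers
 *     (NOTE: the terminating specializations yield an empty Elems<> and do not use Es)
 */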
template <int N, uint32_t T, uint32_t Offset = 0, uint32_t Where = BUF_PING, typename Es = Elems<>>
struct GenerateBufferWrappers {
using Type = typename GenerateBufferWrappers<N - 1, T, Offset, Where,
Es>::Type::template Append<BufferWrapper<N - 1 + Offset, T, Where>>;
};
template <uint32_t T, uint32_t Offset, uint32_t Where, typename Es>
struct GenerateBufferWrappers<0, T, Offset, Where, Es> {
using Type = Elems<>;
};
template <uint32_t T, uint32_t Offset, uint32_t Where, typename Es>
struct GenerateBufferWrappers<-1, T, Offset, Where, Es> {
using Type = Elems<>;
};
// Associates a graph node type (Bind) with the list of buffers allocated for it (Buffers).
template <typename BindT, typename BufferList>
struct Mapping {
    using Buffers = BufferList;
    using Bind = BindT;
};
// Packs the BufferId / BufferType / PingPong of several wrappers into single 32-bit words.
// Each wrapper occupies one BUF_COMBINE_SHIFT-bit slot; the first wrapper in the pack ends up
// in the highest slot, the last one in the lowest slot.
template <typename... Ts>
struct CombineBufferWrapper {};

// Single wrapper: values are taken over unchanged (lowest slot).
template <typename T>
struct CombineBufferWrapper<T> {
    constexpr static uint32_t BufferId = static_cast<uint32_t>(T::BufferId);
    constexpr static uint32_t BufferType = T::BufferType;
    constexpr static uint32_t PingPong = T::PingPong;
};

// Head + tail: shift the head above the sizeof...(Ts) slots used by the tail, then OR them together.
template <typename T, typename... Ts>
struct CombineBufferWrapper<T, Ts...> {
    constexpr static uint32_t BufferId =
        (static_cast<uint32_t>(T::BufferId) << (BUF_COMBINE_SHIFT * sizeof...(Ts))) |
        CombineBufferWrapper<Ts...>::BufferId;
    constexpr static uint32_t BufferType =
        (T::BufferType << (BUF_COMBINE_SHIFT * sizeof...(Ts))) | CombineBufferWrapper<Ts...>::BufferType;
    constexpr static uint32_t PingPong =
        (T::PingPong << (BUF_COMBINE_SHIFT * sizeof...(Ts))) | CombineBufferWrapper<Ts...>::PingPong;
};
// A BufferWrapper whose id/type/ping-pong words carry several wrappers at once.
// Layout of the id: the wrapper count sits in the slot above the BUF_COMBINED_MAX data slots
// (bit BUF_COMBINE_SHIFT * BUF_COMBINED_MAX upwards); the data slots come from CombineBufferWrapper.
template <typename... Ts>
struct CombinedBufferWrappers
    : BufferWrapper<static_cast<int>(
                        (static_cast<uint32_t>(sizeof...(Ts)) << (BUF_COMBINE_SHIFT * BUF_COMBINED_MAX)) |
                        CombineBufferWrapper<Ts...>::BufferId),
                    CombineBufferWrapper<Ts...>::BufferType, CombineBufferWrapper<Ts...>::PingPong> {
    // More wrappers than data slots would spill into the count field, and CombinedBufferCount
    // would then decode the result as a single buffer - reject that at compile time.
    static_assert(sizeof...(Ts) <= BUF_COMBINED_MAX,
                  "CombinedBufferWrappers can pack at most BUF_COMBINED_MAX buffer wrappers.");
};
/**
 * Computes the buffer id of the opposite ping/pong side for a (possibly combined) wrapper.
 *
 * @param bufferId   buffer id, or packed ids of a combined wrapper
 * @param bufferType BUF_TYPE_* flag, or packed types of a combined wrapper
 * @param pingPong   BUF_PING / BUF_PONG, or packed sides of a combined wrapper
 * @param pongOffset distance between a ping id and its pong counterpart
 * @return the counterpart id (packed the same way as the input for combined wrappers)
 *
 * Single wrapper (pingPong <= BUF_PONG):
 *   - ping side: TEMP and SCALAR buffers keep their id, PLACEHOLDER yields -1,
 *     every other type yields bufferId + pongOffset;
 *   - otherwise: bufferId - pongOffset.
 * Combined wrapper (pingPong > BUF_PONG): the lowest slot is converted here, the remaining
 * upper bits are converted recursively and the two parts are packed again.
 * Note that inside a combined slot only TEMP keeps its id; SCALAR / PLACEHOLDER get no
 * special treatment there.
 */
static constexpr int32_t CalcPongBufferId(int32_t bufferId, uint32_t bufferType, uint32_t pingPong, int pongOffset)
{
    if (pingPong <= BUF_PONG) {
        if (pingPong != BUF_PING) {
            return bufferId - pongOffset;
        }
        if (bufferType == BUF_TYPE_TEMP || bufferType == BUF_TYPE_SCALAR) {
            return bufferId;
        }
        return bufferType == BUF_TYPE_PLACEHOLDER ? -1 : (bufferId + pongOffset);
    }

    // Combined wrapper: peel off the lowest slot.
    const uint32_t id = static_cast<uint32_t>(bufferId) & BUF_COMBINE_MASK;
    const uint32_t type = bufferType & BUF_COMBINE_MASK;
    const uint32_t pp = pingPong & BUF_COMBINE_MASK;
    const uint32_t currentId = pp == BUF_PING ?
                                   (type == BUF_TYPE_TEMP ? id : (id + static_cast<uint32_t>(pongOffset))) :
                                   (id - static_cast<uint32_t>(pongOffset));

    // Convert whatever is stored above the lowest slot, then pack both parts again.
    const int32_t idNext = static_cast<int32_t>(static_cast<uint32_t>(bufferId) >> BUF_COMBINE_SHIFT);
    const uint32_t typeNext = bufferType >> BUF_COMBINE_SHIFT;
    const uint32_t ppNext = pingPong >> BUF_COMBINE_SHIFT;
    const uint32_t upper = static_cast<uint32_t>(CalcPongBufferId(idNext, typeNext, ppNext, pongOffset));
    return static_cast<int32_t>((upper << BUF_COMBINE_SHIFT) | currentId);
}
// Turns a list of buffer wrappers into two constant id tables.
template <typename Es, int PongOffset>
struct ExtractBufferId {};

template <typename... Ts, int PongOffset>
struct ExtractBufferId<Elems<Ts...>, PongOffset> {
    static constexpr size_t size = sizeof...(Ts);
    // Row 0: the ids exactly as stored in the wrappers.
    // Row 1: the counterpart ids computed by CalcPongBufferId with PongOffset.
    static constexpr int32_t arr[2][size] = {
        {Ts::BufferId...},
        {CalcPongBufferId(Ts::BufferId, Ts::BufferType, Ts::PingPong, PongOffset)...},
    };
    // Row pointers, so both tables can be handed out as `const int32_t* const*`.
    static constexpr const int32_t* Value[2] = {arr[0], arr[1]};
};
// Fixed id table with the same layout as ExtractBufferId (two rows plus row pointers):
// row 0 = {0, 1}, row 1 = {2, 3}.
struct PreReduceOnlyCopyInBufferId {
    static constexpr int32_t arr[2][2] = {
        {0, 1},
        {2, 3},
    };
    static constexpr const int32_t* Value[2] = {&arr[0][0], &arr[1][0]};
};
// Compile-time list of ints.
template <int... Values>
struct IntegerSequence {};

// Builds IntegerSequence<0, 1, ..., N - 1>: every step prepends N - 1 and counts N down.
template <int N, int... Tail>
struct MakeIntegerSequenceAux : MakeIntegerSequenceAux<N - 1, N - 1, Tail...> {};

// Counter reached 0: the accumulated pack is the result.
template <int... Tail>
struct MakeIntegerSequenceAux<0, Tail...> {
    using Type = IntegerSequence<Tail...>;
};

// Convenience alias: MakeIntegerSequence<3> is IntegerSequence<0, 1, 2>.
template <int N>
using MakeIntegerSequence = typename MakeIntegerSequenceAux<N>::Type;
// Extracts one slot from a packed buffer id.
// N is the number of packed ids, pos the zero-based position counted from the highest slot
// (pos == N - 1 addresses the lowest slot).
template <int BufferId, uint32_t N, int pos>
struct DecodeBufferIdWithPos {
private:
    // Bit offset of the requested slot.
    static constexpr uint32_t slotShift = BUF_COMBINE_SHIFT * (N - 1 - pos);

public:
    static constexpr int Value =
        static_cast<int>((static_cast<uint32_t>(BufferId) >> slotShift) & BUF_COMBINE_MASK);
};
// Number of buffer ids packed into BufferId.
// The count is read from the slot above the BUF_COMBINED_MAX data slots. A stored value of 0
// or anything larger than BUF_COMBINED_MAX (e.g. the all-ones pattern of a negative id) is
// treated as "not combined" and reported as 1.
template <int BufferId>
struct CombinedBufferCount {
    // Raw content of the count slot. Declared constexpr (implicitly inline in C++17) like the
    // other static members in this file, so it never needs an out-of-class definition.
    constexpr static uint32_t tmp =
        (static_cast<uint32_t>(BufferId) >> (BUF_COMBINE_SHIFT * BUF_COMBINED_MAX)) & BUF_COMBINE_MASK;
    constexpr static uint32_t Value = (tmp == 0 || tmp > BUF_COMBINED_MAX) ? 1 : tmp;
};
// Expands a position sequence into an array holding the id stored at each position.
template <int BufferId, typename intSeq>
struct DecodeBufferIdAux {};

template <int BufferId, int... Positions>
struct DecodeBufferIdAux<BufferId, IntegerSequence<Positions...>> {
    // One entry per position; the number of positions doubles as the packed-id count.
    static constexpr int Value[sizeof...(Positions)] = {
        DecodeBufferIdWithPos<BufferId, sizeof...(Positions), Positions>::Value...};
};
// Unpacks a (possibly combined) buffer id into an array of N ids.
// N defaults to the count encoded in BufferId (1 for a plain id).
template <int BufferId, uint32_t N = CombinedBufferCount<BufferId>::Value>
struct DecodeBufferId {
    // Positions 0 .. N - 1.
    using IntSequence = MakeIntegerSequence<static_cast<int>(N)>;
    // Pointer to the first decoded id.
    static constexpr const int* const Value = DecodeBufferIdAux<BufferId, IntSequence>::Value;
};
// Walks the input nodes of the current node and collects, for every input that may be freed,
// those of its buffers that match BufferType and the ping/pong side Where.
template <typename inFuns, typename FunList, typename ToReleaseEs, int currentNodePos, uint32_t BufferType,
          bool cache_brc = false, uint32_t Where = BUF_PING>
struct ReleaseBufferByTypeAux {};

// No inputs left: nothing to release.
template <typename FunList, typename ToReleaseEs, int currentNodePos, uint32_t BufferType, bool cache_brc,
          uint32_t Where>
struct ReleaseBufferByTypeAux<Elems<>, FunList, ToReleaseEs, currentNodePos, BufferType, cache_brc, Where> {
    using Type = Elems<>;
};

// Handle the first input F, then recurse over the remaining inputs Fs...
template <typename F, typename... Fs, typename FunList, typename ToReleaseEs, int currentNodePos,
          uint32_t BufferType, bool cache_brc, uint32_t Where>
struct ReleaseBufferByTypeAux<Elems<F, Fs...>, FunList, ToReleaseEs, currentNodePos, BufferType, cache_brc, Where> {
protected:
    // Filter predicate: selects the Mapping bound to F.
    template <class Target, class T>
    struct BindEqual {
        static constexpr bool Value = __aux::IsSameType<F, typename T::Bind>::Value;
    };

    // Filter predicate: selects buffers on the requested side with the requested type.
    template <class Target, class T>
    struct BufferTypeEqual {
        static constexpr bool Value = (T::PingPong == Where) && (BufferType == T::BufferType);
    };

    // With cache_brc enabled, CopyInBrc / VecBrc inputs are never released here.
    static constexpr bool isCacheBrcNode =
        cache_brc && (Vec::IsCopyInBrcOp<typename F::Fun>::Value || Vec::IsVecBrcOp<typename F::Fun>::Value);
    static constexpr bool ableToRelease =
        !isCacheBrcNode && __aux::InputIsAbleToFreeAux<FunList, currentNodePos + 1, F>();

    // Exactly one pending mapping is expected for F.
    using mappings = typename ToReleaseEs::template Filter<BindEqual>;
    static_assert(mappings::Size == 1, "mapping::Size == 1");
    using mapping = typename mappings::template At<0>;
    using buffers = typename mapping::Buffers::template Filter<BufferTypeEqual>;

    // Result for the remaining inputs.
    using left = typename ReleaseBufferByTypeAux<Elems<Fs...>, FunList, ToReleaseEs, currentNodePos, BufferType,
                                                 cache_brc, Where>::Type;

public:
    using Type = typename __aux::Condition<ableToRelease, buffers, Elems<>>::template Union<left>;
};
// Entry point of ReleaseBufferByTypeAux for the node at currentNodePos:
// yields the buffers of the given type / side that its non-scalar inputs can give back.
template <typename FunList, typename ToReleaseEs, int currentNodePos, uint32_t BufferType, bool cache_brc = false,
          uint32_t Where = BUF_PING>
struct ReleaseUnusedBufferByType {
    // Node being processed.
    using fun = typename FunList::template At<currentNodePos>;
    // Its non-scalar input nodes.
    using inFuns = typename fun::InNonScalarFuns;
    using Type = typename ReleaseBufferByTypeAux<inFuns, FunList, ToReleaseEs, currentNodePos, BufferType,
                                                 cache_brc, Where>::Type;
};
// Walks the input nodes of the current node and collects the pending Mapping entries
// (node -> buffers) of every input that may be freed.
template <typename inFuns, typename FunList, typename ToReleaseEs, int currentNodePos, bool cache_brc>
struct ReleaseUnusedInputAux {};

// No inputs left: nothing to release.
template <typename FunList, typename ToReleaseEs, int currentNodePos, bool cache_brc>
struct ReleaseUnusedInputAux<Elems<>, FunList, ToReleaseEs, currentNodePos, cache_brc> {
    using Type = Elems<>;
};

// Handle the first input F, then recurse over the remaining inputs Fs...
template <typename F, typename... Fs, typename FunList, typename ToReleaseEs, int currentNodePos, bool cache_brc>
struct ReleaseUnusedInputAux<Elems<F, Fs...>, FunList, ToReleaseEs, currentNodePos, cache_brc> {
protected:
    // Filter predicate: selects the Mapping bound to F.
    template <class Target, class T>
    struct BindEqual {
        static constexpr bool Value = __aux::IsSameType<F, typename T::Bind>::Value;
    };

    // With cache_brc enabled, CopyInBrc / VecBrc inputs are never released here.
    static constexpr bool isCacheBrcNode =
        cache_brc && (Vec::IsCopyInBrcOp<typename F::Fun>::Value || Vec::IsVecBrcOp<typename F::Fun>::Value);
    static constexpr bool ableToRelease =
        !isCacheBrcNode && __aux::InputIsAbleToFreeAux<FunList, currentNodePos + 1, F>();

    // At most one pending mapping may exist for F.
    using mappings = typename ToReleaseEs::template Filter<BindEqual>;
    static_assert(mappings::Size <= 1, "mapping::Size <= 1");

    // Result for the remaining inputs.
    using left =
        typename ReleaseUnusedInputAux<Elems<Fs...>, FunList, ToReleaseEs, currentNodePos, cache_brc>::Type;

public:
    using Type = typename __aux::Condition<ableToRelease, mappings, Elems<>>::template Union<left>;
};
// Removes from ToReleaseEs the mappings of those inputs of the node at currentNodePos
// that ReleaseUnusedInputAux reports as releasable.
template <typename FunList, typename ToReleaseEs, int currentNodePos, bool cache_brc>
struct ReleaseUnusedInput {
    // Node being processed.
    using fun = typename FunList::template At<currentNodePos>;
    // Its non-scalar input nodes.
    using inFuns = typename fun::InNonScalarFuns;
    // Mappings that are released in this step.
    using needReleaseEs =
        typename ReleaseUnusedInputAux<inFuns, FunList, ToReleaseEs, currentNodePos, cache_brc>::Type;
    // Pending mappings that remain afterwards.
    using Type = typename ToReleaseEs::template Remove<needReleaseEs>::Type;
};
// Value is true when at least one of the given lists is non-empty (Size > 0).
template <typename... Eses>
struct CheckEsListSize {};

// No lists at all: nothing is available.
template <>
struct CheckEsListSize<> {
    static constexpr bool Value = false;
};

// Head list plus the verdict for the remaining lists.
template <typename Head, typename... Tail>
struct CheckEsListSize<Head, Tail...> {
    static constexpr bool rest = CheckEsListSize<Tail...>::Value;
    static constexpr bool Value = (Head::Size > 0) || rest;
};
// Given a list of lists, picks element 0 of the first list whose Size is greater than 0.
template <typename Es>
struct PriorityGetFrontAux {};

// Last candidate list: its front is taken unconditionally.
template <typename T>
struct PriorityGetFrontAux<Elems<T>> {
    using Type = typename T::template At<0>;
};

// Prefer the front of T; if T is empty fall back to the result for the remaining lists.
template <typename T, typename... Ts>
struct PriorityGetFrontAux<Elems<T, Ts...>> {
private:
    static constexpr bool hasFront = (T::Size > 0);
    using front = typename T::template At<0>;

public:
    using left = typename PriorityGetFrontAux<Elems<Ts...>>::Type;
    using Type = __aux::Condition<hasFront, front, left>;
};
// Picks the front element of the first non-empty list among Eses (earlier lists win).
// Compilation fails when every list is empty.
template <typename... Eses>
struct PriorityGetFront {
    static_assert(CheckEsListSize<Eses...>::Value, "At least one memory Es should not be empty.");
    using Type = typename PriorityGetFrontAux<Elems<Eses...>>::Type;
};
template <typename BufListList>
struct BufListListDecoder {
public:
using PersistMte2Es = typename BufListList::template At<static_cast<int>(BufPosInList::PERSIST_MTE2)>;
using Mte2Es = typename BufListList::template At<static_cast<int>(BufPosInList::MTE2)>;
using PersistMte3Es = typename BufListList::template At<static_cast<int>(BufPosInList::PERSIST_MTE3)>;
using Mte3Es = typename BufListList::template At<static_cast<int>(BufPosInList::MTE3)>;
using PersistTmpEs = typename BufListList::template At<static_cast<int>(BufPosInList::PERSIST_TEMP)>;
using TmpEs = typename BufListList::template At<static_cast<int>(BufPosInList::TEMP)>;
using PongMte3Es = typename BufListList::template At<static_cast<int>(BufPosInList::PONG_MTE3)>;
};
template <typename FunList, typename BufListList, MemLevel MemLvl, bool cache = false>
struct AllocMte2 {
private:
using BufLists = BufListListDecoder<BufListList>;
using usedTmpEs = __aux::Condition<MemLvl == MemLevel::LEVEL_2 || cache, Elems<>, typename BufLists::TmpEs>;
using usedPongMte3Es = __aux::Condition<MemLvl != MemLevel::LEVEL_0 || cache, Elems<>,
typename BufLists::PongMte3Es>;
using usedPersistMte2Es = __aux::Condition<!cache, Elems<>, typename BufLists::PersistMte2Es>;
using usedMte2Es = __aux::Condition<cache, Elems<>, typename BufLists::Mte2Es>;
using mte2 = typename PriorityGetFront<usedPersistMte2Es, usedMte2Es, usedTmpEs, usedPongMte3Es>::Type;
using Mte2EsNext = typename BufLists::Mte2Es::template PopFront<mte2>::Type;
using TmpEsNext = typename BufLists::TmpEs::template PopFront<mte2>::Type;
using PongMte3EsNext = typename BufLists::PongMte3Es::template PopFront<mte2>::Type;
using PersistMte2EsNext = typename BufLists::PersistMte2Es::template PopFront<mte2>::Type;
public:
using Type = Elems<PersistMte2EsNext, Mte2EsNext, typename BufLists::PersistMte3Es, typename BufLists::Mte3Es,
typename BufLists::PersistTmpEs, TmpEsNext, PongMte3EsNext, mte2>;
};
template <typename FunList, typename BufListList, MemLevel MemLvl, bool cache = false>
struct AllocTempBuffer {
private:
using BufLists = BufListListDecoder<BufListList>;
using usedPongMte3Es = __aux::Condition<MemLvl != MemLevel::LEVEL_0 || cache, Elems<>,
typename BufLists::PongMte3Es>;
using usedMte2Es = __aux::Condition<MemLvl == MemLevel::LEVEL_2 || cache, Elems<>, typename BufLists::Mte2Es>;
using usedPersistTmpEs = __aux::Condition<!cache, Elems<>, typename BufLists::PersistTmpEs>;
using usedTmpEs = __aux::Condition<cache, Elems<>, typename BufLists::TmpEs>;
using tmp = typename PriorityGetFront<usedPersistTmpEs, usedTmpEs, usedPongMte3Es, usedMte2Es>::Type;
using TmpEsNext = typename BufLists::TmpEs::template PopFront<tmp>::Type;
using PongMte3EsNext = typename BufLists::PongMte3Es::template PopFront<tmp>::Type;
using Mte2EsNext = typename BufLists::Mte2Es::template PopFront<tmp>::Type;
using PersistTmpEsNext = typename BufLists::PersistTmpEs::template PopFront<tmp>::Type;
public:
using Type = Elems<typename BufLists::PersistMte2Es, Mte2EsNext, typename BufLists::PersistMte3Es,
typename BufLists::Mte3Es, PersistTmpEsNext, TmpEsNext, PongMte3EsNext, tmp>;
};
template <typename FunList, typename BufListList, MemLevel MemLvl, bool cache = false>
struct AllocMte3 {
private:
using BufLists = BufListListDecoder<BufListList>;
using usedTmpEs = __aux::Condition<MemLvl != MemLevel::LEVEL_0 || cache, Elems<>, typename BufLists::TmpEs>;
using usedPongMte3Es = __aux::Condition<MemLvl != MemLevel::LEVEL_0 || cache, Elems<>,
typename BufLists::PongMte3Es>;
using usedMte2Es = __aux::Condition<MemLvl != MemLevel::LEVEL_0 || cache, Elems<>, typename BufLists::Mte2Es>;
using usedPersistMte3Es = __aux::Condition<!cache, Elems<>, typename BufLists::PersistMte3Es>;
using usedMte3Es = __aux::Condition<cache, Elems<>, typename BufLists::Mte3Es>;
using mte3 = typename PriorityGetFront<usedPersistMte3Es, usedMte3Es, usedTmpEs, usedPongMte3Es, usedMte2Es>::Type;
using Mte3EsNext = typename BufLists::Mte3Es::template PopFront<mte3>::Type;
using TmpEsNext = typename BufLists::TmpEs::template PopFront<mte3>::Type;
using PongMte3EsNext = typename BufLists::PongMte3Es::template PopFront<mte3>::Type;
using Mte2EsNext = typename BufLists::Mte2Es::template PopFront<mte3>::Type;
using PersistMte3EsNext = typename BufLists::PersistMte3Es::template PopFront<mte3>::Type;
public:
using Type = Elems<typename BufLists::PersistMte2Es, Mte2EsNext, PersistMte3EsNext, Mte3EsNext,
typename BufLists::PersistTmpEs, TmpEsNext, PongMte3EsNext, mte3>;
};
template <typename FunList, typename BufListList, typename ToReleaseEs, bool cache_brc, int funPos>
struct ReleaseAndUpdateEs {
private:
using BufLists = BufListListDecoder<BufListList>;
using PersistMte2Es = typename BufLists::PersistMte2Es;
using Mte2EsNext = typename BufLists::Mte2Es::template Concat<
typename ReleaseUnusedBufferByType<FunList, ToReleaseEs, funPos, BUF_TYPE_MTE2, cache_brc>::Type>;
using PersistMte3Es = typename BufLists::PersistMte3Es;
using Mte3EsNext = typename BufLists::Mte3Es::template Concat<
typename ReleaseUnusedBufferByType<FunList, ToReleaseEs, funPos, BUF_TYPE_MTE3, cache_brc>::Type>;
using PersistTmpEs = typename BufLists::PersistTmpEs;
using TmpEsNext = typename BufLists::TmpEs::template Concat<
typename ReleaseUnusedBufferByType<FunList, ToReleaseEs, funPos, BUF_TYPE_TEMP, cache_brc>::Type>;
using PongMte3EsNext = typename BufLists::PongMte3Es::template Concat<
typename ReleaseUnusedBufferByType<FunList, ToReleaseEs, funPos, BUF_TYPE_MTE3, cache_brc, BUF_PONG>::Type>;
using ToReleaseEsNext = typename ReleaseUnusedInput<FunList, ToReleaseEs, funPos, cache_brc>::Type;
public:
using Type = Elems<PersistMte2Es, Mte2EsNext, PersistMte3Es, Mte3EsNext, PersistTmpEs, TmpEsNext, PongMte3EsNext,
ToReleaseEsNext>;
};
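/**
 * Strategy 1/2/3: traverse the graph and generate the BufferId list.
 * Template parameters:
 * 1. FunList: the complete list of execution nodes of the compute graph
 * 2. BufListList: the lists of free Mte2/Mte3/TmpBuffer/PongMte3 buffers
 * 3. PongOffset: Pong buffer ID offset
 * 4. MemLvl: memory strategy
 * 5. use_nddma: whether CopyInBrc uses NDDMA
 * 6. cache_brc: whether to cache CopyInBrc & VecBrc nodes
 * 7. AllocEs: the list of BufferWrappers allocated so far
 * 8. ToReleaseEs: the list of Mapping<Bind, Elems<BufferWrapper...>> entries waiting to be released
 * 9. scalarIdx: the next scalar index
 * 10. funPos: the position of the current execution node
 * Return value:
 * 1. const int32_t* const*: the ping/pong BufferId lists of the complete compute graph
 *    ([0] holds the ids as allocated, [1] the counterpart ids computed by CalcPongBufferId)
 */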
// Compile-time recursion over FunList: each instantiation assigns a buffer wrapper to the node
// at funPos, updates the free pools / pending-release list, and recurses with funPos + 1.
// When funPos reaches FunList::Size the collected wrappers are converted into id tables.
template <typename FunList, typename BufListList, int PongOffset, MemLevel MemLvl = MemLevel::LEVEL_2,
bool use_nddma = true, bool cache_brc = false, typename AllocEs = Elems<>, typename ToReleaseEs = Elems<>,
// Scalar ids start at 50 in __ATP_UT__ builds and at 0 otherwise.
#ifdef __ATP_UT__
int scalarIdx = 50,
#else
int scalarIdx = 0,
#endif
int funPos = 0>
__aicore__ static constexpr const int32_t* const* GenerateBufferIdOrder()
{
if constexpr (funPos < FunList::Size) {
using fun = typename FunList::template At<funPos>;
if constexpr (fun::IsScalarOp) {
// Scalar node: gets the next scalar index as id; pools and pending releases are untouched.
using AllocEsNext = typename AllocEs::template Append<BufferWrapper<scalarIdx, BUF_TYPE_SCALAR>>;
return GenerateBufferIdOrder<FunList, BufListList, PongOffset, MemLvl, use_nddma, cache_brc, AllocEsNext,
ToReleaseEs, scalarIdx + 1, funPos + 1>();
} else if constexpr (Vec::IsCopyInBrcOp<typename fun::Fun>::Value && !use_nddma) {
// CopyInBrc without NDDMA: needs two buffers, an MTE2 buffer plus a second one.
using NextEs0 = typename AllocMte2<FunList, BufListList, MemLvl, cache_brc>::Type;
using mte2 = typename NextEs0::template At<BUF_ALLOCATED_IDX>;
// IsConnectOutput presumably reports whether this node's result feeds an output node
// (confirm in __aux); if so the second buffer is an MTE3 buffer, otherwise a temp buffer.
if constexpr (__aux::IsConnectOutput<FunList, funPos + 1, fun>()) {
using NextEs = typename AllocMte3<FunList, NextEs0, MemLvl, cache_brc>::Type;
using mte3 = typename NextEs::template At<BUF_ALLOCATED_IDX>;
// Both buffers are recorded as one combined wrapper and registered for later release.
using AllocEsNext = typename AllocEs::template Append<CombinedBufferWrappers<mte3, mte2>>;
using ToReleaseEsNext = typename ToReleaseEs::template Append<Mapping<fun, Elems<mte3, mte2>>>;
return GenerateBufferIdOrder<FunList, NextEs, PongOffset, MemLvl, use_nddma, cache_brc, AllocEsNext,
ToReleaseEsNext, scalarIdx, funPos + 1>();
} else {
using NextEs = typename AllocTempBuffer<FunList, NextEs0, MemLvl, cache_brc>::Type;
using tmp = typename NextEs::template At<BUF_ALLOCATED_IDX>;
using AllocEsNext = typename AllocEs::template Append<CombinedBufferWrappers<tmp, mte2>>;
using ToReleaseEsNext = typename ToReleaseEs::template Append<Mapping<fun, Elems<tmp, mte2>>>;
return GenerateBufferIdOrder<FunList, NextEs, PongOffset, MemLvl, use_nddma, cache_brc, AllocEsNext,
ToReleaseEsNext, scalarIdx, funPos + 1>();
}
} else if constexpr (Vec::IsCopyInOp<typename fun::Fun>::Value) {
// Copy-in node: one MTE2 buffer. The `cache` flag of AllocMte2 is set only for CopyInBrc
// nodes when cache_brc is enabled.
using NextEs = typename AllocMte2<
FunList, BufListList, MemLvl,
(cache_brc && Vec::IsCopyInBrcOp<typename fun::Fun>::Value)>::Type;
using mte2 = typename NextEs::template At<BUF_ALLOCATED_IDX>;
using AllocEsNext = typename AllocEs::template Append<mte2>;
using ToReleaseEsNext = typename ToReleaseEs::template Append<Mapping<fun, Elems<mte2>>>;
return GenerateBufferIdOrder<FunList, NextEs, PongOffset, MemLvl, use_nddma, cache_brc, AllocEsNext,
ToReleaseEsNext, scalarIdx, funPos + 1>();
} else if constexpr (Vec::IsCopyOutOp<typename fun::Fun>::Value) {
// Copy-out node: allocates nothing (placeholder wrapper with id -1); only releases the
// buffers of inputs that are no longer needed.
using NextEs = typename ReleaseAndUpdateEs<FunList, BufListList, ToReleaseEs, cache_brc, funPos>::Type;
using ToReleaseEsNext = typename NextEs::template At<BUF_TO_RELEASE_IDX>;
using AllocEsNext = typename AllocEs::template Append<BufferWrapper<-1, BUF_TYPE_PLACEHOLDER>>;
return GenerateBufferIdOrder<FunList, NextEs, PongOffset, MemLvl, use_nddma, cache_brc, AllocEsNext,
ToReleaseEsNext, scalarIdx, funPos + 1>();
} else {
// Any other node: one output buffer (MTE3 when IsConnectOutput holds, temp otherwise).
// The output buffer is taken from the pools first; only afterwards are the releasable
// input buffers returned to the pools, and the node itself is registered for release.
if constexpr (__aux::IsConnectOutput<FunList, funPos + 1, fun>()) {
using NextEs0 = typename AllocMte3<
FunList, BufListList, MemLvl,
(cache_brc && Vec::IsVecBrcOp<typename fun::Fun>::Value)>::Type;
using mte3 = typename NextEs0::template At<BUF_ALLOCATED_IDX>;
using AllocEsNext = typename AllocEs::template Append<mte3>;
using NextEs = typename ReleaseAndUpdateEs<FunList, NextEs0, ToReleaseEs, cache_brc, funPos>::Type;
using ToReleaseEsNext = typename NextEs::template At<BUF_TO_RELEASE_IDX>::template Append<
Mapping<fun, Elems<mte3>>>;
return GenerateBufferIdOrder<FunList, NextEs, PongOffset, MemLvl, use_nddma, cache_brc, AllocEsNext,
ToReleaseEsNext, scalarIdx, funPos + 1>();
} else {
using NextEs0 = typename AllocTempBuffer<
FunList, BufListList, MemLvl,
(cache_brc && Vec::IsVecBrcOp<typename fun::Fun>::Value)>::Type;
using tmp = typename NextEs0::template At<BUF_ALLOCATED_IDX>;
using AllocEsNext = typename AllocEs::template Append<tmp>;
using NextEs = typename ReleaseAndUpdateEs<FunList, NextEs0, ToReleaseEs, cache_brc, funPos>::Type;
using ToReleaseEsNext = typename NextEs::template At<BUF_TO_RELEASE_IDX>::template Append<
Mapping<fun, Elems<tmp>>>;
return GenerateBufferIdOrder<FunList, NextEs, PongOffset, MemLvl, use_nddma, cache_brc, AllocEsNext,
ToReleaseEsNext, scalarIdx, funPos + 1>();
}
}
} else {
// All nodes processed: exactly one wrapper per node must have been recorded.
static_assert(FunList::Size == AllocEs::Size, "N == AllocEs::Size");
// Two rows of ids: [0] as allocated, [1] the counterparts computed with PongOffset.
return ExtractBufferId<AllocEs, PongOffset>::Value;
}
}
}
}
#endif