/**
* Copyright (c) 2026 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
/*!
* \file hccl_aicpu_def.h
* \brief Declaration of the HcclImpl specialization for the AICPU server type.
*/
#ifndef IMPL_V220_HCCL_AICPU_DEF_H
#define IMPL_V220_HCCL_AICPU_DEF_H
namespace AscendC {
/**
 * HcclImpl specialization selected when the server type is
 * HcclServerType::HCCL_SERVER_TYPE_AICPU.
 *
 * Only the interface is declared here; apart from GetQueueNum(), no member
 * function body is defined in this file.
 *
 * \tparam config  Compile-time configuration object, bound by reference.
 */
template <const auto &config>
class HcclImpl<HcclServerType::HCCL_SERVER_TYPE_AICPU, config> {
public:
    // ---- Communication task entry points -----------------------------------
    // Every entry point returns an HcclHandle and is templated on `commit`
    // (default false).
    // NOTE(review): `commit` presumably selects whether the task is committed
    // as part of the call instead of via a later Commit() - confirm against
    // the definitions. `repeat` presumably is the number of repetitions of the
    // task - confirm as well.

    template <bool commit = false>
    __aicore__ inline HcclHandle AllReduce(GM_ADDR sendBuf, GM_ADDR recvBuf, uint64_t count,
                                           HcclDataType dataType, HcclReduceOp op, uint8_t repeat = 1);

    template <bool commit = false>
    __aicore__ inline HcclHandle AllGather(GM_ADDR sendBuf, GM_ADDR recvBuf, uint64_t sendCount,
                                           HcclDataType dataType, uint64_t strideCount, uint8_t repeat = 1);

    template <bool commit = false>
    __aicore__ inline HcclHandle ReduceScatter(GM_ADDR sendBuf, GM_ADDR recvBuf, uint64_t recvCount,
                                               HcclDataType dataType, HcclReduceOp op, uint64_t strideCount,
                                               uint8_t repeat = 1);

    template <bool commit = false>
    __aicore__ inline HcclHandle AlltoAll(GM_ADDR sendBuf, GM_ADDR recvBuf, uint64_t dataCount,
                                          HcclDataType dataType, uint64_t strideCount = 0, uint8_t repeat = 1);

    template <bool commit = false>
    __aicore__ inline HcclHandle AlltoAllV(GM_ADDR sendBuf, void *sendCounts, void *sdispls, HcclDataType sendType,
                                           GM_ADDR recvBuf, void *recvCounts, void *rdispls, HcclDataType recvType,
                                           uint8_t repeat = 1);

    template <bool commit = false>
    __aicore__ inline HcclHandle BatchWrite(GM_ADDR batchWriteInfo, uint32_t itemNum, uint16_t queueID);

    template <bool commit = false>
    __aicore__ inline HcclHandle AlltoAllvWrite(GM_ADDR usrIn, GM_ADDR sendOffsets, GM_ADDR sendSizes,
                                                uint64_t remoteWinOffset, uint64_t localDataSize);

public:
    // ---- Lifecycle and task control -----------------------------------------

    // Initialization entry points. Init() accepts an optional tiling pointer
    // (defaults to nullptr); InitV2() requires one.
    __aicore__ inline void Init(GM_ADDR context, __gm__ void *initTiling = nullptr);
    __aicore__ inline void InitV2(GM_ADDR context, const void *initTiling);

    // Register per-operation tiling data; both variants return an int32_t status.
    // NOTE(review): the meaning of the status value is not visible here.
    __aicore__ inline int32_t SetCcTiling(__gm__ void *ccOpTilingData);
    __aicore__ inline int32_t SetCcTilingV2(uint64_t offset);

    // Per-handle control of a previously prepared task.
    __aicore__ inline void Commit(HcclHandle handleId);
    __aicore__ inline int32_t Wait(HcclHandle handleId);
    __aicore__ inline int32_t Query(HcclHandle handleId);

    __aicore__ inline void InterHcclGroupSync(int8_t srcGroupID, HcclHandle srcHandleID);

    template <ScopeType type = ScopeType::ALL>
    __aicore__ inline void QueueBarrier(uint16_t queueID);

    template <bool sync = true>
    __aicore__ inline int32_t Iterate(HcclHandle handleId, uint16_t *seqSlices, uint16_t seqSliceLen);

    template <bool sync = true>
    __aicore__ inline void Finalize();

public:
    // ---- Queries --------------------------------------------------------------

    __aicore__ inline GM_ADDR GetWindowsInAddr(uint32_t rankId);
    __aicore__ inline GM_ADDR GetWindowsOutAddr(uint32_t rankId);
    __aicore__ inline uint32_t GetRankId();
    __aicore__ inline uint32_t GetRankDim();

    // Returns the cached queue count (queueNum_, 0 until something assigns it).
    __aicore__ inline uint16_t GetQueueNum() { return queueNum_; }

private:
    // ---- Internal helpers -----------------------------------------------------

    // Shared preparation path taking the packed CommonPrepareParam.
    template <bool commit = false>
    __aicore__ inline HcclHandle CommonPrepareImpl(const CommonPrepareParam& param);
    __aicore__ inline bool CheckCommonPrepareParamValid(const CommonPrepareParam& param);

    __aicore__ inline void ResetFinishedTurnCnt();

    template <bool sync>
    __aicore__ inline void SendFinalizeMsg();

    // Message submission overloads. The first one defaults srcGroupID to -1 and
    // srcHandleID to INVALID_HANDLE_ID when the caller does not supply them.
    __aicore__ inline void SendMsgToServer(uint16_t queId, const CommonPrepareParam& para,
                                           int8_t srcGroupID = -1, HcclHandle srcHandleID = INVALID_HANDLE_ID);
    __aicore__ inline void SendMsgToServer(const AlltoAllVParamExt& para);

    __aicore__ inline uint16_t GetStepSizeByHandle(HcclHandle handle);
    __aicore__ inline uint16_t GetStepCntsPerRepeatByHandle(HcclHandle handle);

    __aicore__ inline void SetCommitTurnCntToGm(uint8_t msgPos, uint64_t turnCnt, HcclHandle handleId);
    __aicore__ inline uint64_t WaitFinishCntFromGm(uint8_t msgPos, uint64_t expectedCnt);

    __aicore__ inline void InitWorkingFlag();
    __aicore__ inline void InitInner(uint64_t msgAddr, HcclTilingVersion version);

private:
    // ---- State ----------------------------------------------------------------

    // One slot per command type (HcclCMDType values below HCCL_CMD_ALL), zeroed.
    uint64_t ccOpTilingDataTable_[static_cast<uint32_t>(HcclCMDType::HCCL_CMD_ALL)] = {0UL};
    uint64_t ccOpParamTable_[static_cast<uint32_t>(HcclCMDType::HCCL_CMD_ALL)] = {0UL};

    // The context structure type depends on the target NPU architecture.
    // Defaults to null so the pointer is never indeterminate before initialization.
#if __NPU_ARCH__ == 3510
    __gm__ OpResCtx *hcclContext_ = nullptr;
#else
    __gm__ HcclCombineOpParam *hcclContext_ = nullptr;
#endif
    __gm__ HcclMsgArea *hcclMsgArea_ = nullptr;
    uint64_t tilingBaseAddr_ = 0UL;
    uint16_t queueNum_ = 0U;

    // Per-handle bookkeeping; each array holds HCCL_MAX_HANDLE_ID entries.
    uint16_t handleId2CurrSliceId_[HCCL_MAX_HANDLE_ID] = {0U};
    uint16_t handleIdCommitTurnCnt_[HCCL_MAX_HANDLE_ID] = {0U};
    uint16_t handleIdWaitCallNum_[HCCL_MAX_HANDLE_ID] = {0U};
    uint8_t handleId2CmdType_[HCCL_MAX_HANDLE_ID] = {0U};
    // NOTE(review): intentionally left without a default initializer. The
    // signed element type suggests a non-zero "unset" sentinel that is
    // presumably written during initialization - confirm before adding one.
    int8_t handleIdMsgPosition_[HCCL_MAX_HANDLE_ID];
    uint8_t handleIdRepeat_[HCCL_MAX_HANDLE_ID] = {0U};

    // Per-queue bookkeeping; holds MAX_QUE_NUM entries.
    uint8_t curMsgPosition_[MAX_QUE_NUM] = {0U};

    HcclHandle curHandleId_ = INVALID_HANDLE_ID;
    HcclTilingVersion curVersion_ = HcclTilingVersion::INVALID_TILING_VERSION;
    uint8_t workingFlag_ = false;
    uint8_t debugMode_ = 0U;
    uint8_t devType_ = 0U;
};
}
#endif