/**
* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
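/*!
* \file utils.h
* \brief Device-side helpers: the HcclA5OpResParam descriptor, integer alignment arithmetic, an event-based sync wrapper, and an fp8_e8m0 to bfloat16 cast.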
*/
#ifndef UTILS_H
#define UTILS_H
#include "micro_api/kernel_micro_intf.h"
namespace AscendC {
// Number of entries in each of the windowsIn / windowsOut arrays of HcclA5OpResParam.
constexpr static int64_t MAX_RANK_NUM = 64;
// Plain aggregate carrying workspace, rank, window and auxiliary address fields.
// NOTE(review): the producer of this struct is not visible in this file, so the
// field meanings noted below are inferred from names only; confirm the layout
// and semantics against whoever fills it in before relying on them.
struct HcclA5OpResParam {
// presumably the workspace base address and its size in bytes - TODO confirm
uint64_t workSpace;
uint64_t workSpaceSize;
// presumably this rank's index and the total number of ranks - TODO confirm
uint32_t rankId;
uint32_t rankDim;
// NOTE(review): "winSzie" looks like a misspelling of "winSize"; it is left
// as-is because renaming a field would break every user of this struct.
uint64_t winSzie;
// Fixed-capacity arrays of MAX_RANK_NUM entries each; presumably one window
// address per rank - TODO confirm.
uint64_t windowsIn[MAX_RANK_NUM];
uint64_t windowsOut[MAX_RANK_NUM];
// NOTE(review): purpose of the xn / cke / ms fields cannot be determined from this file.
uint64_t xnAddr;
uint64_t ckeAddr;
uint64_t msAddr;
uint64_t msSize;
};
/**
 * Integer ceiling division: the smallest q such that q * b >= a.
 *
 * @param a dividend
 * @param b divisor; 0 is tolerated and yields 0 rather than dividing by zero
 * @return ceil(a / b), or 0 when b == 0
 */
__aicore__ inline uint64_t CeilDiv(uint64_t a, uint32_t b)
{
    if (b == 0) {
        return 0;
    }
    // Quotient plus a carry for any remainder. The textbook (a + b - 1) / b
    // wraps around when a is within b of UINT64_MAX and then yields a result
    // that is far too small (e.g. 0 for CeilDiv(UINT64_MAX, 2)).
    const uint64_t quotient = a / b;
    return (a % b == 0) ? quotient : quotient + 1;
}
/**
 * Rounds a up to the nearest multiple of b.
 *
 * @param a value to align
 * @param b alignment; 0 means "no alignment" and a is returned unchanged
 * @return the smallest multiple of b that is >= a (a itself when b == 0).
 *         If that multiple does not fit in 64 bits the result wraps.
 */
__aicore__ inline uint64_t CeilAlign(uint64_t a, uint32_t b)
{
    if (b == 0) {
        return a;
    }
    // Work from the remainder instead of ceil(a / b) * b computed through
    // (a + b - 1): that intermediate sum can wrap even when the aligned result
    // is representable (e.g. a == UINT64_MAX, b == 3 must give UINT64_MAX).
    const uint64_t remainder = a % b;
    return (remainder == 0) ? a : a + (b - remainder);
}
/**
 * Remainder of a modulo b, except that an exact multiple reports a full
 * block (b) instead of 0.
 *
 * @param a value being split into blocks of size b
 * @param b block size; 0 yields 0
 * @return a % b when that is non-zero, otherwise b (note: a == 0 also gives b);
 *         0 when b == 0
 */
__aicore__ inline uint64_t BlockAlignMod(uint64_t a, uint32_t b)
{
    if (b != 0) {
        const uint64_t remainder = a % b;
        if (remainder != 0) {
            return remainder;
        }
        return static_cast<uint64_t>(b);
    }
    return 0;
}
/**
 * Sets and then immediately waits on a flag for the hardware event `event`.
 *
 * An event id is fetched from the pipe returned by GetTPipePtr(), the flag for
 * that id is set, and the call returns once the matching wait completes.
 *
 * @tparam event the AscendC::HardEvent to set and wait on
 */
template <AscendC::HardEvent event>
__aicore__ inline void SyncFunc()
{
    AscendC::TEventID syncEventId = GetTPipePtr()->FetchEventID(event);
    AscendC::SetFlag<event>(syncEventId);
    AscendC::WaitFlag<event>(syncEventId);
}
// Cast configuration passed as the template argument of MicroAPI::Cast in CastVf.
// Fields are given positionally: RegLayout::ZERO, SatMode::NO_SAT,
// MaskMergeMode::ZEROING, RoundMode::CAST_NONE.
static constexpr AscendC::MicroAPI::CastTrait castTrait = {AscendC::MicroAPI::RegLayout::ZERO,
AscendC::MicroAPI::SatMode::NO_SAT, AscendC::MicroAPI::MaskMergeMode::ZEROING, AscendC::RoundMode::CAST_NONE};
// Converts fp8_e8m0 values at srcPtr to bfloat16 values at dstPtr using the
// MicroAPI register operations: load -> interleave -> cast -> masked store.
//
// dstPtr: destination for the bfloat16 results.
// srcPtr: source of the fp8_e8m0 inputs.
// count:  element count used to build the bfloat16 mask that guards the cast
//         and the store.
//
// There is no loop here: one load, one cast and one store are issued per call.
// NOTE(review): presumably the caller guarantees count fits in a single
// register's worth of elements - confirm against the call sites.
static __aicore__ inline void CastVf(__local_mem__ bfloat16_t* dstPtr, __local_mem__ fp8_e8m0_t* srcPtr, uint32_t count)
{
AscendC::MicroAPI::RegTensor<fp8_e8m0_t> srcReg;
// NOTE(review): srcZeroReg is never written before Interleave reads it. Confirm
// whether its lanes are don't-care for the following Cast (castTrait uses
// RegLayout::ZERO) or whether it must be zero-filled first.
AscendC::MicroAPI::RegTensor<fp8_e8m0_t> srcZeroReg;
AscendC::MicroAPI::RegTensor<fp8_e8m0_t> dstReg0;
// Receives the second Interleave output; it is not read afterwards.
AscendC::MicroAPI::RegTensor<fp8_e8m0_t> dstReg1;
AscendC::MicroAPI::RegTensor<bfloat16_t> bf16DstReg;
AscendC::MicroAPI::MaskReg maskReg;
// Mask is built for bfloat16 (the destination type) from count.
maskReg = AscendC::MicroAPI::UpdateMask<bfloat16_t>(count);
// Load the fp8_e8m0 inputs into a register.
AscendC::MicroAPI::DataCopy(srcReg, srcPtr);
// Interleave the loaded values with srcZeroReg; only dstReg0 is consumed below.
AscendC::MicroAPI::Interleave(dstReg0, dstReg1, srcReg, srcZeroReg);
// Cast fp8_e8m0 -> bfloat16 under the mask, configured by castTrait.
AscendC::MicroAPI::Cast<bfloat16_t, fp8_e8m0_t, castTrait>(bf16DstReg, dstReg0, maskReg);
// Masked store of the bfloat16 results to dstPtr.
AscendC::MicroAPI::DataCopy(dstPtr, bf16DstReg, maskReg);
}
}
#endif