/**
* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
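/*!
* \file shmem_wait_until.cpp
* \brief Implements signal polling (PollCompleted) and tensor-parameter decoding (GetTensorInfo)
*        for SignalTileOp and ShmemWaitUntilImpl.
*/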
#include "shmem_wait_until.h"
#include <cstddef>
#include <type_traits>
#include <vector>
#include <cstring>
#include <cstdint>
#include "securec.h"
#include "machine/device/dynamic/aicore_manager.h"
#include "tileop/distributed/comm_context.h"
#include "machine/utils/device_log.h"
#include "machine/utils/dynamic/dev_workspace.h"
#include "neon_stub.h"
#include "machine/device/dynamic/device_utils.h"
namespace npu::tile_fwk::Distributed {
// Offset added to AicpuParamInfo::inIndex to locate, inside the AICPU code vector, the entry that
// GetTensorInfo reads as the starting attribute index for resolving a tensor's raw index.
constexpr int32_t AICPU_ATTR_RAW_INDEX = 3;
/**
 * \brief Reports whether the signal this op is waiting on has reached its expected value.
 *
 * When IsDeviceMode() evaluates to false at compile time the op is always reported as completed.
 * Otherwise the first element behind addr_ is compared with expectedSum_. On a match a debug
 * message is emitted and, if resetSignal_ is set, that element is written back to 0.
 *
 * The method is const, yet it may write through the addr_ pointer (the pointee is not part of
 * the object's constness).
 *
 * NOTE(review): the definition is marked inline inside a .cpp file; confirm that no other
 * translation unit needs to call it.
 *
 * \return true when completed, false while addr_[0] differs from expectedSum_.
 */
inline bool SignalTileOp::PollCompleted() const
{
    if constexpr (!npu::tile_fwk::dynamic::IsDeviceMode()) {
        return true;
    }

    // Single read of the signal for the comparison, exactly as many reads as before.
    const bool signalReached = (addr_[0] == expectedSum_);
    if (signalReached) {
        DEV_DEBUG("expectedSum_=%d, addr_[0]=%d, resolve taskId=%lu", expectedSum_, addr_[0], taskId_);
        // Clear the signal only after it has been observed at the expected value.
        if (resetSignal_) {
            addr_[0] = 0;
        }
    }
    return signalReached;
}
/**
 * \brief Polls the running-task queue selected by parallelIdx.
 *
 * A callback is handed to the queue's PollCompleted. For each SignalTileOp the queue passes to
 * it, the callback forwards the task id to AiCoreManager::ProcessCompletedAicpuTask and returns
 * that call's result. If aicoreManager is null, the callback logs an error and returns
 * dynamic::DEVICE_MACHINE_ERROR instead.
 *
 * \param aicoreManager Manager notified about tasks handed to the callback; checked for null
 *                      inside the callback, i.e. only when the queue actually invokes it.
 * \param parallelIdx   Index into runingTaskQueue_ (not range-checked here).
 * \return Whatever the queue's PollCompleted returns.
 */
int32_t ShmemWaitUntilImpl::PollCompleted(npu::tile_fwk::dynamic::AiCoreManager* aicoreManager, uint32_t parallelIdx)
{
    auto onTaskPolled = [&](SignalTileOp* task) {
        if (aicoreManager != nullptr) {
            return aicoreManager->ProcessCompletedAicpuTask(task->taskId_);
        }
        DEV_ERROR(DistributedErrorCode::NULLPTR, "sche.task.pre.task.poll#: AicoreManager is nullptr");
        return dynamic::DEVICE_MACHINE_ERROR;
    };
    return runingTaskQueue_[parallelIdx].PollCompleted(onTaskPolled);
}
/**
 * \brief Builds the TensorInfo describing the signal tensor of one AICPU task.
 *
 * The task id is split into a function index (FuncID) and an operation index (TaskID), which
 * select the function data and the operation's attribute base. The AICPU code vector supplies
 * the attribute start index, the dimension count, the expected sum and the reset flag. The raw
 * tensor index and the offset vector are resolved through GetCoa / GetCoaVector.
 *
 * \param taskId          Combined id decoded with FuncID() and TaskID().
 * \param aicpuCode       AICPU code vector read at positions derived from paramInfo.
 * \param funcDataList    Array of per-function data, indexed by the decoded function index.
 * \param hcclContextAddr Passed through to MapVirtualSignalAddr.
 * \param paramInfo       Supplies inIndex and attrIndex, the base positions inside aicpuCode.
 * \return TensorInfo with rawIndex, dim, offset, expectedSum, resetSignal, vaddr and rawAddr set.
 */
TensorInfo ShmemWaitUntilImpl::GetTensorInfo(
    uint64_t taskId, const npu::tile_fwk::dynamic::DevRelocVector<int32_t>& aicpuCode,
    npu::tile_fwk::DynFuncData* funcDataList, int64_t* hcclContextAddr, const AicpuParamInfo& paramInfo)
{
    // Locate the function data and the attribute block of the operation.
    const uint32_t funcIdx = npu::tile_fwk::FuncID(taskId);
    const uint32_t opIdx = npu::tile_fwk::TaskID(taskId);
    auto& func = funcDataList[funcIdx];
    auto attrBase = &func.opAttrs[func.opAtrrOffsets[opIdx]];
    auto exprTable = func.exprTbl;

    const auto inputBase = paramInfo.inIndex;
    const auto attrIndexBase = paramInfo.attrIndex;

    TensorInfo info;

    // The raw index comes first; the offset vector starts at the next attribute slot.
    int32_t coaCursor = aicpuCode[inputBase + AICPU_ATTR_RAW_INDEX];
    info.rawIndex = GetCoa(coaCursor, attrBase, exprTable);
    coaCursor += 1;
    info.dim = aicpuCode[inputBase + AICPU_ATTR_DIM_INDEX];
    info.offset = GetCoaVector(coaCursor, info.dim, attrBase, exprTable);

    // Wait condition and reset flag are stored at the attribute position.
    info.expectedSum = aicpuCode[attrIndexBase];
    info.resetSignal = aicpuCode[attrIndexBase + AICPU_ATTR_DIM_INDEX];

    // Resolve the tensor's address via its raw-tensor descriptor, then map it.
    info.vaddr = func.rawTensorAddr[func.rawTensorDesc[info.rawIndex].offsetOrIndex];
    info.rawAddr = MapVirtualSignalAddr(hcclContextAddr, info.vaddr);
    return info;
}
}