/**
* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
/*!
* \file runtime_agent.cpp
* \brief Collects the AI core register addresses exposed through RuntimeAgent.
*/
#include "machine/runtime/runner/runtime_agent.h"
#include "tilefwk/platform.h"
#include "adapter/api/hal_api.h"
namespace npu::tile_fwk {
// Request record that RuntimeAgent::GetAicoreRegInfo passes to HalMemCtl (as an opaque pointer plus
// sizeof) when it asks for the register address mapping.
// NOTE(review): the field order/width presumably has to match what the HAL expects - confirm before editing.
struct AddrMapInPara {
// Kind of address mapping requested; filled from the caller-supplied addrType.
unsigned int addr_type;
// Device the mapping is requested for; filled from the device id reported by GetPgMask.
unsigned int devid;
};
// Result record that HalMemCtl fills for RuntimeAgent::GetAicoreRegInfo.
// NOTE(review): the field order/width presumably has to match what the HAL writes - confirm before editing.
struct AddrMapOutPara {
// Base address from which GetAicoreRegInfo derives every per-core / per-sub-core register address.
unsigned long long ptr;
// Not read anywhere in this file; presumably the length of the mapped range - TODO confirm.
unsigned long long len;
};
namespace {
// Module-type argument that GetPgMask passes to HalGetDeviceInfoByBuff.
constexpr int32_t MODULE_TYPE_AI_CORE = 4;
// Info-type argument that GetPgMask passes to HalGetDeviceInfoByBuff.
constexpr int32_t INFO_TYPE_OCCUPY = 8;
// Number of uint64_t words in the bitmap buffer GetPgMask hands to the HAL (only word 0 is consumed).
constexpr uint8_t AICORE_MAP_BUFF_LEN = 2;
// Register slots produced per AI core by the address-table builders in this file
// (GetAicoreRegInfo puts slot 0 into `aic` and the remaining slots into `aiv`).
constexpr uint32_t SUB_CORE_PER_AICORE = 3;
// Limits used by RuntimeAgent::GetAicoreRegInfo.
namespace DAV_2201 {
// Number of AI cores GetAicoreRegInfo iterates over.
constexpr uint32_t MAX_CORE = 25;
}
// Limits used by RuntimeAgent::GetAicoreRegInfoForDAV3510.
namespace DAV_3510 {
// Number of AI cores GetAicoreRegInfoForDAV3510 maps (split into dies of 18 cores inside that function).
constexpr uint32_t MAX_CORE = 36;
}
bool GetPgMask(uint64_t& valid, int32_t& deviceId)
{
deviceId = GetLogDeviceId();
uint64_t aicore_bitmap[AICORE_MAP_BUFF_LEN] = {0};
int32_t size_n = static_cast<int32_t>(sizeof(uint64_t)) * AICORE_MAP_BUFF_LEN;
auto ret = HalGetDeviceInfoByBuff(
static_cast<uint32_t>(deviceId), MODULE_TYPE_AI_CORE, INFO_TYPE_OCCUPY,
reinterpret_cast<void*>(&aicore_bitmap[0]), &size_n);
if (ret != HAL_ERROR_NONE) {
return false;
}
valid = aicore_bitmap[0];
return true;
}
}
int RuntimeAgent::GetAicoreRegInfo(std::vector<int64_t>& aic, std::vector<int64_t>& aiv, const int addrType)
{
int32_t deviceId = 0;
uint64_t valid = 0;
if (!GetPgMask(valid, deviceId)) {
MACHINE_LOGW("Get Device Info failed or no valid core exists.");
valid = 0xFFFFFFFF;
validGetPgMask = false;
}
MACHINE_LOGI("The valid cores are: %lu.", valid);
uint64_t coreStride = 8 * 1024 * 1024;
uint64_t subCoreStride = 0x100000ULL;
auto isValid = [&valid](int id) {
const uint64_t mask = (1ULL << 25) - 1;
return ((static_cast<uint64_t>(valid) ^ mask) & (1ULL << id)) == 0;
};
struct AddrMapInPara inMapPara;
struct AddrMapOutPara outMapPara;
inMapPara.devid = deviceId;
inMapPara.addr_type = addrType;
auto ret = HalMemCtl(
0, reinterpret_cast<void*>(&inMapPara), sizeof(struct AddrMapInPara), reinterpret_cast<void*>(&outMapPara),
nullptr);
if (ret != HAL_ERROR_NONE) {
MACHINE_LOGE(
HostLauncherErr::MAP_REG_ADDR_FAILED, "Map reg addr fail, maybe others are using current device. (ret=%d).",
ret);
return ret;
}
for (uint32_t i = 0; i < DAV_2201::MAX_CORE; i++) {
for (uint32_t j = 0; j < SUB_CORE_PER_AICORE; j++) {
uint64_t vaddr = 0UL;
if (isValid(i)) {
vaddr = outMapPara.ptr + (i * coreStride + j * subCoreStride);
}
if (j == 0) {
aic.push_back(vaddr);
} else {
aiv.push_back(vaddr);
}
}
}
return 0;
}
void RuntimeAgent::GetAicoreRegInfoForDAV3510(std::vector<int64_t>& regs, std::vector<int64_t>& regsPmu)
{
if (Platform::Instance().GetSoc().GetNPUArch() != NPUArch::DAV_3510) {
return;
}
constexpr uint32_t AICORE_PER_DIE = 18;
constexpr uint32_t AIV_BASE_OFFSET = 18;
constexpr uint32_t SUB_CORE_PER_DIE = AICORE_PER_DIE * SUB_CORE_PER_AICORE;
constexpr unsigned long SUB_CORE_STRIDE = 0x100000ULL;
constexpr unsigned long AIV_STRIDE = SUB_CORE_STRIDE;
constexpr unsigned long AIV_SECOND_STRIDE = 2 * SUB_CORE_STRIDE;
constexpr size_t MAX_INDEX = DAV_3510::MAX_CORE * SUB_CORE_PER_AICORE;
unsigned int devId = GetLogDeviceId();
struct ResMapInfo mapInfo;
mapInfo.target_proc_type = ProcessType::CP1;
mapInfo.res_type = ResMapType::AICORE;
mapInfo.flag = 0;
mapInfo.rsv[0] = 0;
regs.resize(MAX_INDEX);
regsPmu.resize(MAX_INDEX);
for (uint32_t coreIndex = 0; coreIndex < DAV_3510::MAX_CORE; coreIndex++) {
mapInfo.res_id = coreIndex;
unsigned long mapAddr;
unsigned int len = 0x300000;
(void)HalResMap(devId, &mapInfo, &mapAddr, &len);
uint32_t dieIdx = coreIndex / AICORE_PER_DIE;
uint32_t localIdx = coreIndex % AICORE_PER_DIE;
uint32_t dieBase = dieIdx * SUB_CORE_PER_DIE;
uint32_t aicoreIndex = dieBase + localIdx;
uint32_t aivFirstIndex = dieBase + AIV_BASE_OFFSET + localIdx * 2;
uint32_t aivSecondIndex = aivFirstIndex + 1;
regs[aicoreIndex] = mapAddr;
regsPmu[aicoreIndex] = mapAddr;
regs[aivFirstIndex] = mapAddr + AIV_STRIDE;
regsPmu[aivFirstIndex] = mapAddr + AIV_STRIDE;
regs[aivSecondIndex] = mapAddr + AIV_SECOND_STRIDE;
regsPmu[aivSecondIndex] = mapAddr + AIV_SECOND_STRIDE;
}
}
}