* This file is part of the MindStudio project.
* Copyright (c) 2025 Huawei Technologies Co.,Ltd.
*
* MindStudio is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
* ------------------------------------------------------------------------- */
#include "ProfTask.h"
#include <atomic>
#include <thread>
#include "KernelContext.h"
#include "utils/Ustring.h"
#include "ascend_hal/AscendHalOrigin.h"
#include "utils/FileSystem.h"
#include "utils/InjectLogger.h"
#include "ProfDataCollect.h"
#include "DeviceContext.h"
using namespace std;
constexpr int PROF_CHANNEL_BUFFER_SIZE = 1024 * 1024 * 2;
constexpr int PROF_CHANNEL_NUM = 18;
constexpr uint32_t FFTS_PROF_CFG_MODE = 3;
constexpr uint32_t FFTS_PROF_CFG_TYPE = 0x5;
constexpr uint32_t FFTS_PROF_CFG_CORE_MASK = 0xffffffff;
constexpr uint32_t PROF_CFG_PERIOD = 0;
constexpr uint32_t AICPU_SAMPLE_PERIOD = 100;
constexpr uint32_t STARS_ENABLE_FLAG = 1;
constexpr uint32_t AICORE_TAG = 0;
constexpr uint32_t AICORE_TASK_BASE_TYPE = 0;
constexpr uint32_t HWTS_TAG = 0;
constexpr uint32_t MAX_BIN_FILE_SIZE = 1024 * 1024 * 1024;
constexpr int FFTS_PROF_MODE_BULT = 2;
constexpr int FFTS_PROF_BLOCK_SHINK_DISABLE = 2;
constexpr int FFTS_PROF_AIC_SCALE_ALL = 0;
constexpr int FFTS_PROF_AIC_SCALE_PARTIAL = 1;
constexpr int INSTR_PROF_PERIOD = 32;
constexpr uint16_t INSTR_PROF_MODE_BIU_PERF = 0;
constexpr uint16_t INSTR_PROF_MODE_PC_SAMPLING = 1;
using StarsSocLogConfigT = struct TagStarsSocLogConfig {
uint32_t acsq_task;
uint32_t acc_pmu;
uint32_t cdqm_reg;
uint32_t dvpp_vpc_block;
uint32_t dvpp_jpegd_block;
uint32_t dvpp_jpede_block;
uint32_t ffts_thread_task;
uint32_t ffts_block;
uint32_t sdma_dmu;
};
using TsAiCoreProfileConfigT = struct TagTsAiCoreProfileConfig {
uint32_t type;
uint32_t almostFullThreshold;
uint32_t period;
uint32_t coreMask;
uint32_t eventNum;
uint32_t event[PMU_EVENT_MAX_NUM];
uint32_t tag;
};
using StarsAiCoreProfileConfigT = struct TagStarsAiCoreConfig {
uint32_t type;
uint32_t period;
uint32_t core_mask;
uint32_t event_num;
uint16_t event[PMU_EVENT_MAX_NUM];
};
using FftsProfileConfigT = struct TagFftsProfileConfig {
uint32_t cfgMode;
StarsAiCoreProfileConfigT aiEventCfg[FFTS_PROF_MODE_BULT];
uint32_t tag = 0;
};
using AccProfileConfigT = struct TagAccProfileConfig {
uint32_t type;
uint32_t period;
uint32_t coreMask;
uint32_t eventNum;
uint16_t event[10];
};
using StarsA5SocLogConfigT = struct TagStarsA5SocLogConfig {
uint32_t acsq_task;
uint32_t acc_pmu;
uint32_t cdqm_reg;
uint32_t dvpp_vpc_block;
uint32_t dvpp_jpegd_block;
uint32_t dvpp_jpede_block;
uint32_t ffts_thread_task;
uint32_t ffts_block;
uint32_t sdma_dmu;
uint32_t tag;
uint32_t block_shrink_flag;
};
using FftsA5ProfileConfigT = struct TagFftsA5ProfileConfig {
uint32_t cfgMode;
AccProfileConfigT aiEventCfg[FFTS_PROF_MODE_BULT];
uint32_t tag;
uint32_t blockShinkFlag;
uint32_t aicScale;
};
using InstrProfileConfigT = struct TagInstrProfileConfig {
uint32_t period;
uint32_t biuPcSamplingMode;
uint32_t groupType;
uint32_t groupNo;
};
using InstrProfHeadInfoT = struct TagInstrProfHeadInfo {
uint16_t coreId = 0;
uint16_t coreType = 0;
uint32_t validLen = 0;
};
enum class InstrProfGrpType : uint32_t {
AIC = 0,
AIV0,
AIV1,
};
enum class InstrProfGrpId : uint32_t {
GROUP0 = 0,
GROUP1,
GROUP2,
GROUP3,
GROUP4,
GROUP5,
};
struct InstrProfChnInfo {
InstrProfGrpType grpType;
InstrProfGrpId grpId;
};
class ProfTaskOfA5 : public ProfTask {
public:
ProfTaskOfA5(const MessageOfProfConfig &profTaskConfig, uint32_t deviceId)
: ProfTask(profTaskConfig, deviceId)
{
instrProfChn_ = {
{InstrChannel::GROUP0_AIC, {InstrProfGrpType::AIC, InstrProfGrpId::GROUP0}},
{InstrChannel::GROUP0_AIV0, {InstrProfGrpType::AIV0, InstrProfGrpId::GROUP0}},
{InstrChannel::GROUP0_AIV1, {InstrProfGrpType::AIV1, InstrProfGrpId::GROUP0}},
{InstrChannel::GROUP1_AIC, {InstrProfGrpType::AIC, InstrProfGrpId::GROUP1}},
{InstrChannel::GROUP1_AIV0, {InstrProfGrpType::AIV0, InstrProfGrpId::GROUP1}},
{InstrChannel::GROUP1_AIV1, {InstrProfGrpType::AIV1, InstrProfGrpId::GROUP1}},
{InstrChannel::GROUP2_AIC, {InstrProfGrpType::AIC, InstrProfGrpId::GROUP2}},
{InstrChannel::GROUP2_AIV0, {InstrProfGrpType::AIV0, InstrProfGrpId::GROUP2}},
{InstrChannel::GROUP2_AIV1, {InstrProfGrpType::AIV1, InstrProfGrpId::GROUP2}},
{InstrChannel::GROUP3_AIC, {InstrProfGrpType::AIC, InstrProfGrpId::GROUP3}},
{InstrChannel::GROUP3_AIV0, {InstrProfGrpType::AIV0, InstrProfGrpId::GROUP3}},
{InstrChannel::GROUP3_AIV1, {InstrProfGrpType::AIV1, InstrProfGrpId::GROUP3}},
{InstrChannel::GROUP4_AIC, {InstrProfGrpType::AIC, InstrProfGrpId::GROUP4}},
{InstrChannel::GROUP4_AIV0, {InstrProfGrpType::AIV0, InstrProfGrpId::GROUP4}},
{InstrChannel::GROUP4_AIV1, {InstrProfGrpType::AIV1, InstrProfGrpId::GROUP4}},
{InstrChannel::GROUP5_AIC, {InstrProfGrpType::AIC, InstrProfGrpId::GROUP5}},
{InstrChannel::GROUP5_AIV0, {InstrProfGrpType::AIV0, InstrProfGrpId::GROUP5}},
{InstrChannel::GROUP5_AIV1, {InstrProfGrpType::AIV1, InstrProfGrpId::GROUP5}},
};
}
bool Start(uint32_t replayCount, bool hasSimt) override;
void Stop() override;
private:
bool WriteInstrChannelData(const std::string &prefixName, InstrChannel channelId,
const char *data, int validLen, InstrChnReadCtrl &instrChnReadController) override;
bool StartInstrProfTask(int mode);
void StopInstrProfChannels();
std::map<InstrChannel, InstrProfChnInfo> instrProfChn_;
std::atomic<InstrProfState> instrProfState_ {InstrProfState::IDLE};
private:
bool isStarsStart_ = false;
bool StartStarsTask();
bool StartFFTSTask(uint32_t replayCount);
};
class ProfTaskOf910B : public ProfTask {
public:
ProfTaskOf910B(const MessageOfProfConfig &profTaskConfig, uint32_t deviceId)
: ProfTask(profTaskConfig, deviceId) {}
bool Start(uint32_t replayCount, bool hasSimt) override;
void Stop() override;
private:
bool isStarsStart_ = false;
bool StartHCCSTask();
bool StartTSFWTask();
bool StartStarsTask();
bool StartFFTSTask(uint32_t replayCount);
};
class ProfTaskOf310P : public ProfTask {
public:
ProfTaskOf310P(const MessageOfProfConfig &profTaskConfig, uint32_t deviceId)
: ProfTask(profTaskConfig, deviceId) {}
bool Start(uint32_t replayCount, bool hasSimt) override;
void Stop() override;
private:
bool StartHwtsTask();
bool isL2CacheStart_ = false;
bool isAiCoreStart_ = false;
bool isHwtsStart_ = false;
};
class InstrProfTask {
public:
bool GetTask(int mode, prof_start_para_t &instrProfStartPara) const;
};
class FftsA5Task {
public:
bool GetTask(uint16_t *aicEventPtr, uint16_t *aivEventPtr, prof_start_para_t &fftsA5ProfStartPara) const;
bool useProfileMode {false};
};
class FftsTask {
public:
bool GetTask(uint16_t *aicEventPtr, uint16_t *aivEventPtr, prof_start_para_t &fftsProfStartPara) const;
bool useProfileMode {false};
};
class StarsTask {
public:
bool GetTask(prof_start_para_t &starsProfStartPara) const;
bool GetTaskA5(prof_start_para_t &starsProfStartPara) const;
};
class AiCoreTask {
public:
bool GetTask(uint16_t *eventPtr, prof_start_para_t &aiCoreProfStartPara) const;
};
using TsHwtsProfileConfigT = struct TagTsHwtsProfileConfig {
uint32_t tag;
};
class HwtsTask {
public:
bool GetTask(prof_start_para_t &hwtsProfStartPara) const;
};
using TsL2CacheProfileConfigT = struct TagTsL2CacheProfileConfig {
uint32_t eventNum;
uint32_t event[PMU_EVENT_MAX_NUM];
};
class L2CacheTask {
public:
bool GetTask(uint16_t *eventPtr, prof_start_para_t &l2CacheProfStartPara) const;
bool GetL2cacheEvict(prof_start_para_t &l2CacheProfStartPara, uint16_t *l2EventPtr) const;
};
class HccsTask {
public:
bool GetTask(prof_start_para_t &l2CacheProfStartPara) const;
};
class AicpuTask {
public:
void GetTask(prof_start_para_t &aicpuProfStartPara) const;
};
void AicpuTask::GetTask(prof_start_para_t &aicpuProfStartPara) const
{
aicpuProfStartPara.channel_type = PROF_PERIPHERAL_TYPE;
aicpuProfStartPara.sample_period = AICPU_SAMPLE_PERIOD;
aicpuProfStartPara.real_time = PROF_REAL;
aicpuProfStartPara.user_data = nullptr;
aicpuProfStartPara.user_data_size = 0;
}
bool InstrProfTask::GetTask(int mode, prof_start_para_t &instrProfStartPara) const
{
uint32_t instrConfigSize = sizeof(InstrProfileConfigT);
auto *configPtrInstrProf = static_cast<InstrProfileConfigT *>(malloc(instrConfigSize));
if (configPtrInstrProf == nullptr) {
ERROR_LOG("Can not get user data pointer while getting instr profile task");
return false;
}
instrProfStartPara.user_data = nullptr;
std::fill_n(reinterpret_cast<char *>(configPtrInstrProf), instrConfigSize, 0);
configPtrInstrProf->period = INSTR_PROF_PERIOD;
configPtrInstrProf->biuPcSamplingMode = static_cast<uint32_t>(mode);
configPtrInstrProf->groupType = 0;
configPtrInstrProf->groupNo = 0;
instrProfStartPara.channel_type = PROF_TS_TYPE;
instrProfStartPara.sample_period = 0;
instrProfStartPara.real_time = PROF_REAL;
instrProfStartPara.user_data = configPtrInstrProf;
instrProfStartPara.user_data_size = instrConfigSize;
return true;
}
bool FftsA5Task::GetTask(uint16_t *aicEventPtr, uint16_t *aivEventPtr, prof_start_para_t &fftsA5ProfStartPara) const
{
uint32_t fftsA5ConfigSize = sizeof(FftsA5ProfileConfigT);
auto *configPtrFfts = static_cast<FftsA5ProfileConfigT *>(malloc(fftsA5ConfigSize));
if (configPtrFfts == nullptr) {
ERROR_LOG("Can not get user data pointer while getting ffts task");
return false;
}
fftsA5ProfStartPara.user_data = nullptr;
std::fill_n(reinterpret_cast<char *>(configPtrFfts), fftsA5ConfigSize, 0);
configPtrFfts->cfgMode = FFTS_PROF_CFG_MODE;
configPtrFfts->blockShinkFlag = FFTS_PROF_BLOCK_SHINK_DISABLE;
configPtrFfts->aicScale = useProfileMode ? FFTS_PROF_AIC_SCALE_PARTIAL : FFTS_PROF_AIC_SCALE_ALL;
configPtrFfts->aiEventCfg[0].period = PROF_CFG_PERIOD;
configPtrFfts->aiEventCfg[0].eventNum = PMU_EVENT_MAX_NUM_A5;
configPtrFfts->aiEventCfg[0].type = FFTS_PROF_CFG_TYPE;
configPtrFfts->aiEventCfg[0].coreMask = FFTS_PROF_CFG_CORE_MASK;
configPtrFfts->aiEventCfg[1].period = PROF_CFG_PERIOD;
configPtrFfts->aiEventCfg[1].eventNum = PMU_EVENT_MAX_NUM_A5;
configPtrFfts->aiEventCfg[1].type = FFTS_PROF_CFG_TYPE;
configPtrFfts->aiEventCfg[1].coreMask = FFTS_PROF_CFG_CORE_MASK;
for (size_t i = 0 ; i < PMU_EVENT_MAX_NUM_A5 ; ++i, ++aicEventPtr, ++aivEventPtr) {
DEBUG_LOG("PMU event %zu, aic: %d, aiv: %d", i, *aicEventPtr, *aivEventPtr);
configPtrFfts->aiEventCfg[0].event[i] = *aicEventPtr;
configPtrFfts->aiEventCfg[1].event[i] = *aivEventPtr;
}
fftsA5ProfStartPara.channel_type = PROF_TS_TYPE;
fftsA5ProfStartPara.sample_period = 0;
fftsA5ProfStartPara.real_time = PROF_REAL;
fftsA5ProfStartPara.user_data = configPtrFfts;
fftsA5ProfStartPara.user_data_size = fftsA5ConfigSize;
return true;
}
bool HccsTask::GetTask(prof_start_para_t &l2CacheProfStartPara) const
{
l2CacheProfStartPara.channel_type = PROF_PERIPHERAL_TYPE;
l2CacheProfStartPara.sample_period = static_cast<unsigned int>(10);
l2CacheProfStartPara.real_time = PROF_REAL;
l2CacheProfStartPara.user_data = nullptr;
l2CacheProfStartPara.user_data_size = 0;
return true;
}
bool FftsTask::GetTask(uint16_t *aicEventPtr, uint16_t *aivEventPtr, prof_start_para_t &fftsProfStartPara) const
{
uint32_t fftsConfigSize = sizeof(FftsProfileConfigT);
auto *configPtrFfts = static_cast<FftsProfileConfigT *>(malloc(fftsConfigSize));
if (configPtrFfts == nullptr) {
ERROR_LOG("Can not get user data pointer while getting ffts task");
return false;
}
fftsProfStartPara.user_data = nullptr;
std::fill_n(reinterpret_cast<char *>(configPtrFfts), fftsConfigSize, 0);
configPtrFfts->cfgMode = FFTS_PROF_CFG_MODE;
for (int i = 0; i < 2; i++) {
configPtrFfts->aiEventCfg[i].period = PROF_CFG_PERIOD;
configPtrFfts->aiEventCfg[i].event_num = PMU_EVENT_MAX_NUM;
configPtrFfts->aiEventCfg[i].type = FFTS_PROF_CFG_TYPE;
configPtrFfts->aiEventCfg[i].core_mask = FFTS_PROF_CFG_CORE_MASK;
}
if (useProfileMode) {
configPtrFfts->tag = 0x10000;
DEBUG_LOG("Successful set kernelScale!");
}
for (size_t i = 0 ; i < PMU_EVENT_MAX_NUM ; ++i, ++aicEventPtr, ++aivEventPtr) {
DEBUG_LOG("PMU event %zu, aic: %d, aiv: %d", i, *aicEventPtr, *aivEventPtr);
configPtrFfts->aiEventCfg[0].event[i] = *aicEventPtr;
configPtrFfts->aiEventCfg[1].event[i] = *aivEventPtr;
}
fftsProfStartPara.channel_type = PROF_TS_TYPE;
fftsProfStartPara.sample_period = PROF_CFG_PERIOD;
fftsProfStartPara.real_time = PROF_REAL;
fftsProfStartPara.user_data = configPtrFfts;
fftsProfStartPara.user_data_size = fftsConfigSize;
return true;
}
bool StarsTask::GetTask(prof_start_para_t &starsProfStartPara) const
{
uint32_t starsConfigSize = sizeof(StarsSocLogConfigT);
auto *starsConfigPtr = static_cast<StarsSocLogConfigT *>(malloc(starsConfigSize));
starsProfStartPara.user_data = nullptr;
if (starsConfigPtr == nullptr) {
ERROR_LOG("Can not get user data pointer while getting stars task");
return false;
}
std::fill_n(reinterpret_cast<char *>(starsConfigPtr), starsConfigSize, 0);
starsConfigPtr->acsq_task = STARS_ENABLE_FLAG;
starsConfigPtr->ffts_thread_task = STARS_ENABLE_FLAG;
starsConfigPtr->acc_pmu = STARS_ENABLE_FLAG;
starsProfStartPara.channel_type = PROF_TS_TYPE;
starsProfStartPara.sample_period = PROF_CFG_PERIOD;
starsProfStartPara.real_time = PROF_REAL;
starsProfStartPara.user_data = starsConfigPtr;
starsProfStartPara.user_data_size = starsConfigSize;
return true;
}
bool StarsTask::GetTaskA5(prof_start_para_t &starsProfStartPara) const
{
uint32_t starsConfigSize = sizeof(StarsA5SocLogConfigT);
auto *starsConfigPtr = static_cast<StarsA5SocLogConfigT *>(malloc(starsConfigSize));
starsProfStartPara.user_data = nullptr;
if (starsConfigPtr == nullptr) {
ERROR_LOG("Can not get user data pointer while getting stars task");
return false;
}
std::fill_n(reinterpret_cast<char *>(starsConfigPtr), starsConfigSize, 0);
starsConfigPtr->acsq_task = STARS_ENABLE_FLAG;
starsConfigPtr->ffts_thread_task = STARS_ENABLE_FLAG;
starsConfigPtr->acc_pmu = STARS_ENABLE_FLAG;
starsProfStartPara.channel_type = PROF_TS_TYPE;
starsProfStartPara.sample_period = PROF_CFG_PERIOD;
starsProfStartPara.real_time = PROF_REAL;
starsProfStartPara.user_data = starsConfigPtr;
starsProfStartPara.user_data_size = starsConfigSize;
return true;
}
bool AiCoreTask::GetTask(uint16_t *eventPtr, prof_start_para_t &aiCoreProfStartPara) const
{
uint32_t aiCoreConfigSize = sizeof(TsAiCoreProfileConfigT);
auto *configPtr = static_cast<TsAiCoreProfileConfigT *>(malloc(aiCoreConfigSize));
aiCoreProfStartPara.user_data = nullptr;
if (configPtr == nullptr) {
ERROR_LOG("Can not get user data pointer while getting aicore task");
return false;
}
std::fill_n(reinterpret_cast<char *>(configPtr), aiCoreConfigSize, 0);
configPtr->tag = AICORE_TAG;
configPtr->type = AICORE_TASK_BASE_TYPE;
configPtr->eventNum = PMU_EVENT_MAX_NUM;
for (size_t i = 0 ; i < PMU_EVENT_MAX_NUM ; ++i, ++eventPtr) {
DEBUG_LOG("aicore event %zu, pmu: %d", i, *eventPtr);
configPtr->event[i] = *eventPtr;
}
aiCoreProfStartPara.channel_type = PROF_TS_TYPE;
aiCoreProfStartPara.sample_period = PROF_CFG_PERIOD;
aiCoreProfStartPara.real_time = PROF_REAL;
aiCoreProfStartPara.user_data = configPtr;
aiCoreProfStartPara.user_data_size = aiCoreConfigSize;
return true;
}
bool HwtsTask::GetTask(prof_start_para_t &hwtsProfStartPara) const
{
uint32_t hwtsConfigSize = sizeof(TsHwtsProfileConfigT);
auto *configPtr = static_cast<TsHwtsProfileConfigT *>(malloc(hwtsConfigSize));
hwtsProfStartPara.user_data = nullptr;
if (configPtr == nullptr) {
ERROR_LOG("Can not get user data pointer while getting hwts task");
return false;
}
std::fill_n(reinterpret_cast<char *>(configPtr), hwtsConfigSize, 0);
configPtr->tag = HWTS_TAG;
hwtsProfStartPara.channel_type = PROF_TS_TYPE;
hwtsProfStartPara.sample_period = PROF_CFG_PERIOD;
hwtsProfStartPara.real_time = PROF_REAL;
hwtsProfStartPara.user_data = configPtr;
hwtsProfStartPara.user_data_size = hwtsConfigSize;
return true;
}
bool L2CacheTask::GetTask(uint16_t *eventPtr, prof_start_para_t &l2CacheProfStartPara) const
{
uint32_t l2CacheConfigSize = sizeof(TsL2CacheProfileConfigT);
auto *configPtr = static_cast<TsL2CacheProfileConfigT *>(malloc(l2CacheConfigSize));
l2CacheProfStartPara.user_data = nullptr;
if (configPtr == nullptr) {
ERROR_LOG("Can not get user data pointer while getting l2cache task");
return false;
}
std::fill_n(reinterpret_cast<char *>(configPtr), l2CacheConfigSize, 0);
configPtr->eventNum = PMU_EVENT_MAX_NUM;
for (size_t i = 0 ; i < PMU_EVENT_MAX_NUM ; ++i, ++eventPtr) {
DEBUG_LOG("l2cache event %zu, pmu: %d", i, *eventPtr);
configPtr->event[i] = *eventPtr;
}
l2CacheProfStartPara.channel_type = PROF_TS_TYPE;
l2CacheProfStartPara.sample_period = PROF_CFG_PERIOD;
l2CacheProfStartPara.real_time = PROF_REAL;
l2CacheProfStartPara.user_data = configPtr;
l2CacheProfStartPara.user_data_size = l2CacheConfigSize;
return true;
}
bool L2CacheTask::GetL2cacheEvict(prof_start_para_t &l2CacheProfStartPara, uint16_t *l2EventPtr) const
{
uint32_t l2CacheConfigSize = sizeof(TsL2CacheProfileConfigT) + 8 * sizeof(uint32_t);
auto *configPtr = static_cast<TsL2CacheProfileConfigT *>(malloc(l2CacheConfigSize));
l2CacheProfStartPara.user_data = nullptr;
if (configPtr == nullptr) {
ERROR_LOG("Can not get user data pointer while getting l2cache task");
return false;
}
std::fill_n(reinterpret_cast<char *>(configPtr), l2CacheConfigSize, 0);
std::copy(l2EventPtr, l2EventPtr + PMU_EVENT_MAX_NUM, configPtr->event);
uint32_t *eventPtr = configPtr->event;
configPtr->eventNum = PMU_EVENT_MAX_NUM;
for (size_t i = 0 ; i < PMU_EVENT_MAX_NUM ; ++i, ++eventPtr) {
DEBUG_LOG("l2cache event %zu, pmu:0x%X", i, *eventPtr);
configPtr->event[i] = *eventPtr;
}
l2CacheProfStartPara.channel_type = PROF_TS_TYPE;
l2CacheProfStartPara.sample_period = PROF_CFG_PERIOD;
l2CacheProfStartPara.real_time = PROF_REAL;
l2CacheProfStartPara.user_data = configPtr;
l2CacheProfStartPara.user_data_size = l2CacheConfigSize;
return true;
}
void ProfTask::ChannelRead()
{
map<unsigned int, std::string> channelFileMap = {
{CHANNEL_FFTS_PROFILE_BUFFER_TASK, "DeviceProf"},
{CHANNEL_AICORE, "DeviceProf"},
{CHANNEL_STARS_SOC_LOG_BUFFER, "duration.bin"},
{CHANNEL_HWTS_LOG, "duration.bin"},
{CHANNEL_TSFW_L2, "L2Cache.bin"},
{CHANNEL_AICPU, "aicpu.bin"},
{CHANNEL_HCCS, "hccs.bin"}
};
vector<char> outBuf(PROF_CHANNEL_BUFFER_SIZE, 0);
vector<prof_poll_info_t> channels(PROF_CHANNEL_NUM);
InstrChnReadCtrl instrChnReadController;
do {
int ret = prof_channel_poll_origin(channels.data(), PROF_CHANNEL_NUM, 1);
for (int i = 0; i < std::min(ret, PROF_CHANNEL_NUM); i++) {
std::fill_n(&outBuf[0], PROF_CHANNEL_BUFFER_SIZE, 0);
int curLen = prof_channel_read_origin(channels[i].device_id, channels[i].channel_id,
&outBuf[0], PROF_CHANNEL_BUFFER_SIZE);
string dumpPath = ProfDataCollect::GetAicoreOutputPath(static_cast<int32_t>(channels[i].device_id));
if (dumpPath.empty()) {
ERROR_LOG("Can not find dump path to generate bin file.");
continue;
}
if (WriteInstrChannelData(dumpPath, static_cast<InstrChannel>(channels[i].channel_id),
&outBuf[0], curLen, instrChnReadController)) {
continue;
}
auto channelIter = channelFileMap.find(channels[i].channel_id);
if ((channelIter == channelFileMap.end()) || (curLen <= 0)) {
continue;
}
string fileName = channelIter->second;
if (channels[i].channel_id == CHANNEL_FFTS_PROFILE_BUFFER_TASK ||
channels[i].channel_id == CHANNEL_AICORE) {
fileName = fileName + to_string(ProfDataCollect::GetDeviceReplayCount(
static_cast<int32_t>(channels[i].device_id)) + 1) + ".bin";
}
string binFile = JoinPath({dumpPath, fileName});
if (WriteBinary(binFile, &outBuf[0], curLen, std::ios::app) == 0) {
WARN_LOG("Write bin failed, bin is %s", binFile.c_str());
} else {
DEBUG_LOG("Write bin success, bin is %s, curLen is %d", binFile.c_str(), curLen);
}
}
} while (profRunning_);
}
bool ProfTaskOfA5::WriteInstrChannelData(const std::string &prefixName, InstrChannel channelId,
const char *data, int validLen, InstrChnReadCtrl &instrChnReadController)
{
if (instrProfChn_.count(channelId) == 0UL) {
return false;
}
DEBUG_LOG("Write instr prof channel id:%d", static_cast<int>(channelId));
if (instrChnReadController.InstrProfReadSize > MAX_BIN_FILE_SIZE - sizeof(InstrProfHeadInfoT) -
static_cast<uint64_t>(PROF_CHANNEL_BUFFER_SIZE)) {
instrChnReadController.splitFileNum += 1;
instrChnReadController.InstrProfReadSize = 0;
}
std::string binName;
if (instrProfState_ == InstrProfState::PC_SAMPLING) {
binName = "pcSampling.bin." + std::to_string(instrChnReadController.splitFileNum);
} else if (instrProfState_ == InstrProfState::TIMELINE) {
binName = "timeline.bin." + std::to_string(instrChnReadController.splitFileNum);
} else {
return true;
}
std::string binFile = JoinPath({prefixName, binName});
InstrProfHeadInfoT instrProfHeadInfo;
instrProfHeadInfo.coreId = static_cast<uint16_t>(instrProfChn_[channelId].grpId);
instrProfHeadInfo.coreType = static_cast<uint16_t>(instrProfChn_[channelId].grpType);
instrProfHeadInfo.validLen = static_cast<uint32_t>(validLen);
if (WriteBinary(binFile, reinterpret_cast<char *>(&instrProfHeadInfo), sizeof(InstrProfHeadInfoT),
std::ios::app) == 0UL) {
DEBUG_LOG("Write instr head info failed, bin is %s", binName.c_str());
return true;
} else {
instrChnReadController.InstrProfReadSize += sizeof(InstrProfHeadInfoT);
DEBUG_LOG("Write instr head info success, bin is %s, valid len is %u", binName.c_str(),
instrProfHeadInfo.validLen);
}
if (WriteBinary(binFile, data, PROF_CHANNEL_BUFFER_SIZE, std::ios::app) == 0UL) {
DEBUG_LOG("Write instr data info failed, bin is %s", binName.c_str());
} else {
instrChnReadController.InstrProfReadSize += static_cast<uint64_t>(PROF_CHANNEL_BUFFER_SIZE);
DEBUG_LOG("Write instr data info success, bin is %s, size is %u", binName.c_str(),
instrChnReadController.InstrProfReadSize);
}
return true;
}
bool ProfTaskOfA5::StartInstrProfTask(int mode)
{
InstrProfTask instrProfTask;
prof_start_para_t instrProfStartPara;
if (!instrProfTask.GetTask(mode, instrProfStartPara)) {
return false;
}
for (auto const &it : instrProfChn_) {
int ret = prof_drv_start_origin(deviceId_, static_cast<uint32_t>(it.first), &instrProfStartPara);
if (ret != 0) {
DEBUG_LOG("Failed to start instr profiling channel:%d, return code:%d", static_cast<uint32_t>(it.first), ret);
}
}
free(instrProfStartPara.user_data);
return true;
}
bool ProfTaskOfA5::StartFFTSTask(uint32_t replayCount)
{
FftsA5Task fftsTask;
if (replayCount >= EVENT_MAX_NUM_A5 / PMU_EVENT_MAX_NUM_A5) {
return false;
}
uint16_t *aicEventPtr = profTaskConfig_.aicPmu + replayCount * PMU_EVENT_MAX_NUM_A5;
uint16_t *aivEventPtr = profTaskConfig_.aivPmu + replayCount * PMU_EVENT_MAX_NUM_A5;
prof_start_para_t fftsProfStartPara;
if (profTaskConfig_.useProfileMode) {
fftsTask.useProfileMode = true;
DEBUG_LOG("Successful set kernelScale!");
}
if (!fftsTask.GetTask(aicEventPtr, aivEventPtr, fftsProfStartPara) ||
prof_drv_start_origin(deviceId_, CHANNEL_FFTS_PROFILE_BUFFER_TASK, &fftsProfStartPara) != 0) {
ERROR_LOG("Profiling channel start failed.");
free(fftsProfStartPara.user_data);
return false;
}
free(fftsProfStartPara.user_data);
if ((replayCount == EVENT_MAX_NUM_A5 / PMU_EVENT_MAX_NUM_A5 - 1) ||
((aicEventPtr[PMU_EVENT_MAX_NUM_A5] == 0) && (aivEventPtr[PMU_EVENT_MAX_NUM_A5] == 0))) {
isLastReplay_ = true;
}
return true;
}
bool ProfTaskOfA5::StartStarsTask()
{
StarsTask starsTask {};
prof_start_para_t starsProfStartPara;
if (!starsTask.GetTaskA5(starsProfStartPara) ||
prof_drv_start_origin(deviceId_, CHANNEL_STARS_SOC_LOG_BUFFER, &starsProfStartPara) != 0) {
ERROR_LOG("Profiling channel start failed.");
free(starsProfStartPara.user_data);
return false;
}
free(starsProfStartPara.user_data);
isStarsStart_ = true;
return true;
}
bool ProfTaskOfA5::Start(uint32_t replayCount, bool hasSimt)
{
profRunning_ = true;
hasSimt_ = hasSimt;
if (instrProfState_ == InstrProfState::IDLE &&
ProfConfig::Instance().IsPCSamplingEnabled() && hasSimt_) {
instrProfState_ = InstrProfState::PC_SAMPLING;
return StartInstrProfTask(INSTR_PROF_MODE_PC_SAMPLING);
}
if ((instrProfState_ == InstrProfState::IDLE || instrProfState_ == InstrProfState::TIMELINE) &&
(ProfConfig::Instance().IsInstrTimelineEnabled() || ProfConfig::Instance().IsPipeTimelineEnabled())) {
instrProfState_ = InstrProfState::TIMELINE;
return StartInstrProfTask(INSTR_PROF_MODE_BIU_PERF);
}
if (!StartFFTSTask(replayCount)) {
return false;
}
if (!isStarsStart_ && isLastReplay_) {
return StartStarsTask();
}
return true;
}
void ProfTaskOfA5::StopInstrProfChannels()
{
for (auto const &it : instrProfChn_) {
prof_stop_origin(deviceId_, static_cast<uint32_t>(it.first));
}
}
void ProfTaskOfA5::Stop()
{
prof_stop_origin(deviceId_, CHANNEL_FFTS_PROFILE_BUFFER_TASK);
if (isStarsStart_) {
prof_stop_origin(deviceId_, CHANNEL_STARS_SOC_LOG_BUFFER);
}
if (instrProfState_ == InstrProfState::PC_SAMPLING) {
StopInstrProfChannels();
instrProfState_ = (ProfConfig::Instance().IsInstrTimelineEnabled() || ProfConfig::Instance().IsPipeTimelineEnabled())
? InstrProfState::TIMELINE : InstrProfState::DONE;
} else if (instrProfState_ == InstrProfState::TIMELINE) {
StopInstrProfChannels();
instrProfState_ = InstrProfState::DONE;
}
profRunning_ = false;
}
bool ProfTaskOf910B::StartStarsTask()
{
StarsTask starsTask;
prof_start_para_t starsProfStartPara {};
if (!starsTask.GetTask(starsProfStartPara) ||
prof_drv_start_origin(deviceId_, CHANNEL_STARS_SOC_LOG_BUFFER, &starsProfStartPara) != 0) {
ERROR_LOG("Profiling channel start failed.");
free(starsProfStartPara.user_data);
return false;
}
free(starsProfStartPara.user_data);
isStarsStart_ = true;
return true;
}
bool ProfTaskOf910B::StartFFTSTask(uint32_t replayCount)
{
FftsTask fftsTask {};
if (replayCount > (EVENT_MAX_NUM / PMU_EVENT_MAX_NUM - 1)) {
return false;
}
uint16_t *aicEventPtr = profTaskConfig_.aicPmu + replayCount * PMU_EVENT_MAX_NUM;
uint16_t *aivEventPtr = profTaskConfig_.aivPmu + replayCount * PMU_EVENT_MAX_NUM;
prof_start_para_t fftsProfStartPara {};
if (profTaskConfig_.useProfileMode) {
fftsTask.useProfileMode = true;
}
if (!fftsTask.GetTask(aicEventPtr, aivEventPtr, fftsProfStartPara) ||
prof_drv_start_origin(deviceId_, CHANNEL_FFTS_PROFILE_BUFFER_TASK, &fftsProfStartPara) != 0) {
ERROR_LOG("Profiling channel start failed.");
free(fftsProfStartPara.user_data);
return false;
}
free(fftsProfStartPara.user_data);
if ((replayCount == EVENT_MAX_NUM / PMU_EVENT_MAX_NUM - 1) ||
((aicEventPtr[PMU_EVENT_MAX_NUM] == 0) && (aivEventPtr[PMU_EVENT_MAX_NUM] == 0))) {
isLastReplay_ = true;
}
return true;
}
bool ProfTaskOf910B::StartTSFWTask()
{
if (isL2CacheStart_) {
return true;
}
L2CacheTask l2CacheTask;
prof_start_para_t l2CacheProfStartPara;
uint16_t *l2EventPtr = profTaskConfig_.l2CachePmu;
l2CacheTask.GetL2cacheEvict(l2CacheProfStartPara, l2EventPtr);
if (prof_drv_start_origin(deviceId_, CHANNEL_TSFW_L2, &l2CacheProfStartPara) != 0) {
ERROR_LOG("Profiling channel start failed, channel is %u.", static_cast<uint32_t>(CHANNEL_TSFW_L2));
free(l2CacheProfStartPara.user_data);
return false;
}
free(l2CacheProfStartPara.user_data);
isL2CacheStart_ = true;
return true;
}
bool ProfTaskOf910B::StartHCCSTask()
{
if (isHccs_) {
return true;
}
HccsTask hccsTask;
prof_start_para_t profStartPara;
hccsTask.GetTask(profStartPara);
if (prof_drv_start_origin(deviceId_, CHANNEL_HCCS, &profStartPara) != 0) {
ERROR_LOG("Profiling channel start failed, channel is %d.", static_cast<uint32_t>(CHANNEL_HCCS));
free(profStartPara.user_data);
return false;
}
free(profStartPara.user_data);
isHccs_ = true;
return true;
}
bool ProfTaskOf910B::Start(uint32_t replayCount, bool hasSimt)
{
profRunning_ = true;
if (profTaskConfig_.l2CachePmu[0] != 0) {
profTaskConfig_.l2CachePmu[0] = 0;
if (!StartTSFWTask()) {
WARN_LOG("Profiling l2 channel start failed!.");
return false;
}
return true;
}
if (!StartFFTSTask(replayCount)) {
return false;
}
if (replayCount == 0 && KernelContext::Instance().GetMC2Flag()) {
AicpuTask aicpuTask;
prof_start_para_t aicpuProfStartPara;
aicpuTask.GetTask(aicpuProfStartPara);
if (prof_drv_start_origin(deviceId_, CHANNEL_AICPU, &aicpuProfStartPara) != 0) {
ERROR_LOG("Profiling channel start failed.");
return false;
}
}
if (!isStarsStart_ && isLastReplay_) {
return StartStarsTask();
}
return true;
}
void ProfTaskOf910B::Stop()
{
prof_stop_origin(deviceId_, CHANNEL_FFTS_PROFILE_BUFFER_TASK);
if (isStarsStart_) {
prof_stop_origin(deviceId_, CHANNEL_STARS_SOC_LOG_BUFFER);
}
if (isLastReplay_ && KernelContext::Instance().GetMC2Flag()) {
prof_stop_origin(deviceId_, CHANNEL_AICPU);
}
if (isL2CacheStart_) {
prof_stop_origin(deviceId_, CHANNEL_TSFW_L2);
}
if (isHccs_) {
prof_stop_origin(deviceId_, CHANNEL_HCCS);
}
profRunning_ = false;
}
bool ProfTaskOf310P::StartHwtsTask()
{
HwtsTask hwtsTask;
prof_start_para_t hwtsProfStartPara {};
if (!hwtsTask.GetTask(hwtsProfStartPara) ||
prof_drv_start_origin(deviceId_, CHANNEL_HWTS_LOG, &hwtsProfStartPara) != 0) {
ERROR_LOG("Profiling channel start failed.");
free(hwtsProfStartPara.user_data);
return false;
}
free(hwtsProfStartPara.user_data);
isHwtsStart_ = true;
return true;
}
bool ProfTaskOf310P::Start(uint32_t replayCount, bool hasSimt)
{
profRunning_ = true;
if (replayCount >= EVENT_MAX_NUM / PMU_EVENT_MAX_NUM) {
return false;
}
uint16_t *aicEventPtr = profTaskConfig_.aicPmu + replayCount * PMU_EVENT_MAX_NUM;
if (profTaskConfig_.useProfileMode) {
DEBUG_LOG("KernelScale param is only use for Ascend910B!");
}
if (aicEventPtr[0] != 0 || replayCount == 0) {
AiCoreTask aiCoreTask {};
prof_start_para_t aiCoreProfStartPara;
if (!aiCoreTask.GetTask(aicEventPtr, aiCoreProfStartPara) ||
prof_drv_start_origin(deviceId_, CHANNEL_AICORE, &aiCoreProfStartPara) != 0) {
ERROR_LOG("Profiling channel start failed.");
free(aiCoreProfStartPara.user_data);
return false;
}
free(aiCoreProfStartPara.user_data);
isAiCoreStart_ = true;
}
if (!isHwtsStart_ && (aicEventPtr[PMU_EVENT_MAX_NUM] == 0)) {
if (!StartHwtsTask()) {
return false;
}
}
if (replayCount == 0 && profTaskConfig_.l2CachePmu[0] != 0) {
L2CacheTask l2CacheTask {};
prof_start_para_t l2CacheProfStartPara {};
if (!l2CacheTask.GetTask(profTaskConfig_.l2CachePmu, l2CacheProfStartPara) ||
prof_drv_start_origin(deviceId_, CHANNEL_TSFW_L2, &l2CacheProfStartPara) != 0) {
ERROR_LOG("Profiling channel start failed.");
free(l2CacheProfStartPara.user_data);
return false;
}
free(l2CacheProfStartPara.user_data);
isL2CacheStart_ = true;
}
return true;
}
void ProfTaskOf310P::Stop()
{
if (isAiCoreStart_) {
prof_stop_origin(deviceId_, CHANNEL_AICORE);
}
if (isHwtsStart_) {
prof_stop_origin(deviceId_, CHANNEL_HWTS_LOG);
}
if (isL2CacheStart_) {
prof_stop_origin(deviceId_, CHANNEL_TSFW_L2);
}
profRunning_ = false;
}
std::unique_ptr<ProfTask> ProfTaskFactory::Create()
{
uint32_t deviceId = static_cast<uint32_t>(DeviceContext::GetRunningDeviceId());
std::string socVersion = DeviceContext::Local().GetSocVersion();
std::unique_ptr<ProfTask> taskPtr;
if (StartsWith(socVersion, "Ascend310P")) {
taskPtr = std::unique_ptr<ProfTaskOf310P>{
new ProfTaskOf310P(ProfConfig::Instance().GetConfig(), deviceId)};
} else if (StartsWith(socVersion, "Ascend910B")) {
taskPtr = std::unique_ptr<ProfTaskOf910B>{
new ProfTaskOf910B(ProfConfig::Instance().GetConfig(), deviceId)};
} else if (StartsWith(socVersion, "Ascend950")) {
taskPtr = std::unique_ptr<ProfTaskOfA5>{
new ProfTaskOfA5(ProfConfig::Instance().GetConfig(), deviceId)};
} else {
taskPtr = std::unique_ptr<ProfTask>{
new ProfTaskOf910B(ProfConfig::Instance().GetConfig(), deviceId)};
}
return taskPtr;
}