* This file is part of the MindStudio project.
* Copyright (c) 2025 Huawei Technologies Co.,Ltd.
*
* MindStudio is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
* ------------------------------------------------------------------------- */
#include "ProfDataCollect.h"
#include <map>
#include <chrono>
#include <atomic>
#include <csignal>
#include <mutex>
#include <sys/types.h>
#include <sys/wait.h>
#include <elf.h>
#include "ProfConfig.h"
#include "KernelContext.h"
#include "DeviceContext.h"
#include "ProfTask.h"
#include "utils/Ustring.h"
#include "utils/FileSystem.h"
#include "utils/Environment.h"
#include "utils/PipeCall.h"
#include "utils/signal.h"
#include "MemoryContext.h"
#include "ascend_hal/AscendHalOrigin.h"
#include "MsTx.h"
#include "BBCountDumper.h"
#include "KernelReplacement.h"
#include "ascendcl/AscendclOrigin.h"
#include "hccl/HcclOrigin.h"
#include "DBITask.h"
#include "camodel/CamodelHelper.h"
#include "runtime/inject_helpers/RegisterContext.h"
#include "acl_rt_impl/AscendclImplOrigin.h"
#include "profapi/ProfOriginal.h"
#include "LaunchManager.h"
#include "runtime/inject_helpers/DFXKernelLauncher.h"
#include "profapi/ProfInjectHelper.h"
using namespace std;
namespace {
// Bit flags combined into the `profSwitch` field of the command built in
// DataCollectInDevice::ProfCommandAction().
constexpr uint32_t PROF_AICORE_METRICS = 0x00000004ULL;
constexpr uint32_t PROF_INSTR = 0x00800000ULL;
constexpr uint32_t PROF_L2CACHE = 0x00000010ULL;
// Value written to the `devNums` field of every profiling command (one device per command).
constexpr uint32_t MSPROF_DEVICE_NUM = 1;
// Value written to the `modelId` field of every profiling command.
constexpr uint32_t PROF_INVALID_MODE_ID = 0xFFFFFFFFUL;
// Timeout handed to aclrtSynchronizeStreamWithTimeoutImplOrigin().
// NOTE(review): presumably milliseconds - confirm against the ACL API.
constexpr int32_t SYNCHRONIZE_TIME_OUT = 10000;
// Argument of the usleep() call issued before profiling is stopped in ReplayOnce(),
// i.e. a duration in microseconds.
constexpr int32_t WAIT_DATA_READ_TIME = 100;
// Environment variable holding the simulator dump directory (read by SharedRecord,
// exported to the child process by SimulatorLauncher::SetEnvToSimu()).
constexpr char const *CAMODEL_LOG_PATH_ENV = "CAMODEL_LOG_PATH";
// Location of the injection library relative to the msopprof installation path.
constexpr char const *MSOPPROF_INJECTION_LIB_PATH_FROM_MSOPPROF = "lib64/libmsopprof_injection.so";
// Environment variable carrying the output path when the simulator is launched by the device flow.
constexpr char const *MSOPPROF_OUTPUT_DUMP_PATH_ENV = "MSOPPROF_OUTPUT_DUMP_PATH";
// File name used for the saved AI-core kernel object inside an output directory.
constexpr char const *AICORE_KERNEL_NAME = "aicore_binary.o";
// Set to true by HandleSigInt() once SIGINT has been received.
std::atomic<bool> g_receiveSignal{false};
// Guards the one-time SIGINT callback registration performed in StartProf().
std::once_flag g_sigRegFlag;
// Value type of l2CacheClearTilingMap: a per-core clear size paired with an AI-core count.
// NOTE(review): clearSizePerCore is presumably a byte count - confirm against its consumer.
struct L2CacheClearTiling {
uint64_t clearSizePerCore;
uint32_t aicCoreNum;
};
// Lookup table of L2-cache clear tilings: {clearSizePerCore, aicCoreNum} per chip name.
// NOTE(review): keys look like SoC version strings (cf. DeviceContext::GetSocVersion()) - confirm
// against the code that queries this map.
// NOTE(review): the Ascend950DT_95A1/95A2 entries use 3728280 whereas every other 36-core entry
// uses 3729920 - confirm the difference is intentional.
static std::map<std::string, L2CacheClearTiling> l2CacheClearTilingMap = {
{"Ascend910B1", {8388608, 24}},
{"Ascend910B2", {8388608, 24}},
{"Ascend910B2C", {8388608, 24}},
{"Ascend910B3", {10485760, 20}},
{"Ascend910B4", {5242880, 20}},
{"Ascend910B4-1", {10485760, 20}},
{"Ascend910_9391", {8388608, 24}},
{"Ascend910_9392", {8388608, 24}},
{"Ascend910_9381", {8388608, 24}},
{"Ascend910_9382", {8388608, 24}},
{"Ascend910_9372", {10485760, 20}},
{"Ascend910_9362", {10485760, 20}},
{"Ascend310P1", {2097152, 10}},
{"Ascend310P3", {2097152, 8}},
{"Ascend310P5", {2097152, 8}},
{"Ascend310P7", {2097152, 8}},
{"Ascend950DT_950x", {4194304, 8}},
{"Ascend950DT_950y", {4194304, 8}},
{"Ascend950DT_9571", {4794880, 28}},
{"Ascend950DT_9572", {4794880, 28}},
{"Ascend950DT_9573", {4194304, 28}},
{"Ascend950DT_9574", {4194304, 28}},
{"Ascend950DT_9575", {4794880, 28}},
{"Ascend950DT_9576", {4794880, 28}},
{"Ascend950DT_9577", {4194304, 28}},
{"Ascend950DT_9578", {4194304, 28}},
{"Ascend950DT_9581", {4194304, 32}},
{"Ascend950DT_9582", {4194304, 32}},
{"Ascend950DT_9583", {3670016, 32}},
{"Ascend950DT_9584", {3670016, 32}},
{"Ascend950DT_9585", {4194304, 32}},
{"Ascend950DT_9586", {4194304, 32}},
{"Ascend950DT_9587", {3670016, 32}},
{"Ascend950DT_9588", {3670016, 32}},
{"Ascend950DT_9591", {3729920, 36}},
{"Ascend950DT_9592", {3729920, 36}},
{"Ascend950DT_9595", {3729920, 36}},
{"Ascend950DT_9596", {3729920, 36}},
{"Ascend950DT_95A1", {3728280, 36}},
{"Ascend950DT_95A2", {3728280, 36}},
{"Ascend950PR_950z", {2097152, 8}},
{"Ascend950PR_9579", {4794880, 28}},
{"Ascend950PR_957b", {4194304, 28}},
{"Ascend950PR_957c", {4194304, 28}},
{"Ascend950PR_957d", {3596800, 28}},
{"Ascend950PR_9589", {4194304, 32}},
{"Ascend950PR_958b", {3670016, 32}},
{"Ascend950PR_9599", {3729920, 36}},
};
/**
 * Pass-through check for an aclrt return code.
 *
 * @param result   return code of the aclrt call
 * @param apiName  name of the API, used only in the warning text
 * @return `result`, unchanged; a warning is logged when it is not ACL_SUCCESS
 */
aclError CheckAclResult(aclError result, const string &apiName) {
    if (result != ACL_SUCCESS) {
        WARN_LOG("aclrt API call %s() failed. error code: %d", apiName.c_str(), result);
    }
    return result;
}
// Parameter bundle handed by reference between the L2-cache helpers of DataCollectInDevice
// (PrepareClearL2CacheParam, MallocBuffer, ClearL2Cache, WaitClearL2Cache, FreeL2Cache) and
// the replay routines.
// NOTE(review): the meaning and ownership of the individual pointers are defined by those
// helpers, which are not part of this section - confirm there before relying on them.
struct L2cacheParam {
void *flushBuffer;
void *buffer;
void *cmoBuffer;
void *stream;
void *blockLen;
void *l2Buffer;
uint32_t blockDim;
uint64_t bufferSize;
};
}
// Dumps the current kernel and re-runs it through the external `kernel-launcher` binary in a
// child process whose environment is prepared for the simulator.
class SimulatorLauncher {
public:
// Resolves the injection library and kernel-launcher paths from the msopprof install path.
SimulatorLauncher();
// Dumps the kernel below `dumpPath`/kernel_data, then forks, execs the kernel-launcher and waits for it.
void Launch(const std::string &dumpPath, uint64_t launchId = UINT64_MAX, bool aclNew = false);
private:
// Creates `runtimePath`/libruntime.so as a symlink to `targetPath` and puts `runtimePath`
// at the front of LD_LIBRARY_PATH in `env`. Returns false on failure.
bool RuntimeToTargetLib(
std::map<std::string, std::string> &env, const std::string &runtimePath, const std::string &targetPath) const;
// Builds the environment strings for the child process; returns an empty vector on failure.
std::vector<std::string> SetEnvToSimu(const std::string &dumpPath);
// Builds the argument list: launcher path, "-c", and the kernel config file inside `outputDir`.
std::vector<std::string> GetLaunchArgs(const std::string &outputDir);
// Path of the kernel-launcher executable (empty when the msopprof path is unknown).
std::string kernelLaunchBinPath_;
// Path of the msopprof injection library (empty when the msopprof path is unknown).
std::string opprofInjectionLib_;
};
/**
 * SIGINT callback: raises the global "signal received" flag and unregisters itself.
 * Any other signal number is ignored.
 *
 * @param signo  number of the delivered signal
 */
static void HandleSigInt(int32_t signo) {
    if (signo != SIGINT) {
        return;
    }
    g_receiveSignal.store(true);
    SignalWrapper::UnregisterCallback(SIGINT);
}
/**
 * Base class of the profiling data collectors.
 *
 * Holds the per-launch state (device id, output path, kernel name, launch context) plus a few
 * process-wide maps, and offers no-op defaults for every collection hook so that derived
 * collectors override only what they support.
 */
class DataCollect {
public:
    int32_t deviceId_ = 0;
    std::string outputPath_;
    std::string kernelName_;
    LaunchContextSP launchCtx_{nullptr};
    bool hasSimt_ = false;

    // Process-wide bookkeeping shared by all collectors; each map has its own mutex.
    static std::mutex outputMutex_;
    static std::map<int32_t, std::string> deviceOutputPathMap_;
    static std::mutex replayCountMutex_;
    static std::map<int32_t, uint32_t> deviceReplayCountMap_;
    static std::mutex rangeConfigMutex_;
    static std::map<std::thread::id, RangeReplayConfig> threadRangeConfigMap_;

    explicit DataCollect(const LaunchContextSP &ctx, bool isInitOutput = true);

    // The class is a polymorphic base: a virtual destructor makes destruction through a
    // DataCollect pointer well-defined. Copy/move stay defaulted so that declaring the
    // destructor does not change how the class can be copied or moved.
    virtual ~DataCollect() = default;
    DataCollect(const DataCollect &) = default;
    DataCollect &operator=(const DataCollect &) = default;
    DataCollect(DataCollect &&) = default;
    DataCollect &operator=(DataCollect &&) = default;

    // Collection hooks. The defaults do nothing; the bool-returning ones report false.
    virtual bool ProfData(rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc) { return false; }
    virtual bool InstrProfData(rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc) { return false; }
    virtual bool ProfData() { return false; }
    virtual void ProfInit(const void *hdl, const void *stubFunc, bool type) {}
    virtual void GenBBcountFile(uint64_t regId, uint64_t memSize, uint8_t *memInfo) {}
    virtual bool RangeReplay(const rtStream_t &stream, const aclmdlRI &modelRI) { return false; }

    /**
     * Looks up the start PC of the kernel that is about to run.
     *
     * @param hdlOrStubFunc  stub-function pointer when `type` is true, kernel handle otherwise
     * @param tiling         tiling key, used only for the kernel-handle lookup
     * @param type           selects how `hdlOrStubFunc` is interpreted (see above)
     * @param pcStart        receives the start PC on success
     * @return true when the pointer is valid and the device context resolved a start PC
     */
    bool GetPcStartAddr(const void *hdlOrStubFunc, uint64_t tiling, bool type, uint64_t &pcStart) const {
        if (type) {
            KernelContext::StubFuncPtr stubFuncPtr{hdlOrStubFunc};
            if (!KernelContext::Instance().CheckStubValid(stubFuncPtr.value) ||
                !KernelContext::Instance().GetDeviceContext().GetPcStartAddr(
                    KernelContext::StubFuncArgs{stubFuncPtr.value, nullptr}, pcStart)) {
                DEBUG_LOG("Failed to get start pc, StubFuncPtr value is %p", stubFuncPtr.value);
                return false;
            }
        } else {
            KernelContext::KernelHandlePtr hdlPtr{hdlOrStubFunc};
            if (!KernelContext::Instance().CheckHdlVaild(hdlPtr.value) ||
                !KernelContext::Instance().GetDeviceContext().GetPcStartAddr(
                    KernelContext::KernelHandleArgs{hdlPtr.value, nullptr, tiling}, pcStart)) {
                DEBUG_LOG("Failed to get start pc, KernelHandlePtr value is %p", hdlPtr.value);
                return false;
            }
        }
        return true;
    }

    virtual void GenDBIData(uint64_t memSize, uint8_t *memInfo) {}
    virtual void GenRecordData(uint64_t memSize, uint8_t *memInfo, const std::string &recordName) const {}
};
// Out-of-class definitions of DataCollect's static members.
// deviceOutputPathMap_ is written by the DataCollect constructor (under outputMutex_) and
// deviceReplayCountMap_ by DataCollectInDevice::StartProf() (under replayCountMutex_).
std::map<int32_t, std::string> DataCollect::deviceOutputPathMap_;
std::map<int32_t, uint32_t> DataCollect::deviceReplayCountMap_;
std::map<std::thread::id, RangeReplayConfig> DataCollect::threadRangeConfigMap_;
std::mutex DataCollect::outputMutex_{};
std::mutex DataCollect::replayCountMutex_{};
std::mutex DataCollect::rangeConfigMutex_{};
/**
 * Collector used when the kernel runs on the simulator: it stores the kernel object next to
 * the results and moves the simulator dump files into the output directory after the launch.
 */
class DataCollectWithSimulator : public DataCollect {
private:
    /** Process-wide record of the temporary dump directory and of the kernel objects saved so far. */
    class SharedRecord {
    public:
        static SharedRecord &Instance()
        {
            static SharedRecord inst;
            return inst;
        }

        /** Returns the temporary dump path; the environment is read again while it is still empty. */
        const std::string &GetTmpDumpPath()
        {
            if (!tmpDumpPath_.empty()) {
                return tmpDumpPath_;
            }
            tmpDumpPath_ = GetEnv(CAMODEL_LOG_PATH_ENV);
            return tmpDumpPath_;
        }

        /** Kernel handle -> path of the object file saved for it. */
        std::map<const KernelHandle *, std::string> binaryPathMap_;

    private:
        SharedRecord() : tmpDumpPath_(GetEnv(CAMODEL_LOG_PATH_ENV)) {}

        std::string tmpDumpPath_;
    };

public:
    virtual ~DataCollectWithSimulator() = default;

    explicit DataCollectWithSimulator(const LaunchContextSP &ctx) : DataCollect(ctx) {}

    void ProfInit(const void *hdl, const void *stubFunc, bool type) override;

    /** Post-launch step: delegates to HandleDumpLogAfterLaunch(). */
    bool ProfData() override
    {
        return HandleDumpLogAfterLaunch();
    }

    static bool SaveObject(const KernelHandle *hdl);
    static bool SaveObject(const RegisterContextSP &ctx);

private:
    bool MakeCaFileReadable(const std::string &filePath) const;
    void CopyAiCoreBinFile(const KernelHandle *hdl);
    void ClearCaFile(const std::string &fileName) const;
    bool HandleDumpLogAfterLaunch();
};
class DataCollectInDevice : public DataCollect {
public:
explicit DataCollectInDevice(const LaunchContextSP &ctx, bool isInitOutput = true)
: DataCollect(ctx, isInitOutput) {
taskPtr_ = ProfTaskFactory::Create();
}
virtual ~DataCollectInDevice() = default;
void ProfInit(const void *hdl, const void *stubFunc, bool type) override;
bool ProfData(rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc) override;
bool InstrProfData(rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc) override;
void GenBBcountFile(uint64_t regId, uint64_t memSize, uint8_t *memInfo) override;
void GenDBIData(uint64_t memSize, uint8_t *memInfo) override;
void GenRecordData(uint64_t memSize, uint8_t *memInfo, const std::string &recordName) const override;
bool RangeReplay(const rtStream_t &stream, const aclmdlRI &modelRI) override;
private:
bool ReplayOnce(rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc, L2cacheParam ¶m);
bool KernelReplay(rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc);
bool KernelLaunchForInstrProf(rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc);
bool SupportProfL2CacheEvict();
void MC2KernelLaunch(rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc, bool &ret);
void ProfCommandAction(MsprofCommandHandleType type) const;
bool StartProf(std::thread &th);
void StopProf() {
if (taskPtr_ == nullptr) {
return;
}
taskPtr_->Stop();
}
std::string KernelNameConver(const std::string &kernelName) const {
auto start = kernelName.find("_Z");
if (start == std::string::npos) {
return kernelName;
}
uint64_t kernelNameLength = 0;
auto end = start + 2;
for (; end < kernelName.size(); end++) {
if (!std::isdigit(kernelName.at(end))) {
break;
}
kernelNameLength = kernelNameLength * 10 + (kernelName[end] - '0');
if (end + 1 + kernelNameLength > kernelName.length()) {
return kernelName;
}
}
if (kernelNameLength > 0) {
return kernelName.substr(end, kernelNameLength);
}
return kernelName;
}
bool IsPmuEventEmpty(uint32_t replayCount) const {
std::string socVersion = DeviceContext::Local().GetSocVersion();
auto &config = ProfConfig::Instance().GetConfig();
ChipProductType chipType = GetProductTypeBySocVersion(socVersion);
uint32_t pmuEventMaxNum = PMU_EVENT_MAX_NUM;
uint32_t eventMaxNum = EVENT_MAX_NUM;
if (IsChipSeriesTypeValid(chipType, ChipProductType::ASCEND950_SERIES)) {
pmuEventMaxNum = PMU_EVENT_MAX_NUM_A5;
eventMaxNum = EVENT_MAX_NUM_A5;
}
uint32_t nextPmuId = replayCount * pmuEventMaxNum;
if ((nextPmuId >= eventMaxNum) || (config.aicPmu[nextPmuId] == 0 && config.aivPmu[nextPmuId] == 0)) {
return true;
}
return false;
}
bool IsReceiveSignal() const { return g_receiveSignal; }
void LoadFrequency();
void SaveBasicInfo();
void WarmUp(rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc) const;
void WarmUp(rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc, uint16_t warmUpTimes) const;
void WarmUp(const rtStream_t &stream, const aclmdlRI &modelRI) const;
bool PrepareClearL2CacheParam(L2cacheParam ¶m) const;
bool MallocBuffer(L2cacheParam ¶m) const;
bool WaitClearL2Cache(L2cacheParam ¶m) const;
void FreeL2Cache(L2cacheParam ¶m) const;
bool ClearL2Cache(L2cacheParam ¶m) const;
bool CallEmptyKernel(void *stream) const;
size_t GetReplayTimes() const { return static_cast<size_t>(EVENT_MAX_NUM / PMU_EVENT_MAX_NUM); }
bool RangeReplayProfData(const rtStream_t &stream);
bool RangeReplayInit(bool &needReplay);
void SaveFrequency(const string &outputPath) const;
bool RangeReplayOnce(L2cacheParam ¶m, aclmdlRI const &modelRI);
SimulatorLauncher simulatorLauncher;
std::thread readThread_;
std::atomic<uint32_t> replayCount_{0};
int32_t curFreq_{-1};
int64_t ratedFreq_{-1};
std::unique_ptr<ProfTask> taskPtr_;
bool isClearParamSuccess_{false};
bool profL2cacheEvict_ = false;
};
/**
 * Initialises the per-launch state and, unless disabled, the output directory.
 *
 * @param ctx           launch context of the kernel; may be null
 * @param isInitOutput  when false, only the device id is resolved
 *
 * CPU-kernel launches are skipped: the output path stays empty and nothing else is set up.
 * Kernel name and launch context are only recorded while inside an mstx range.
 */
DataCollect::DataCollect(const LaunchContextSP &ctx, bool isInitOutput) {
    // The function context is treated as nullable further down, so walk the whole
    // ctx -> function context -> register context chain defensively here as well.
    if (ctx != nullptr) {
        const auto &funcCtx = ctx->GetFuncContext();
        if (funcCtx != nullptr) {
            const auto &regCtx = funcCtx->GetRegisterContext();
            if (regCtx != nullptr &&
                regCtx->GetBinaryType() == aclrtBinaryLoadOptionType::ACL_RT_BINARY_LOAD_OPT_CPU_KERNEL_MODE) {
                outputPath_ = "";
                DEBUG_LOG("ship aicpu kernel launch");
                return;
            }
        }
    }
    deviceId_ = DeviceContext::GetRunningDeviceId();
    int32_t devId = ProfConfig::Instance().IsSimulator() ? 0 : deviceId_;
    if (!isInitOutput) {
        return;
    }
    constexpr uint32_t unixFileNameLimit = 255;
    constexpr uint32_t kernelNameReserved = 72;
    if (MsTx::Instance().IsInMstxRange()) {
        if (ctx == nullptr) {
            kernelName_ = KernelContext::Instance().GetLaunchName();
        } else {
            launchCtx_ = ctx;
            kernelName_ = launchCtx_->GetFuncContext() != nullptr ? launchCtx_->GetFuncContext()->GetKernelName() : "";
        }
    }
    // Names too long for a file name keep their first `kernelNameReserved` characters;
    // the remainder is replaced by its hash so that distinct kernels stay distinguishable.
    std::string kernelName(kernelName_);
    if (kernelName.size() >= unixFileNameLimit) {
        const std::string tail = kernelName.substr(kernelNameReserved);
        const size_t hashValue = std::hash<std::string>{}(tail);
        kernelName = kernelName.substr(0, kernelNameReserved) + "_" + std::to_string(hashValue);
    }
    if (launchCtx_ == nullptr) {
        hasSimt_ = KernelContext::Instance().HasSimtSymbol();
    } else {
        // hasSimt_ keeps its default (false) when the context chain is incomplete.
        const auto &funcCtx = launchCtx_->GetFuncContext();
        if (funcCtx != nullptr) {
            const auto &regCtx = funcCtx->GetRegisterContext();
            if (regCtx != nullptr) {
                hasSimt_ = regCtx->HasSimtSymbol();
            }
        }
    }
    if (ProfConfig::Instance().IsSimulatorLaunchedByDevice()) {
        outputPath_ = GetEnv(MSOPPROF_OUTPUT_DUMP_PATH_ENV);
    } else {
        outputPath_ = ProfConfig::Instance().GetOutputPathFromRemote(kernelName, devId);
        std::lock_guard<std::mutex> lk(outputMutex_);
        deviceOutputPathMap_[devId] = outputPath_;
    }
    if (!outputPath_.empty() && !MkdirRecusively(outputPath_)) {
        WARN_LOG("Output path create failed, path is: %s", outputPath_.c_str());
    }
}
/**
 * Dumps the kernel and replays it through the kernel-launcher binary in a child process.
 *
 * @param dumpPath  directory receiving the dump; the kernel itself goes to `dumpPath`/kernel_data
 * @param launchId  forwarded to KernelDumper::Dump()
 * @param aclNew    forwarded to KernelDumper::Dump()
 *
 * Failures are reported through WARN_LOG only; the function blocks until the child has finished.
 */
void SimulatorLauncher::Launch(const std::string &dumpPath, uint64_t launchId, bool aclNew) {
    const std::string outputDir = JoinPath({dumpPath, "kernel_data"});
    if (!KernelDumper::Instance().Dump(outputDir, launchId, aclNew)) {
        WARN_LOG("Msopt dump kernel failed");
        return;
    }
    std::vector<std::string> envs = SetEnvToSimu(dumpPath);
    std::vector<std::string> args = GetLaunchArgs(outputDir);
    // Build both pointer arrays before fork(): this process runs additional threads (see
    // StartProf), and allocating memory in the child of a multi-threaded process is not safe.
    std::vector<char *> argv = ToRawCArgv(args);
    std::vector<char *> envp = ToRawCArgv(envs);
    const pid_t pid{fork()};
    if (pid < 0) {
        WARN_LOG("Fork kernel-launcher process failed");
        return;
    }
    if (pid == 0) {
        execvpe(kernelLaunchBinPath_.c_str(), argv.data(), envp.data());
        // Only reached when exec failed.
        _exit(EXIT_FAILURE);
    }
    int status = 0;
    if (waitpid(pid, &status, 0) != pid) {
        // `status` is meaningless in this case, so do not interpret it.
        WARN_LOG("Wait for kernel-launcher process failed");
        return;
    }
    // Decode the wait status instead of printing the raw encoded value.
    if (WIFEXITED(status)) {
        if (WEXITSTATUS(status) != 0) {
            WARN_LOG("Child process exited with status %d", WEXITSTATUS(status));
        }
    } else if (WIFSIGNALED(status)) {
        WARN_LOG("Child process terminated by signal %d", WTERMSIG(status));
    }
}
/**
 * Makes `runtimePath`/libruntime.so a symlink to `targetPath` and puts `runtimePath` at the
 * front of LD_LIBRARY_PATH inside `env`.
 *
 * @param env          environment map to update; left untouched when the function fails
 * @param runtimePath  directory that receives the symlink (created when missing)
 * @param targetPath   file the symlink points to
 * @return true on success, false when the directory or the symlink cannot be created
 */
bool SimulatorLauncher::RuntimeToTargetLib(
    std::map<std::string, std::string> &env, const std::string &runtimePath, const std::string &targetPath) const
{
    if (runtimePath.empty() || !MkdirRecusively(runtimePath)) {
        ERROR_LOG("Sym link failed when dispose ca path");
        return false;
    }
    const std::string soName = JoinPath({runtimePath, "libruntime.so"});
    // Read the current value with find(): operator[] would insert an empty LD_LIBRARY_PATH
    // entry that stays in `env` even when the symlink below fails.
    const auto ldIter = env.find("LD_LIBRARY_PATH");
    const std::string ldEnv = (ldIter != env.end()) ? ldIter->second : std::string();
    if (IsExist(soName)) {
        RemoveAll(soName);
    }
    if (symlink(targetPath.c_str(), soName.c_str()) != 0) {
        ERROR_LOG("Symbol link runtime to simulator failed");
        return false;
    }
    env["LD_LIBRARY_PATH"] = ldEnv.empty() ? runtimePath : runtimePath + ":" + ldEnv;
    DEBUG_LOG("Symbol link runtime to simulator success, so path is %s, simulator path is %s", soName.c_str(),
        targetPath.c_str());
    return true;
}
/**
 * Assembles the environment of the kernel-launcher child process.
 *
 * The simulator log directory is "tmp_dump" below the path obtained by rolling `dumpPath`
 * back three levels; it is created here and also receives the libruntime.so symlink.
 *
 * @param dumpPath  output directory exported through MSOPPROF_OUTPUT_DUMP_PATH
 * @return the variables merged with the system environment, or an empty vector when
 *         `dumpPath` cannot be rolled back
 */
std::vector<std::string> SimulatorLauncher::SetEnvToSimu(const std::string &dumpPath) {
    std::string simuLogDir = dumpPath;
    if (!RollbackPath(simuLogDir, 3)) {
        return {};
    }
    simuLogDir = JoinPath({simuLogDir, "tmp_dump"});

    std::map<std::string, std::string> simuEnv;
    simuEnv[MSOPPROF_OUTPUT_DUMP_PATH_ENV] = dumpPath;
    simuEnv[CAMODEL_LOG_PATH_ENV] = simuLogDir;
    simuEnv[DEVICE_TO_SIMULATOR] = "true";
    simuEnv[IS_SIMULATOR_ENV] = "true";
    simuEnv["LD_PRELOAD"] = opprofInjectionLib_ + ":libruntime_camodel.so";

    MkdirRecusively(simuLogDir);
    // Best effort: a failure is already logged inside RuntimeToTargetLib().
    RuntimeToTargetLib(simuEnv, simuLogDir, opprofInjectionLib_);

    std::vector<std::string> mergedEnv;
    JoinWithSystemEnv(simuEnv, mergedEnv, true);
    return mergedEnv;
}
std::vector<std::string> SimulatorLauncher::GetLaunchArgs(const std::string &outputDir) {
std::string binFilePath = JoinPath({outputDir, KERNEL_CONFIG_NAME});
std::vector<std::string> kernelLaunchArgs;
kernelLaunchArgs.emplace_back(kernelLaunchBinPath_);
kernelLaunchArgs.emplace_back("-c");
kernelLaunchArgs.emplace_back(binFilePath);
return kernelLaunchArgs;
}
/**
 * Derives the injection library and kernel-launcher locations from the msopprof install
 * path. Both members stay empty when that path is unknown; a launcher that is missing or
 * not executable is reported but does not abort construction.
 */
SimulatorLauncher::SimulatorLauncher() {
    const std::string installDir = ProfConfig::Instance().GetMsopprofPath();
    if (installDir.empty()) {
        return;
    }
    opprofInjectionLib_ = JoinPath({installDir, MSOPPROF_INJECTION_LIB_PATH_FROM_MSOPPROF});
    kernelLaunchBinPath_ = JoinPath({installDir, "bin", "kernel-launcher"});
    const bool launcherUsable = IsExist(kernelLaunchBinPath_) && IsExecutable(kernelLaunchBinPath_);
    if (!launcherUsable) {
        ERROR_LOG("Kernel launcher not found or not executable: %s", kernelLaunchBinPath_.c_str());
    }
}
void DataCollectWithSimulator::ProfInit(const void *hdl, const void *stubFunc, bool type) {
DEBUG_LOG("Kernel running, kernel name is %s", kernelName_.c_str());
if (outputPath_.empty()) {
CamodelHelper::Instance().Disable();
return;
}
INFO_LOG("Start profiling on kernel: %s", kernelName_.c_str());
if (!MkdirRecusively(outputPath_)) {
WARN_LOG("Output path create failed, path is: %s", outputPath_.c_str());
outputPath_ = "";
return;
}
if (launchCtx_ != nullptr) {
CopyAiCoreBinFile(launchCtx_->GetFuncContext()->GetRegisterContext()->GetHandle());
} else {
CopyAiCoreBinFile(hdl);
}
std::string basicInfoTxt = JoinPath({SharedRecord::Instance().GetTmpDumpPath(), "object_dump.txt"});
WriteStringToFile(basicInfoTxt, outputPath_ + '\n' + kernelName_);
uint64_t tiling = 0;
uint64_t pcStart = 0;
if (launchCtx_ != nullptr) {
pcStart = launchCtx_->GetFuncContext()->GetStartPC();
} else {
KernelContext::LaunchEvent event;
if (KernelContext::Instance().GetLastLaunchEvent(event)) {
tiling = event.tilingKey;
}
if (type && !GetPcStartAddr(stubFunc, tiling, type, pcStart)) {
DEBUG_LOG("Get pc start failed by stubFunc. Using pc start in dump log");
}
if (!type && !GetPcStartAddr(hdl, tiling, type, pcStart)) {
DEBUG_LOG("Get pc start failed by hdl. Using pc start in dump log");
}
}
if (pcStart != 0) {
WriteStringToFile(JoinPath({outputPath_, "pc_start_addr.txt"}), NumToHexString(pcStart),
std::fstream::out | std::fstream::binary);
}
if (GetEnv("ENABLE_CA_LOG_TRANS") == "true") {
if (ProfConfig::Instance().IsEnableLogTrans()) {
CamodelHelper::Instance().Enable();
ProfConfig::Instance().RequestLogTranslate(outputPath_, kernelName_);
} else {
ProfConfig::Instance().RequestLogTranslate("", "");
}
}
}
/**
 * Makes a simulator dump file writable when it is not already.
 *
 * Only files whose second-to-last dot-separated component is numeric (as judged by
 * IsDigit(), e.g. "<name>.<digits>.<ext>") have their permissions changed; every other file
 * is left alone and reported as fine.
 *
 * @param filePath  path of the dump file
 * @return false only when the permission change was attempted and failed
 */
bool DataCollectWithSimulator::MakeCaFileReadable(const std::string &filePath) const {
    if (IsWritable(filePath)) {
        return true;
    }
    const std::size_t end = filePath.find_last_of('.');
    // Without a dot, or with the only dot at index 0, there is no component in front of the
    // extension. Checking this first also keeps `end - 1` below from wrapping around.
    if (end == std::string::npos || end == 0) {
        return true;
    }
    const std::size_t start = filePath.rfind('.', end - 1);
    if (start == std::string::npos) {
        return true;
    }
    // start < end holds here, so the length cannot underflow.
    const std::string tempFile = filePath.substr(start + 1, end - start - 1);
    if (!IsDigit(tempFile)) {
        return true;
    }
    if (!Chmod(filePath, SAVE_DATA_FILE_AUTHORITY)) {
        return false;
    }
    DEBUG_LOG("Change file permission, file name is %s", filePath.c_str());
    return true;
}
/**
 * Copies the kernel object previously saved for `hdl` into the output directory.
 * Nothing happens when the output directory already holds an object file; a warning is
 * logged when no object was recorded for the handle or the recorded file has disappeared.
 *
 * @param hdl  kernel handle used as key into the shared object record
 */
void DataCollectWithSimulator::CopyAiCoreBinFile(const KernelHandle *hdl) {
    using namespace std::experimental::filesystem;
    const auto savedObject = SharedRecord::Instance().binaryPathMap_.find(hdl);
    if (savedObject == SharedRecord::Instance().binaryPathMap_.end()) {
        WARN_LOG("Copy aicore bin file failed");
        return;
    }
    std::string sourceFile = savedObject->second;
    const bool alreadyCopied = IsExist(JoinPath({outputPath_, AICORE_KERNEL_NAME}));
    if (alreadyCopied) {
        DEBUG_LOG("Aicore kernel already in out put path");
        return;
    }
    if (IsExist(sourceFile)) {
        CopyFile(sourceFile, outputPath_);
        return;
    }
    WARN_LOG("Copy aicore bin file failed, tempPath is %s", sourceFile.c_str());
}
/**
 * Truncates a simulator dump file to zero length.
 * The path is validated first and the file is made writable when necessary; any failure
 * leaves the file untouched.
 *
 * @param fileName  path of the file to empty
 */
void DataCollectWithSimulator::ClearCaFile(const std::string &fileName) const {
    std::string targetPath = fileName;
    if (!CheckWriteFilePathValid(targetPath)) {
        WARN_LOG("check file path %s failed", targetPath.c_str());
        return;
    }
    if (!MakeCaFileReadable(targetPath)) {
        return;
    }
    // Opening with trunc is what empties the file.
    std::ofstream truncated(targetPath, std::ios::trunc);
    if (truncated.is_open()) {
        truncated.close();
        return;
    }
    WARN_LOG("Can not find dump file path [%s]", targetPath.c_str());
}
/**
 * Collects the simulator dump files after a launch.
 *
 * With a usable output directory, every regular file in the temporary dump directory whose
 * name contains "dump" is copied there (and its permissions set) before being truncated;
 * other files are left as they are. Without an output directory, every regular file is
 * simply truncated. Directories and symlinks are always skipped.
 *
 * @return false when the temporary dump directory is unknown or missing, true otherwise
 */
bool DataCollectWithSimulator::HandleDumpLogAfterLaunch() {
    using namespace std::experimental::filesystem;
    if (ProfConfig::Instance().IsEnableLogTrans() && CamodelHelper::Instance().IsEnable()) {
        CamodelHelper::Instance().SendSync();
        ProfConfig::Instance().NotifyStopTransLog();
    }
    std::string dumpDir = SharedRecord::Instance().GetTmpDumpPath();
    if (dumpDir.empty() || !IsExist(dumpDir)) {
        WARN_LOG("Tmp dump file path is not Exist, path is [%s]", dumpDir.c_str());
        return false;
    }
    const bool copyToOutput = !outputPath_.empty() && IsExist(outputPath_);
    for (auto const &entry : directory_iterator(dumpDir)) {
        if (IsDir(entry.path()) || IsSoftLink(entry.path())) {
            continue;
        }
        if (copyToOutput) {
            const std::string baseName = entry.path().filename().string();
            if (baseName.find("dump") == std::string::npos) {
                continue;
            }
            if (CopyFile(entry.path().string(), outputPath_)) {
                std::string copiedFile = JoinPath({outputPath_, entry.path().filename().string()});
                Chmod(copiedFile, SAVE_DATA_FILE_AUTHORITY);
            }
        }
        ClearCaFile(entry.path().string());
    }
    return true;
}
/**
 * Dumps the kernel object behind `hdl` into a fresh, timestamp-named directory below the
 * temporary dump path and records where it was written. A handle that is already recorded
 * is not dumped again.
 *
 * @param hdl  kernel handle to dump
 * @return true when the object is recorded (now or earlier), false when the dump failed
 */
bool DataCollectWithSimulator::SaveObject(const KernelHandle *hdl) {
    auto &record = SharedRecord::Instance();
    if (record.binaryPathMap_.count(hdl) != 0) {
        return true;
    }
    const std::string stampDirName = GenerateTimeStamp<std::chrono::nanoseconds>();
    const std::string dumpDir = JoinPath({record.GetTmpDumpPath(), stampDirName});
    if (!KernelContext::Instance().DumpKernelObject(hdl, dumpDir, AICORE_KERNEL_NAME)) {
        return false;
    }
    record.binaryPathMap_[hdl] = JoinPath({dumpDir, AICORE_KERNEL_NAME});
    return true;
}
bool DataCollectWithSimulator::SaveObject(const RegisterContextSP &ctx) {
auto hdl = ctx->GetHandle();
if (SharedRecord::Instance().binaryPathMap_.count(hdl) != 0) {
return true;
}
std::string objFileName = GenerateTimeStamp<std::chrono::nanoseconds>();
std::string outputDir = JoinPath({SharedRecord::Instance().GetTmpDumpPath(), objFileName});
std::string outputPath = JoinPath({outputDir, AICORE_KERNEL_NAME});
if (!IsExist(outputDir) && !MkdirRecusively(outputDir)) {
WARN_LOG("Save binary file failed, cannot create output dir");
return false;
}
if (ctx->Save(outputPath)) {
SharedRecord::Instance().binaryPathMap_[hdl] = outputPath;
return true;
}
WARN_LOG("Can not save binary file to %s", outputPath.c_str());
return false;
}
/**
 * Prepares an on-device profiling run for the kernel that is about to launch.
 *
 * Saves the kernel object into the output directory, reads the ELF section headers to detect
 * the "Attr_Section_Lcal" section (which sets the LCCL flag), updates the MC2 flag, and
 * stores the start PC when one can be resolved. Does nothing without an output path.
 *
 * @param hdl       kernel handle, used when no launch context is recorded
 * @param stubFunc  stub function pointer, used for the PC lookup when `type` is true
 * @param type      true: resolve the start PC through `stubFunc`; false: through `hdl`
 */
void DataCollectInDevice::ProfInit(const void *hdl, const void *stubFunc, bool type) {
    if (outputPath_.empty()) {
        return;
    }
    std::map<std::string, Elf64_Shdr> headers;
    uint64_t tiling = 0;
    uint64_t pcStart = 0;
    if (launchCtx_ != nullptr) {
        const std::vector<char> &binary = launchCtx_->GetFuncContext()->GetRegisterContext()->GetElfData();
        GetSectionHeaders(binary, headers);
        std::string outputPath = JoinPath({outputPath_, AICORE_KERNEL_NAME});
        if (!launchCtx_->GetFuncContext()->GetRegisterContext()->Save(outputPath)) {
            WARN_LOG("Save obj failed, output path is %s", outputPath_.c_str());
        }
        pcStart = launchCtx_->GetFuncContext()->GetStartPC();
    } else {
        // Value-initialised so that no indeterminate fields exist if the lookup fails.
        rtDevBinary_t binary{};
        if (KernelContext::Instance().GetDevBinary(KernelContext::KernelHandlePtr{hdl}, binary)) {
            GetSectionHeaders(binary, headers);
        }
        KernelDumper::Instance().DumpAicore(outputPath_);
        KernelContext::LaunchEvent event;
        if (KernelContext::Instance().GetLastLaunchEvent(event)) {
            tiling = event.tilingKey;
        }
        if (type && !GetPcStartAddr(stubFunc, tiling, type, pcStart)) {
            DEBUG_LOG("Get pc start failed by stubFunc. Using pc start in dump log");
        }
        if (!type && !GetPcStartAddr(hdl, tiling, type, pcStart)) {
            DEBUG_LOG("Get pc start failed by hdl. Using pc start in dump log");
        }
    }
    // The headers are keyed by section name, so a direct lookup replaces the linear scan.
    if (headers.find("Attr_Section_Lcal") != headers.end()) {
        KernelContext::Instance().SetLcclFlag(true);
        DEBUG_LOG("set lccl flag");
    }
    KernelContext::Instance().SetMC2Flag();
    if (pcStart != 0) {
        WriteStringToFile(JoinPath({outputPath_, "pc_start_addr.txt"}), NumToHexString(pcStart),
            std::fstream::out | std::fstream::binary);
    }
}
/**
 * Builds a profiling command for this collector's device and submits it through
 * profSetProfCommandOrigin(). The profiling switch depends on the chip series of the local
 * device; chips outside the handled series get an empty switch.
 *
 * @param type  command type, stored in the command's `type` field
 */
void DataCollectInDevice::ProfCommandAction(MsprofCommandHandleType type) const {
    // NOTE(review): `path` is sized with PARAM_LEN_MAX and `profData` with PATH_LEN_MAX, which
    // looks swapped relative to the field names. Neither field is used here and the total size
    // is the same either way - confirm against the profiling API's command layout.
    using RtCommandHandleParamsT = struct {
        uint32_t pathLen;
        uint32_t storageLimit;
        uint32_t profDataLen;
        char path[PARAM_LEN_MAX + 1];
        char profData[PATH_LEN_MAX + 1];
    };
    using RtProfCommandHandleT = struct {
        uint64_t profSwitch;
        uint64_t profSwitchHi;
        uint32_t devNums;
        uint32_t devIdList[MSPROF_MAX_DEV_NUM];
        uint32_t modelId;
        uint32_t type;
        uint32_t cacheFlag;
        RtCommandHandleParamsT commandHandleParams;
    };
    // Zero the whole command: only some fields are filled in below, and the structure is
    // handed to the API as a raw buffer, so nothing in it may be left indeterminate.
    RtProfCommandHandleT command{};
    command.type = static_cast<uint32_t>(type);
    command.devNums = MSPROF_DEVICE_NUM;
    command.devIdList[0] = static_cast<uint32_t>(deviceId_);
    command.modelId = PROF_INVALID_MODE_ID;
    std::string socVersion = DeviceContext::Local().GetSocVersion();
    auto chipType = GetProductTypeBySocVersion(socVersion);
    if (IsChipSeriesTypeValid(chipType, ChipProductType::ASCEND310P_SERIES)) {
        command.profSwitch = PROF_AICORE_METRICS;
    } else if (IsChipSeriesTypeValid(chipType, ChipProductType::ASCEND910B_SERIES) ||
               IsChipSeriesTypeValid(chipType, ChipProductType::ASCEND910_93_SERIES)) {
        command.profSwitch = PROF_L2CACHE | PROF_OP_TIMESTAMP;
    } else if (IsChipSeriesTypeValid(chipType, ChipProductType::ASCEND950_SERIES)) {
        if (ProfConfig::Instance().IsInstrTimelineEnabled() || ProfConfig::Instance().IsPipeTimelineEnabled() ||
            (ProfConfig::Instance().IsPCSamplingEnabled() && hasSimt_)) {
            command.profSwitch = PROF_INSTR | PROF_OP_TIMESTAMP;
        } else {
            command.profSwitch = PROF_OP_TIMESTAMP;
        }
    }
    const auto res = profSetProfCommandOrigin(static_cast<void *>(&command), sizeof(RtProfCommandHandleT));
    DEBUG_LOG("profSetProfCommandOrigin type %d res is %d", static_cast<int>(type), static_cast<int>(res));
}
bool DataCollectInDevice::StartProf(std::thread &th) {
std::call_once(g_sigRegFlag, [&]() { SignalWrapper::RegisterCallback(SIGINT, HandleSigInt); });
DEBUG_LOG("MSOPT INJECTION SUCCESS");
if (!taskPtr_->Start(replayCount_, hasSimt_)) {
return false;
}
{
std::lock_guard<std::mutex> lk(DataCollect::replayCountMutex_);
deviceReplayCountMap_[deviceId_] = replayCount_;
}
th = std::thread(&ProfTask::ChannelRead, taskPtr_.get());
DEBUG_LOG("Polling data read thread create");
return true;
}
/**
 * Writes the basic-block count data for a kernel and renames the matching kernel object
 * ("kernel<regId>.o") in the output directory to the common AI-core object name.
 *
 * @param regId    id used in the kernel object file name and passed to the dumper
 * @param memSize  size of the data referenced by `memInfo`
 * @param memInfo  basic-block count data, passed through to BBCountDumper
 */
void DataCollectInDevice::GenBBcountFile(uint64_t regId, uint64_t memSize, uint8_t *memInfo) {
    if (!IsPlatformSupportDBI()) {
        DEBUG_LOG("Unsupported platform, exit DBI");
        return;
    }
    // The device dump path is only validated here; the files go to outputPath_.
    const std::string deviceDumpDir = GetEnv(DEVICE_PROF_DUMP_PATH_ENV);
    const bool dumpDirUsable = !deviceDumpDir.empty() && IsExist(deviceDumpDir);
    if (!dumpDirUsable) {
        WARN_LOG("Error in device dump path");
        return;
    }
    const std::string extraFile = BBCountDumper::Instance().GenExtraAndReturnName(outputPath_, regId, memSize, memInfo);
    if (extraFile.empty()) {
        WARN_LOG("Extra BB count file dump failed, memsize is %lu", memSize);
        return;
    }
    const std::string perRegObject = JoinPath({outputPath_, "kernel" + std::to_string(regId) + ".o"});
    const std::string commonObject = JoinPath({outputPath_, AICORE_KERNEL_NAME});
    const bool renamed = IsExist(perRegObject) && rename(perRegObject.c_str(), commonObject.c_str()) == 0;
    if (!renamed) {
        WARN_LOG("Rename aicore file failed");
    }
}
void DataCollectInDevice::GenDBIData(uint64_t memSize, uint8_t *memInfo) {
DEBUG_LOG("Gen DBI data, memSize is %lu", memSize);
if (memSize == 0) {
return;
}
std::vector<uint8_t> memInfoHost(MAX_BLOCK_DATA_SIZE);
uint64_t count = memSize / BLOCK_MEM_SIZE;
for (uint64_t i = 0; i < count; ++i) {
aclError error = aclrtMemcpyImplOrigin(memInfoHost.data(), MAX_BLOCK_DATA_SIZE, memInfo + i * BLOCK_MEM_SIZE,
MAX_BLOCK_DATA_SIZE, aclrtMemcpyKind::ACL_MEMCPY_DEVICE_TO_HOST);
if (error != RT_ERROR_NONE) {
ERROR_LOG("dump basic block data rtMemcpy memInfo error: %d", error);
break;
}
auto bh = reinterpret_cast<const BlockHeader *>(memInfoHost.data());
uint64_t length = bh->length;
if (length == 0) {
continue;
}
uint64_t overflow = 0;
if ((bh->length & RECORD_OVERFLOW_BIT) != 0) {
if (((bh->length ^ RECORD_OVERFLOW_BIT) == bh->count) || ((bh->length ^ RECORD_OVERFLOW_BIT) < bh->count)) {
continue;
}
overflow = (bh->length ^ RECORD_OVERFLOW_BIT) - bh->count;
length = MAX_BLOCK_DATA_SIZE - sizeof(BlockHeader);
}
DBIDataHeader dbiDataHeader{bh->count, length, overflow, static_cast<uint16_t>(i), 0};
ProfPacketHead head{ProfPacketType::DBI_DATA, static_cast<uint32_t>(length + sizeof(dbiDataHeader))};
std::string msg = Serialize(head, dbiDataHeader);
auto begin = memInfoHost.begin() + sizeof(BlockHeader);
msg.insert(msg.end(), begin, begin + static_cast<long>(length));
ProfConfig::Instance().SendMsg(msg);
}
DBIDataHeader dbiHeader{0, outputPath_.size(), 0, 0, 1};
ProfPacketHead head{ProfPacketType::DBI_DATA, static_cast<uint32_t>(dbiHeader.length + sizeof(dbiHeader))};
ProfConfig::Instance().SendMsg(Serialize(head, dbiHeader) + outputPath_);
}
void DataCollectInDevice::GenRecordData(uint64_t memSize, uint8_t *memInfo, const std::string &recordName) const {
DEBUG_LOG("Gen record %s, memSize is %lu", recordName.c_str(), memSize);
if (memSize == 0) {
return;
}
void *memInfoHost;
if (aclrtMallocHostImplOrigin(&memInfoHost, memSize) != ACL_SUCCESS) {
ERROR_LOG("dump basic block data aclrtMallocHost");
return;
}
aclError error =
aclrtMemcpyImplOrigin(memInfoHost, memSize, memInfo, memSize, aclrtMemcpyKind::ACL_MEMCPY_DEVICE_TO_HOST);
if (error != ACL_SUCCESS) {
ERROR_LOG("dump basic block data aclrtMemcpy memInfo error: %d", error);
aclrtFreeHostImplOrigin(memInfoHost);
memInfoHost = nullptr;
return;
}
auto path = JoinPath({outputPath_, recordName});
if (WriteBinary(path, reinterpret_cast<const char *>(memInfoHost), memSize) == 0) {
WARN_LOG("Write %s failed", recordName.c_str());
}
aclrtFreeHostImplOrigin(memInfoHost);
}
/**
 * Launches an MC2 kernel: the AICPU part on its own stream, ordered behind an HCCL barrier on
 * `stream` through an event, followed by the AI-core kernel via `kernelLaunchFunc`. Both
 * streams are then synchronised with a timeout.
 *
 * @param stream            stream of the AI-core kernel
 * @param kernelLaunchFunc  callable launching the AI-core kernel
 * @param ret               receives the result of `kernelLaunchFunc`; false when the launch was
 *                          not reached because a preparation step failed
 */
void DataCollectInDevice::MC2KernelLaunch(
    rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc, bool &ret) {
    ret = false;
    auto hcclComm = KernelContext::Instance().GetHcclComm();
    if (hcclComm == nullptr) {
        WARN_LOG("Get hccl comm failed, device: %d", deviceId_);
        return;
    }
    aclrtEvent event = nullptr;
    int aclrtRes = ACL_ERROR_NONE;
    aclrtRes = aclrtCreateEventOrigin(&event);
    if (aclrtRes != ACL_ERROR_NONE) {
        WARN_LOG("MC2 event create failed, device: %d, res: %d", deviceId_, aclrtRes);
        return;
    }
    // Scope guard: destroys the event on every exit path from here on.
    std::shared_ptr<void> deferA(nullptr, [&event](std::nullptr_t &) { aclrtDestroyEventOrigin(event); });
    HcclResult hcclRes = HcclBarrierOrigin(hcclComm, stream);
    if (hcclRes != HCCL_SUCCESS) {
        WARN_LOG("Call hccl barrier failed, device: %d, res: %d", deviceId_, hcclRes);
        return;
    }
    aclrtRes = aclrtRecordEventOrigin(event, stream);
    if (aclrtRes != ACL_ERROR_NONE) {
        WARN_LOG("MC2 event record failed, device: %d, res: %d", deviceId_, aclrtRes);
        return;
    }
    AicpuLaunchArgs &aicpuLaunchArgs = KernelContext::GetAicpuLaunchArgs();
    aclrtRes = aclrtStreamWaitEventOrigin(aicpuLaunchArgs.stm, event);
    if (aclrtRes != ACL_ERROR_NONE) {
        WARN_LOG("MC2 event wait failed, device: %d, res: %d", deviceId_, aclrtRes);
        return;
    }
    rtError_t aicpuRet = rtAicpuKernelLaunchExWithArgsOrigin(aicpuLaunchArgs.kernelType, &aicpuLaunchArgs.opName[0],
        aicpuLaunchArgs.blockDim, aicpuLaunchArgs.argsInfo, aicpuLaunchArgs.smDesc, aicpuLaunchArgs.stm,
        aicpuLaunchArgs.flags);
    ret = kernelLaunchFunc();
    LoadFrequency();
    aclError npuSyncRet{ACL_SUCCESS};
    if (ret) {
        npuSyncRet = aclrtSynchronizeStreamWithTimeoutImplOrigin(stream, SYNCHRONIZE_TIME_OUT);
        DEBUG_LOG("MC2 AICore synchronize, device: %d, res: %d", deviceId_, npuSyncRet);
    }
    if (aicpuRet == RT_ERROR_NONE) {
        aclError cpuSyncRet = aclrtSynchronizeStreamWithTimeoutImplOrigin(aicpuLaunchArgs.stm, SYNCHRONIZE_TIME_OUT);
        DEBUG_LOG("MC2 AICPU synchronize, device: %d, res: %d", deviceId_, cpuSyncRet);
        if (cpuSyncRet != ACL_SUCCESS) {
            // Report the synchronise result: the launch result is known to be fine here.
            WARN_LOG("AICPU execute failed, device: %d, res: %d", deviceId_, cpuSyncRet);
        }
    } else {
        // A failed launch skips the synchronise step, so it has to be reported separately.
        WARN_LOG("AICPU launch failed, device: %d, res: %d", deviceId_, aicpuRet);
    }
    aclrtRes = aclrtResetEventOrigin(event, aicpuLaunchArgs.stm);
    if (aclrtRes != ACL_ERROR_NONE) {
        WARN_LOG("MC2 event reset failed, device: %d, res: %d", deviceId_, aclrtRes);
    }
}
bool DataCollectInDevice::ReplayOnce(
rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc, L2cacheParam ¶m) {
bool isMC2 = KernelContext::Instance().GetMC2Flag();
uint16_t warmUpTimes = ProfConfig::Instance().GetWarmUpTimes();
if (IsPmuEventEmpty(replayCount_) && replayCount_ != 0) {
DEBUG_LOG("Skip profiling because pmu is empty");
return true;
}
DEBUG_LOG("Replaying round on device %d No. %d time", deviceId_, replayCount_ + 1);
if (!ProfConfig::Instance().IsAppReplay() && isClearParamSuccess_ && !ClearL2Cache(param)) {
WARN_LOG("Clear L2Cache failed. replay count is %d", replayCount_ + 1);
}
if (!isMC2 && !ProfConfig::Instance().IsAppReplay() && !KernelContext::Instance().GetLcclFlag() &&
!MemoryContext::Instance().Restore()) {
WARN_LOG("Replay data restore failed. Skipping %d", replayCount_ + 1);
return false;
}
if (!StartProf(readThread_)) {
StopProf();
WARN_LOG("Start profiling failed. Skipping %d", replayCount_ + 1);
return false;
}
if (profL2cacheEvict_ && CallEmptyKernel(stream) == ACL_SUCCESS) {
DEBUG_LOG("Success run empty kernel for l2cache");
}
bool ret = true;
if (KernelContext::Instance().GetLcclFlag() && !profL2cacheEvict_) {
for (uint16_t i = 0; i < warmUpTimes; ++i) {
kernelLaunchFunc();
}
}
aclError syncRet{ACL_SUCCESS};
if (isMC2) {
MC2KernelLaunch(stream, kernelLaunchFunc, ret);
} else {
ret = kernelLaunchFunc();
if (IsPmuEventEmpty(replayCount_ + 1)) {
LoadFrequency();
}
if (ret) {
syncRet = aclrtSynchronizeStreamWithTimeoutImplOrigin(stream, SYNCHRONIZE_TIME_OUT);
}
}
usleep(WAIT_DATA_READ_TIME);
StopProf();
if (readThread_.joinable()) {
readThread_.join();
}
if (syncRet == ACL_SUCCESS && ret) {
return true;
} else {
WARN_LOG("Kernel run on device %d No. %d time failed.", deviceId_, replayCount_ + 1);
}
return ret;
}
/**
 * Launches the kernel once while instruction profiling is active.
 *
 * When PC sampling is enabled the kernel is warmed up first. On a failed run, and outside
 * application replay mode, device memory is restored for non-MC2, non-LCCL kernels.
 *
 * @param stream            stream the kernel is launched on and synchronized against
 * @param kernelLaunchFunc  callback that launches the kernel and reports success
 * @return false if StartProf failed, otherwise the result of the kernel launch
 */
bool DataCollectInDevice::KernelLaunchForInstrProf(
    rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc) {
    if (ProfConfig::Instance().IsPCSamplingEnabled()) {
        WarmUp(stream, kernelLaunchFunc, 20);
    }
    ProfCommandAction(MsprofCommandHandleType::PROF_COMMANDHANDLE_TYPE_START);
    const bool mc2Mode = KernelContext::Instance().GetMC2Flag();

    std::thread reader;
    if (!StartProf(reader)) {
        StopProf();
        WARN_LOG("Start profiling failed. Skipping KernelLaunchForInstrProf");
        return false;
    }

    bool launched = true;
    aclError waitRes{ACL_SUCCESS};
    if (!mc2Mode) {
        launched = kernelLaunchFunc();
        if (launched) {
            waitRes = aclrtSynchronizeStreamWithTimeoutImplOrigin(stream, SYNCHRONIZE_TIME_OUT);
        }
    } else {
        MC2KernelLaunch(stream, kernelLaunchFunc, launched);
    }

    StopProf();
    if (reader.joinable()) {
        reader.join();
    }

    if (launched && waitRes == ACL_SUCCESS) {
        return launched;
    }
    WARN_LOG("Kernel run on device %d for instr prof failed.", deviceId_);

    // Failure path only: restore memory unless running in application replay mode.
    const bool needRestore =
        !ProfConfig::Instance().IsAppReplay() && !mc2Mode && !KernelContext::Instance().GetLcclFlag();
    if (needRestore) {
        MemoryContext::Instance().Restore();
    }
    return launched;
}
/**
 * Replays the kernel repeatedly, one ReplayOnce call per round, until GetReplayTimes()
 * rounds are done or a signal is received.
 *
 * Returns true immediately in DBI mode. In application replay mode the function returns
 * right after the first ReplayOnce call.
 *
 * @param stream            stream used for the launches and the L2 cache operations
 * @param kernelLaunchFunc  callback that launches the kernel and reports success
 * @return true when every executed round in the main loop succeeded
 */
bool DataCollectInDevice::KernelReplay(rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc) {
    if (ProfConfig::Instance().IsDbi()) {
        return true;
    }
    replayCount_ = ProfConfig::Instance().GetInitReplayCount();
    bool allRoundsOk{true};
    const bool mc2Mode = KernelContext::Instance().GetMC2Flag();

    L2cacheParam param{};
    param.blockLen = nullptr;
    param.l2Buffer = nullptr;
    param.stream = stream;
    // L2 cache clearing is prepared only outside application replay mode.
    isClearParamSuccess_ = !ProfConfig::Instance().IsAppReplay() && PrepareClearL2CacheParam(param);

    WarmUp(stream, kernelLaunchFunc);
    ProfCommandAction(MsprofCommandHandleType::PROF_COMMANDHANDLE_TYPE_START);

    // One extra round in L2 cache evict mode; its result only matters for application replay.
    if (SupportProfL2CacheEvict()) {
        const bool evictRoundOk = ReplayOnce(stream, kernelLaunchFunc, param);
        profL2cacheEvict_ = false;
        if (ProfConfig::Instance().IsAppReplay()) {
            return evictRoundOk;
        }
    }

    while (replayCount_ < GetReplayTimes() && !IsReceiveSignal()) {
        const bool roundOk = ReplayOnce(stream, kernelLaunchFunc, param);
        allRoundsOk = allRoundsOk && roundOk;
        if (ProfConfig::Instance().IsAppReplay()) {
            return allRoundsOk;
        }
        replayCount_++;
    }

    if (!mc2Mode && !KernelContext::Instance().GetLcclFlag()) {
        MemoryContext::Instance().Restore();
    }
    FreeL2Cache(param);
    return allRoundsOk;
}
/**
 * Entry point for instruction profiling of one kernel launch.
 *
 * @param stream            stream the kernel runs on
 * @param kernelLaunchFunc  callback that launches the kernel and reports success
 * @return false in range replay mode, when the output path does not exist, or when the
 *         memory backup fails; true when no output path is configured; otherwise the
 *         result of KernelLaunchForInstrProf
 */
bool DataCollectInDevice::InstrProfData(rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc) {
    if (ProfConfig::Instance().IsRangeReplay()) {
        return false;
    }
    DEBUG_LOG("Kernel running for instr prof, kernel name is %s", kernelName_.c_str());
    if (outputPath_.empty()) {
        return true;
    }
    if (!IsExist(outputPath_)) {
        return false;
    }

    const bool mc2Mode = KernelContext::Instance().GetMC2Flag();
    // Backup is needed only in kernel replay mode for non-LCCL, non-MC2 kernels.
    const bool backupRequired =
        !ProfConfig::Instance().IsAppReplay() && !KernelContext::Instance().GetLcclFlag() && !mc2Mode;
    if (backupRequired && !MemoryContext::Instance().Backup()) {
        INFO_LOG("memoryContext failed!");
        return false;
    }

    DEBUG_LOG("Start instr profiling on device %d, kernel: %s", deviceId_, kernelName_.c_str());
    aclrtSynchronizeStreamImplOrigin(stream);
    return KernelLaunchForInstrProf(stream, kernelLaunchFunc);
}
/**
 * Decides whether the extra L2 cache evict round applies, and sets profL2cacheEvict_ if so.
 *
 * @return true only on ASCEND910B / ASCEND910_93 series chips when the first AIC, AIV and
 *         L2 cache PMU entries of the profiling config are all non-zero
 */
bool DataCollectInDevice::SupportProfL2CacheEvict() {
    std::string socVersion = DeviceContext::Local().GetSocVersion();
    auto chipType = GetProductTypeBySocVersion(socVersion);
    const bool seriesSupported = IsChipSeriesTypeValid(chipType, ChipProductType::ASCEND910B_SERIES) ||
                                 IsChipSeriesTypeValid(chipType, ChipProductType::ASCEND910_93_SERIES);
    if (!seriesSupported) {
        return false;
    }

    auto profConfig = ProfConfig::Instance().GetConfig();
    const bool allPmuConfigured =
        profConfig.aicPmu[0] != 0 && profConfig.aivPmu[0] != 0 && profConfig.l2CachePmu[0] != 0;
    if (allPmuConfigured) {
        profL2cacheEvict_ = true;
    }
    return allPmuConfigured;
}
bool DataCollectInDevice::RangeReplayInit(bool &needReplay) {
auto threadId = std::this_thread::get_id();
RangeReplayConfig rangeConfig = ProfDataCollect::GetThreadRangeConfigMap(threadId);
bool hasValidOutput =
any_of(rangeConfig.outputs.begin(), rangeConfig.outputs.end(), [](const string &out) { return out != "-1"; });
if (!rangeConfig.flag || !hasValidOutput) {
needReplay = false;
DEBUG_LOG("No need to range replay, device: %d.", deviceId_);
ProfDataCollect::ResetRangeConfig(threadId);
return true;
}
ProfDataCollect::ResetRangeConfig(threadId);
string replayPath = JoinPath({GetEnv(DEVICE_PROF_DUMP_PATH_ENV), "device" + to_string(deviceId_),
to_string(getpid()), to_string(rangeConfig.count)});
if (!MkdirRecusively(replayPath)) {
ERROR_LOG("Mkdir device %d range replay temp path failed.", deviceId_);
return false;
}
string outputTxt = JoinPath({replayPath, "output.txt"});
if (!CheckWriteFilePathValid(outputTxt)) {
ERROR_LOG("Check file %s failed, maybe is not writeable.", outputTxt.c_str());
return false;
}
std::ofstream outFile(outputTxt.c_str(), std::ios::out | std::ios::binary);
if (!outFile.is_open()) {
ERROR_LOG("Cannot open file [%s]", outputTxt.c_str());
return false;
}
for (const auto &i : rangeConfig.outputs) {
outFile << i << "\n";
}
outFile.close();
Chmod(outputTxt, SAVE_DATA_FILE_AUTHORITY);
{
std::lock_guard<std::mutex> lk(rangeConfigMutex_);
threadRangeConfigMap_[threadId].count += 1;
}
{
std::lock_guard<std::mutex> lk(outputMutex_);
deviceOutputPathMap_[deviceId_] = replayPath;
DEBUG_LOG("set device %d range replay temp path: %s", deviceId_, replayPath.c_str());
}
return true;
}
bool DataCollectInDevice::RangeReplayOnce(L2cacheParam ¶m, const aclmdlRI &modelRI) {
if (isClearParamSuccess_ && !ClearL2Cache(param)) {
WARN_LOG("Clear L2Cache failed. replay count is %d", replayCount_ + 1);
}
if (!MemoryContext::Instance().Restore()) {
WARN_LOG("Replay data restore failed. Skipping %d", replayCount_ + 1);
return false;
}
if (!StartProf(readThread_)) {
StopProf();
WARN_LOG("Start profiling failed. Skipping %d", replayCount_ + 1);
return false;
}
aclError ret = aclmdlRIExecuteAsyncImplOrigin(modelRI, param.stream);
if (IsPmuEventEmpty(replayCount_ + 1)) {
LoadFrequency();
SaveFrequency(ProfDataCollect::GetAicoreOutputPath(deviceId_));
}
if (ret == ACL_ERROR_NONE) {
ret = aclrtSynchronizeStreamWithTimeoutImplOrigin(param.stream, SYNCHRONIZE_TIME_OUT);
}
usleep(WAIT_DATA_READ_TIME);
StopProf();
if (readThread_.joinable()) {
readThread_.join();
}
if (ret == ACL_ERROR_NONE) {
return true;
} else {
WARN_LOG("Kernel run on device %d No. %d time failed, res is %d.", deviceId_, replayCount_ + 1, ret);
return false;
}
}
/**
 * Replays a captured model for every configured round in range replay mode.
 *
 * @param stream   stream the model is executed on
 * @param modelRI  captured model to execute
 * @return false when initialization fails or no round succeeded; true when no replay is
 *         needed or at least one round succeeded
 */
bool DataCollectInDevice::RangeReplay(const rtStream_t &stream, const aclmdlRI &modelRI) {
    bool needReplay{true};
    if (!RangeReplayInit(needReplay)) {
        return false;
    }
    if (!needReplay) {
        return true;
    }

    MemoryContext::Instance().Backup();
    WarmUp(stream, modelRI);

    L2cacheParam param{};
    param.blockLen = nullptr;
    param.l2Buffer = nullptr;
    param.stream = stream;
    isClearParamSuccess_ = PrepareClearL2CacheParam(param);
    replayCount_ = ProfConfig::Instance().GetInitReplayCount();
    ProfCommandAction(MsprofCommandHandleType::PROF_COMMANDHANDLE_TYPE_START);

    bool anyRoundOk{false};
    while (replayCount_ < GetReplayTimes() && !IsReceiveSignal()) {
        // Stop at the first later round that has no PMU events configured.
        if (replayCount_ != 0 && IsPmuEventEmpty(replayCount_)) {
            break;
        }
        DEBUG_LOG("Replaying round on device %d No. %d time", deviceId_, replayCount_ + 1);
        const bool roundOk = RangeReplayOnce(param, modelRI);
        anyRoundOk = anyRoundOk || roundOk;
        replayCount_++;
    }

    FreeL2Cache(param);
    return anyRoundOk;
}
bool DataCollectInDevice::RangeReplayProfData(const rtStream_t &stream) {
auto threadId = std::this_thread::get_id();
RangeReplayConfig rangeConfig = ProfDataCollect::GetThreadRangeConfigMap(threadId);
if (!rangeConfig.flag) {
auto res = aclmdlRICaptureBeginImplOrigin(stream, ACL_MODEL_RI_CAPTURE_MODE_GLOBAL);
if (res != ACL_ERROR_NONE) {
ERROR_LOG("Range replay begin failed, res is %d", res);
return false;
}
{
std::lock_guard<std::mutex> lk(rangeConfigMutex_);
threadRangeConfigMap_[threadId].flag = true;
threadRangeConfigMap_[threadId].stream = stream;
}
}
string tmpOutput = outputPath_;
if (outputPath_.empty()) {
tmpOutput = "-1";
}
{
std::lock_guard<std::mutex> lk(rangeConfigMutex_);
threadRangeConfigMap_[threadId].outputs.emplace_back(tmpOutput);
}
return true;
}
/**
 * Entry point for PMU profiling of one kernel launch on the device.
 *
 * Inside an mstx range in range replay mode the kernel is only recorded and its basic info
 * saved. Otherwise memory is backed up when required, the kernel is replayed through
 * KernelReplay, basic info is written on success, and the simulator launcher is invoked
 * when device-to-simulator is enabled for a non-MC2 kernel.
 *
 * @param stream            stream the kernel runs on
 * @param kernelLaunchFunc  callback that launches the kernel and reports success
 * @return false when range recording, the output path check or the memory backup fails;
 *         true when no output path is configured or the kernel was recorded for range
 *         replay; otherwise the result of KernelReplay
 */
bool DataCollectInDevice::ProfData(rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc) {
    DEBUG_LOG("Kernel running, kernel name is %s", kernelName_.c_str());
    const bool inRange = ProfConfig::Instance().IsRangeReplay() && MsTx::Instance().IsInMstxRange();
    if (inRange && !RangeReplayProfData(stream)) {
        return false;
    }
    if (outputPath_.empty()) {
        return true;
    }
    if (!IsExist(outputPath_)) {
        return false;
    }
    if (inRange) {
        INFO_LOG("Kernel will profiling on device %d, kernel: %s", deviceId_, kernelName_.c_str());
        SaveBasicInfo();
        return true;
    }

    const bool backupRequired =
        !ProfConfig::Instance().IsAppReplay() && !KernelContext::Instance().GetLcclFlag();
    if (backupRequired && !MemoryContext::Instance().Backup()) {
        INFO_LOG("memoryContext failed!");
        return false;
    }

    INFO_LOG("Start profiling on device %d, kernel: %s", deviceId_, kernelName_.c_str());
    aclrtSynchronizeStreamImplOrigin(stream);

    const bool mc2Mode = KernelContext::Instance().GetMC2Flag();
    if (mc2Mode) {
        // MC2: launch once and wait on both the kernel stream and the AICPU stream first.
        AicpuLaunchArgs &aicpuLaunchArgs = KernelContext::GetAicpuLaunchArgs();
        kernelLaunchFunc();
        aclrtSynchronizeStreamWithTimeoutImplOrigin(stream, SYNCHRONIZE_TIME_OUT);
        aclrtSynchronizeStreamWithTimeoutImplOrigin(aicpuLaunchArgs.stm, SYNCHRONIZE_TIME_OUT);
    }

    const bool replayOk = KernelReplay(stream, kernelLaunchFunc);
    // Basic info is skipped only when application replay and DBI are both active.
    if (replayOk && !(ProfConfig::Instance().IsAppReplay() && ProfConfig::Instance().IsDbi())) {
        SaveBasicInfo();
    }

    if (ProfConfig::Instance().GetConfig().isDeviceToSimulator && !mc2Mode) {
        if (launchCtx_ == nullptr) {
            simulatorLauncher.Launch(outputPath_);
        } else {
            auto launchId = launchCtx_->GetLaunchParam().launchId;
            simulatorLauncher.Launch(outputPath_, launchId, true);
        }
    }
    return replayOk;
}
/**
 * Warms up using the number of rounds configured in ProfConfig.
 *
 * @param stream            stream the warm-up work is issued on
 * @param kernelLaunchFunc  callback that launches the kernel
 */
void DataCollectInDevice::WarmUp(rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc) const {
    const uint16_t configuredRounds = ProfConfig::Instance().GetWarmUpTimes();
    WarmUp(stream, kernelLaunchFunc, configuredRounds);
}
/**
 * Warms up the device before profiling.
 *
 * Does nothing when warmUpTimes is 0 or the kernel is an MC2 or LCCL kernel. In kernel
 * replay mode the kernel itself is launched warmUpTimes times. In application replay mode
 * warmUpTimes device-to-device copies between two temporary buffers are issued instead.
 *
 * @param stream            stream the warm-up work is issued on
 * @param kernelLaunchFunc  callback that launches the kernel (kernel replay mode only)
 * @param warmUpTimes       number of warm-up rounds
 */
void DataCollectInDevice::WarmUp(
    rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc, uint16_t warmUpTimes) const {
    if (warmUpTimes == 0 || KernelContext::Instance().GetMC2Flag() || KernelContext::Instance().GetLcclFlag()) {
        return;
    }
    INFO_LOG("Warm Up enabled. times:%d", warmUpTimes);
    if (!ProfConfig::Instance().IsAppReplay()) {
        for (uint16_t i = 0; i < warmUpTimes; ++i) {
            kernelLaunchFunc();
        }
        if (aclrtSynchronizeStreamImplOrigin(stream) != ACL_SUCCESS) {
            return;
        }
        DEBUG_LOG("Warm Up success in kernel replay mode.");
        return;
    }
    uint8_t *memInfoA = nullptr;
    uint8_t *memInfoB = nullptr;
    if (aclrtMallocImplOrigin(reinterpret_cast<void **>(&memInfoA), MB_TO_BYTES, ACL_MEM_MALLOC_HUGE_FIRST) !=
        ACL_SUCCESS) {
        return;
    }
    // The deleter runs at scope exit and frees the buffer.
    std::shared_ptr<void> deferA(nullptr, [&memInfoA](std::nullptr_t &) { aclrtFreeImplOrigin(memInfoA); });
    if (aclrtMallocImplOrigin(reinterpret_cast<void **>(&memInfoB), MB_TO_BYTES, ACL_MEM_MALLOC_HUGE_FIRST) !=
        ACL_SUCCESS) {
        return;
    }
    std::shared_ptr<void> deferB(nullptr, [&memInfoB](std::nullptr_t &) { aclrtFreeImplOrigin(memInfoB); });
    bool allCopiesQueued = true;
    for (uint16_t i = 0; i < warmUpTimes; ++i) {
        if (aclrtMemcpyAsyncImplOrigin(
                memInfoB, MB_TO_BYTES, memInfoA, MB_TO_BYTES, ACL_MEMCPY_DEVICE_TO_DEVICE, stream) != ACL_SUCCESS) {
            allCopiesQueued = false;
            break;
        }
    }
    // Always wait for the stream before the deferred frees run: copies queued before a
    // failing one may still be pending and must not outlive the buffers they touch.
    const bool streamDrained = aclrtSynchronizeStreamImplOrigin(stream) == ACL_SUCCESS;
    if (!allCopiesQueued || !streamDrained) {
        return;
    }
    DEBUG_LOG("Warm Up success in application replay mode.");
}
/**
 * Warms up in range replay mode by executing the captured model the configured number of
 * times and then waiting for the stream.
 *
 * @param stream   stream the model is executed on
 * @param modelRI  captured model to execute
 */
void DataCollectInDevice::WarmUp(const rtStream_t &stream, const aclmdlRI &modelRI) const {
    const uint16_t rounds = ProfConfig::Instance().GetWarmUpTimes();
    if (rounds == 0) {
        return;
    }
    INFO_LOG("Warm Up enabled. times:%d", rounds);
    uint16_t done = 0;
    while (done < rounds) {
        aclmdlRIExecuteAsyncImplOrigin(modelRI, stream);
        ++done;
    }
    if (aclrtSynchronizeStreamWithTimeoutImplOrigin(stream, SYNCHRONIZE_TIME_OUT) != ACL_ERROR_NONE) {
        return;
    }
    DEBUG_LOG("Warm Up success in range replay mode.");
}
bool DataCollectInDevice::PrepareClearL2CacheParam(L2cacheParam ¶m) const {
std::string socVersion = DeviceContext::Local().GetSocVersion();
auto it = l2CacheClearTilingMap.find(socVersion);
if (it == l2CacheClearTilingMap.end()) {
WARN_LOG("Cannot get L2Cache info when clear L2Cache.");
return false;
}
uint64_t bufferSize = it->second.clearSizePerCore * it->second.aicCoreNum;
param.bufferSize = bufferSize;
if (!MallocBuffer(param)) {
return false;
}
if (aclrtMallocImplOrigin(¶m.l2Buffer, bufferSize, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) {
WARN_LOG("Malloc buffer failed when clear L2Cache.");
return false;
}
void *hostBlockLen;
if (aclrtMallocHostImplOrigin(&hostBlockLen, sizeof(uint64_t)) != ACL_SUCCESS) {
WARN_LOG("Malloc host tiling info failed when clear L2Cache.");
aclrtFreeImplOrigin(param.l2Buffer);
return false;
}
shared_ptr<void> hostBlockLenDefer(
nullptr, [&hostBlockLen](std::nullptr_t &) { aclrtFreeHostImplOrigin(hostBlockLen); });
*(uint64_t *)hostBlockLen = it->second.clearSizePerCore;
if (aclrtMallocImplOrigin(¶m.blockLen, 32, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) {
WARN_LOG("Malloc tiling info failed when clear L2Cache.");
aclrtFreeImplOrigin(param.l2Buffer);
return false;
}
if (aclrtMemcpyAsyncImplOrigin(param.blockLen, sizeof(uint64_t), hostBlockLen, sizeof(uint64_t),
ACL_MEMCPY_HOST_TO_DEVICE, param.stream) != ACL_SUCCESS ||
aclrtSynchronizeStreamImplOrigin(param.stream) != ACL_SUCCESS) {
WARN_LOG("Memcpy tiling info failed when clear L2Cache.");
aclrtFreeImplOrigin(param.l2Buffer);
aclrtFreeImplOrigin(param.blockLen);
return false;
}
param.blockDim = it->second.aicCoreNum;
DEBUG_LOG("Prepare clear L2Cache param success.");
return true;
}
bool DataCollectInDevice::MallocBuffer(L2cacheParam ¶m) const {
auto ret = CheckAclResult(
aclrtMallocImplOrigin(¶m.buffer, param.bufferSize, ACL_MEM_MALLOC_HUGE_FIRST), "buffer aclrtMallocImpl");
if (ret != ACL_SUCCESS) {
return false;
}
ret = CheckAclResult(aclrtMallocImplOrigin(¶m.flushBuffer, param.bufferSize, ACL_MEM_MALLOC_HUGE_FIRST),
"flush buffer aclrtMallocImpl");
if (ret != ACL_SUCCESS) {
return false;
}
ret = CheckAclResult(aclrtMallocImplOrigin(¶m.cmoBuffer, param.bufferSize, ACL_MEM_MALLOC_HUGE_FIRST),
"cmoBuffer aclrtMallocImpl");
return ret == ACL_SUCCESS;
}
bool DataCollectInDevice::WaitClearL2Cache(L2cacheParam ¶m) const {
const std::string &runSocVersion = DeviceContext::Local().GetSocVersion();
auto ret = CheckAclResult(aclrtMemcpyAsyncImplOrigin(param.flushBuffer, param.bufferSize, param.buffer,
param.bufferSize, ACL_MEMCPY_DEVICE_TO_DEVICE, param.stream),
"aclrtMemcpyAsyncImpl");
if (ret != ACL_SUCCESS) {
return false;
}
ret = CheckAclResult(aclrtSynchronizeStreamImplOrigin(param.stream), "aclrtSynchronizeStreamImpl");
if (ret != ACL_SUCCESS) {
return false;
}
auto chipType = GetProductTypeBySocVersion(runSocVersion);
if (IsChipSeriesTypeValid(chipType, ChipProductType::ASCEND910B_SERIES) ||
IsChipSeriesTypeValid(chipType, ChipProductType::ASCEND910_93_SERIES)) {
ret = CheckAclResult(
aclrtCmoAsyncImplOrigin(param.cmoBuffer, param.bufferSize, ACL_RT_CMO_TYPE_PREFETCH, param.stream),
"aclrtCmoAsyncImpl");
if (ret != ACL_SUCCESS) {
return false;
}
}
ret = CheckAclResult(aclrtSynchronizeStreamImplOrigin(param.stream), "aclrtSynchronizeStreamImpl");
return ret == ACL_SUCCESS;
}
void DataCollectInDevice::FreeL2Cache(L2cacheParam ¶m) const {
if (param.flushBuffer != nullptr) {
CheckAclResult(aclrtFreeImplOrigin(param.flushBuffer), "flush buffer aclrtFreeImpl");
}
if (param.cmoBuffer != nullptr) {
CheckAclResult(aclrtFreeImplOrigin(param.cmoBuffer), "cmo buffer aclrtFreeImpl");
}
if (param.buffer != nullptr) {
CheckAclResult(aclrtFreeImplOrigin(param.buffer), "buffer aclrtFreeImpl");
}
if (param.l2Buffer != nullptr) {
CheckAclResult(aclrtFreeImplOrigin(param.l2Buffer), "l2Buffer aclrtFreeImpl");
}
if (param.blockLen != nullptr) {
CheckAclResult(aclrtFreeImplOrigin(param.blockLen), "blockLen aclrtFreeImpl");
}
}
bool DataCollectInDevice::ClearL2Cache(L2cacheParam ¶m) const {
std::vector<void *> inputArgs = {param.l2Buffer, param.blockLen};
if (!DFXKernelLauncher::Instance().CallClearL2Cache(param.blockDim, param.stream, inputArgs)) {
WARN_LOG("Failed to clear L2cache by operator");
return false;
}
if (!WaitClearL2Cache(param)) {
WARN_LOG("Failed to clear L2cache by move memory to L2");
return false;
}
return true;
}
/**
 * Launches the empty kernel through DFXKernelLauncher and logs a warning on failure.
 *
 * @param stream  stream the empty kernel is launched on
 * @return true on success, false on failure
 */
bool DataCollectInDevice::CallEmptyKernel(void *stream) const {
    const bool launched = DFXKernelLauncher::Instance().CallEmptyKernel(stream);
    if (!launched) {
        WARN_LOG("Failed to call empty kernel for L2cache");
    }
    return launched;
}
void DataCollectInDevice::LoadFrequency() {
auto ret = halGetDeviceInfoOrigin(deviceId_, MODULE_TYPE_AICORE, INFO_TYPE_FREQUE, &ratedFreq_);
if (ret != tagDrvError::DRV_ERROR_NONE) {
WARN_LOG("Can not get rated aicore frequency.Please check dlopen function.");
ratedFreq_ = -1;
}
int32_t size = sizeof(int32_t);
ret = halGetDeviceInfoByBuffOrigin(
deviceId_, MODULE_TYPE_AICORE, INFO_TYPE_CURRENT_FREQ, static_cast<void *>(&curFreq_), &size);
if (ret != tagDrvError::DRV_ERROR_NONE) {
WARN_LOG("Can not get current aicore frequency.Please check dlopen function. Error code:%d", ret);
curFreq_ = -1;
}
DEBUG_LOG("get current freq: %d, rated freq: %ld.", curFreq_, ratedFreq_);
}
/**
 * Writes "op_basic_info.txt" into the output path as "Key=Value" lines: kernel name, block
 * dim, SoC version, device id, pid (outside application replay), frequencies (outside
 * range replay, "NA" when a value is -1) and the MC2 / LCCL / SIMT flags.
 */
void DataCollectInDevice::SaveBasicInfo() {
    std::string basicInfoTxt = JoinPath({outputPath_, "op_basic_info.txt"});
    if (!CheckWriteFilePathValid(basicInfoTxt)) {
        ERROR_LOG("check file: %s failed", basicInfoTxt.c_str());
        return;
    }
    std::ofstream outFile(basicInfoTxt.c_str(), std::ios::out | std::ios::binary);
    if (!outFile.is_open()) {
        ERROR_LOG("Cannot create file [%s]", basicInfoTxt.c_str());
        return;
    }

    const std::string runSocVersion = DeviceContext::Local().GetSocVersion();
    // The block dim comes from the launch context when there is one.
    const uint32_t blockDim = (launchCtx_ != nullptr) ? launchCtx_->GetLaunchParam().blockDim
                                                      : KernelContext::Instance().GetBlockId();
    std::string newKernelName = KernelNameConver(kernelName_);

    outFile << "Op Name=" << newKernelName << "\n";
    outFile << "Block Dim=" << blockDim << "\n";
    outFile << "Run Soc Version=" << runSocVersion << "\n";
    outFile << "Device Id=" << deviceId_ << "\n";
    if (!ProfConfig::Instance().IsAppReplay()) {
        outFile << "Pid=" << getpid() << "\n";
    }
    if (!ProfConfig::Instance().IsRangeReplay()) {
        outFile << "Current Freq=";
        if (curFreq_ != -1) {
            outFile << curFreq_;
        } else {
            outFile << "NA";
        }
        outFile << "\n";

        outFile << "Rated Freq=";
        if (ratedFreq_ != -1) {
            outFile << ratedFreq_;
        } else {
            outFile << "NA";
        }
        outFile << "\n";
    }
    outFile << "Is MC2=" << static_cast<int>(KernelContext::Instance().GetMC2Flag()) << "\n";
    outFile << "Is Lccl=" << static_cast<int>(KernelContext::Instance().GetLcclFlag()) << "\n";
    outFile << "Has Simt=" << static_cast<int>(hasSimt_) << "\n";
    outFile.close();
    Chmod(basicInfoTxt, SAVE_DATA_FILE_AUTHORITY);
}
/**
 * Writes "freq.txt" into outputPath with the current and rated frequency, using "NA" for
 * a value of -1.
 *
 * @param outputPath  directory that receives freq.txt
 */
void DataCollectInDevice::SaveFrequency(const string &outputPath) const {
    std::string freqFile = JoinPath({outputPath, "freq.txt"});
    if (!CheckWriteFilePathValid(freqFile)) {
        ERROR_LOG("check file: %s failed", freqFile.c_str());
        return;
    }
    std::ofstream outFile(freqFile.c_str(), std::ios::out | std::ios::binary);
    if (!outFile.is_open()) {
        ERROR_LOG("Cannot create file [%s]", freqFile.c_str());
        return;
    }

    outFile << "Current Freq=";
    if (curFreq_ != -1) {
        outFile << curFreq_;
    } else {
        outFile << "NA";
    }
    outFile << "\n";

    outFile << "Rated Freq=";
    if (ratedFreq_ != -1) {
        outFile << ratedFreq_;
    } else {
        outFile << "NA";
    }
    outFile << "\n";

    outFile.close();
    Chmod(freqFile, SAVE_DATA_FILE_AUTHORITY);
}
/**
 * Creates the collector implementation: DataCollectWithSimulator in simulator mode,
 * DataCollectInDevice otherwise.
 *
 * @param ctx           launch context handed to the implementation
 * @param isInitOutput  forwarded to DataCollectInDevice only
 */
ProfDataCollect::ProfDataCollect(const LaunchContextSP &ctx, bool isInitOutput) {
    if (!ProfConfig::Instance().IsSimulator()) {
        dataCollect_ = MakeShared<DataCollectInDevice>(ctx, isInitOutput);
        return;
    }
    dataCollect_ = MakeShared<DataCollectWithSimulator>(ctx);
}
/** Forwards instruction profiling of one kernel launch to the active collector. */
bool ProfDataCollect::InstrProfData(rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc) {
    const bool collected = dataCollect_->InstrProfData(stream, kernelLaunchFunc);
    return collected;
}
/** Forwards profiling of one kernel launch to the active collector. */
bool ProfDataCollect::ProfData(rtStream_t stream, const std::function<bool(void)> &kernelLaunchFunc) {
    const bool collected = dataCollect_->ProfData(stream, kernelLaunchFunc);
    return collected;
}
/** Forwards the argument-less ProfData call to the active collector. */
bool ProfDataCollect::ProfData() const {
    const bool collected = dataCollect_->ProfData();
    return collected;
}
/** Forwards ProfInit with the given handle, stub function and type flag to the active collector. */
void ProfDataCollect::ProfInit(const void *hdl, const void *stubFunc, bool type) const {
    auto &collector = *dataCollect_;
    collector.ProfInit(hdl, stubFunc, type);
}
/** Delegates to DataCollectWithSimulator::SaveObject for the given handle. */
bool ProfDataCollect::SaveObject(const void *hdl) {
    const bool saved = DataCollectWithSimulator::SaveObject(hdl);
    return saved;
}
/** Delegates to DataCollectWithSimulator::SaveObject for the given register context. */
bool ProfDataCollect::SaveObject(const RegisterContextSP &ctx) {
    const bool saved = DataCollectWithSimulator::SaveObject(ctx);
    return saved;
}
/**
 * Terminates the local process early: sends SIGINT, waits 30 seconds, then sends SIGKILL.
 */
void ProfDataCollect::TerminateInAdvance() const {
    const auto runningDevice = DeviceContext::GetRunningDeviceId();
    INFO_LOG("Kill op process on device %d for the number of kernel reaches collection range.", runningDevice);

    (void)LocalProcess::GetInstance().TerminateWithSignal(SIGINT);

    // Wait before escalating to SIGKILL.
    constexpr uint32_t gracePeriodSeconds = 30U;
    const std::chrono::seconds gracePeriod(gracePeriodSeconds);
    std::this_thread::sleep_for(gracePeriod);

    (void)LocalProcess::GetInstance().TerminateWithSignal(SIGKILL);
}
/**
 * Sends the post notification and terminates the process early when the notification
 * succeeds and the response has a non-zero termination field.
 */
void ProfDataCollect::PostProcess() const {
    ProcessCtrl::Rsp rsp{};
    const bool notified = ProfConfig::Instance().PostNotify(rsp);
    if (!notified || rsp.termination == 0) {
        return;
    }
    TerminateInAdvance();
}
/** Forwards GenBBcountFile with the register id and memory block to the active collector. */
void ProfDataCollect::GenBBcountFile(uint64_t regId, uint64_t memSize, uint8_t *memInfo) const {
    auto &collector = *dataCollect_;
    collector.GenBBcountFile(regId, memSize, memInfo);
}
/** @return true when the active collector has a non-empty output path */
bool ProfDataCollect::IsNeedProf() const {
    const bool hasOutputPath = !dataCollect_->outputPath_.empty();
    return hasOutputPath;
}
/**
 * @return true when the BB count DBI flag is set, profiling is needed, and the kernel is
 *         neither MC2 nor LCCL
 */
bool ProfDataCollect::IsBBCountNeedGen() {
    auto config = ProfConfig::Instance().GetConfig();
    if (!(config.dbiFlag & DBI_FLAG_BB_COUNT)) {
        return false;
    }
    if (!IsNeedProf()) {
        return false;
    }
    if (KernelContext::Instance().GetMC2Flag()) {
        return false;
    }
    return !KernelContext::Instance().GetLcclFlag();
}
/**
 * @return true when profiling is needed, the kernel is not MC2, and device-to-simulator
 *         is enabled in the config
 */
bool ProfDataCollect::IsNeedDumpContext() {
    if (!IsNeedProf()) {
        return false;
    }
    if (KernelContext::Instance().GetMC2Flag()) {
        return false;
    }
    return ProfConfig::Instance().GetConfig().isDeviceToSimulator;
}
/**
 * @return true when the memory chart DBI flag is set, profiling is needed, and the kernel
 *         is neither MC2 nor LCCL
 */
bool ProfDataCollect::IsMemoryChartNeedGen() {
    auto config = ProfConfig::Instance().GetConfig();
    if (!(config.dbiFlag & DBI_FLAG_MEMORY_CHART)) {
        return false;
    }
    if (!IsNeedProf()) {
        return false;
    }
    if (KernelContext::Instance().GetMC2Flag()) {
        return false;
    }
    return !KernelContext::Instance().GetLcclFlag();
}
/**
 * @return true when the operand record DBI flag is set, profiling is needed, the kernel is
 *         neither MC2 nor LCCL, and range replay is off
 */
bool ProfDataCollect::IsOperandRecordNeedGen() {
    auto config = ProfConfig::Instance().GetConfig();
    if (!(config.dbiFlag & DBI_FLAG_OPERAND_RECORD)) {
        return false;
    }
    if (!IsNeedProf()) {
        return false;
    }
    if (KernelContext::Instance().GetMC2Flag()) {
        return false;
    }
    if (KernelContext::Instance().GetLcclFlag()) {
        return false;
    }
    return !ProfConfig::Instance().IsRangeReplay();
}
/** @return true when PC sampling is enabled and profiling is needed */
bool ProfDataCollect::IsPCSamplingNeedGen() {
    if (!ProfConfig::Instance().IsPCSamplingEnabled()) {
        return false;
    }
    return IsNeedProf();
}
/** @return true when the pipe timeline is enabled and profiling is needed */
bool ProfDataCollect::IsPipeTimelineNeedGen() {
    if (!ProfConfig::Instance().IsPipeTimelineEnabled()) {
        return false;
    }
    return IsNeedProf();
}
/** @return true when the instruction timeline is enabled and profiling is needed */
bool ProfDataCollect::IsInstrTimelineNeedGen() {
    if (!ProfConfig::Instance().IsInstrTimelineEnabled()) {
        return false;
    }
    return IsNeedProf();
}
/**
 * @return true when the warp timeline is enabled, profiling is needed, the collector's
 *         hasSimt_ flag is set, and range replay is off
 */
bool ProfDataCollect::IsWarpTimelineNeedGen() {
    if (!ProfConfig::Instance().IsWarpTimelineEnabled()) {
        return false;
    }
    if (!IsNeedProf()) {
        return false;
    }
    if (!dataCollect_->hasSimt_) {
        return false;
    }
    return !ProfConfig::Instance().IsRangeReplay();
}
/**
 * @return false for MC2 and LCCL kernels; otherwise true when profiling is not needed,
 *         when the DBI flag equals DBI_FLAG_BB_COUNT, or when application replay is off
 */
bool ProfDataCollect::IsNeedRunOriginLaunch() {
    const bool collectiveKernel =
        KernelContext::Instance().GetMC2Flag() || KernelContext::Instance().GetLcclFlag();
    if (collectiveKernel) {
        return false;
    }
    if (!IsNeedProf()) {
        return true;
    }
    if (ProfConfig::Instance().GetConfig().dbiFlag == DBI_FLAG_BB_COUNT) {
        return true;
    }
    return !ProfConfig::Instance().IsAppReplay();
}
/** Forwards GenDBIData with the given memory block to the active collector. */
void ProfDataCollect::GenDBIData(uint64_t memSize, uint8_t *memInfo) const {
    auto &collector = *dataCollect_;
    collector.GenDBIData(memSize, memInfo);
}
/** Forwards GenRecordData with the given memory block and record name to the active collector. */
void ProfDataCollect::GenRecordData(uint64_t memSize, uint8_t *memInfo, const std::string &recordName) const {
    auto &collector = *dataCollect_;
    collector.GenRecordData(memSize, memInfo, recordName);
}
/** Forwards a range replay of the captured model to the active collector. */
bool ProfDataCollect::RangeReplay(const rtStream_t &stream, const aclmdlRI &modelRI) {
    const bool replayed = dataCollect_->RangeReplay(stream, modelRI);
    return replayed;
}
/**
 * Looks up the output path registered for a device, under outputMutex_.
 *
 * @param device  device id
 * @return the registered path, or an empty string (after an error log) when none exists
 */
std::string ProfDataCollect::GetAicoreOutputPath(int32_t device) {
    std::lock_guard<std::mutex> pathLock(DataCollect::outputMutex_);
    auto &pathsByDevice = DataCollect::deviceOutputPathMap_;
    const auto entry = pathsByDevice.find(device);
    if (entry == pathsByDevice.end()) {
        ERROR_LOG("Can not find device %d output path", device);
        return "";
    }
    return entry->second;
}
/**
 * Returns the replay count stored for a device, under replayCountMutex_. A missing entry
 * is created with the value 0.
 *
 * @param device  device id
 * @return the stored replay count
 */
uint32_t ProfDataCollect::GetDeviceReplayCount(int32_t device) {
    std::lock_guard<std::mutex> countLock(DataCollect::replayCountMutex_);
    auto &countsByDevice = DataCollect::deviceReplayCountMap_;
    if (countsByDevice.count(device) == 0) {
        DEBUG_LOG("Can not find device %d replay count, use default 0.", device);
        countsByDevice[device] = 0;
    }
    return countsByDevice[device];
}
/**
 * Returns a copy of the range replay config stored for a thread, under rangeConfigMutex_.
 * A missing entry is created as {false, 0, nullptr, {}} first.
 *
 * @param threadId  thread whose config is requested
 * @return copy of the thread's range replay config
 */
RangeReplayConfig ProfDataCollect::GetThreadRangeConfigMap(std::thread::id threadId) {
    std::lock_guard<std::mutex> configLock(DataCollect::rangeConfigMutex_);
    auto &configsByThread = DataCollect::threadRangeConfigMap_;
    if (configsByThread.count(threadId) == 0) {
        DEBUG_LOG("Can not find threadId %zu range replay config, use default value.",
            std::hash<std::thread::id>()(threadId));
        configsByThread[threadId] = {false, 0, nullptr, {}};
    }
    return configsByThread[threadId];
}
/**
 * Resets flag, stream and outputs of a thread's range replay config, under
 * rangeConfigMutex_. The count field is left untouched; a missing entry is not created.
 *
 * @param threadId  thread whose config is reset
 */
void ProfDataCollect::ResetRangeConfig(std::thread::id threadId) {
    std::lock_guard<std::mutex> configLock(DataCollect::rangeConfigMutex_);
    auto &configsByThread = DataCollect::threadRangeConfigMap_;
    const auto entry = configsByThread.find(threadId);
    if (entry == configsByThread.end()) {
        return;
    }
    auto &config = entry->second;
    config.flag = false;
    config.stream = nullptr;
    config.outputs = {};
}