* This file is part of the MindStudio project.
* Copyright (c) 2025 Huawei Technologies Co.,Ltd.
*
* MindStudio is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
* -------------------------------------------------------------------------
*/
#ifndef MS_SERVER_PROFILER_MARKER_H
#define MS_SERVER_PROFILER_MARKER_H
#include <atomic>
#include <chrono>
#include <cstdint>
#include <limits>
#include <memory>
#include <set>
#include <string>
#include <thread>
#include <nlohmann/json.hpp>
#include "ServiceProfilerInterface.h"
#include "Config.h"
#include "NpuMemoryUsage.h"
#include "acl/acl.h"
#include "mspti/mspti.h"
// Global-scope shorthand for nlohmann::json; visible to every file that includes this header.
using Json = nlohmann::json;
namespace msServiceProfiler {
// Sentinel device ID: the largest value representable in uint32_t.
constexpr uint32_t INVALID_DEVICE_ID{std::numeric_limits<uint32_t>::max()};

// Shorter spelling for the `struct aclprofConfig` type.
using AclprofConfig = struct aclprofConfig;
/**
 * Manager object for service profiling.
 *
 * Instances cannot be created or copied by callers: the constructor and destructor are
 * private and the copy operations are deleted. Access goes through GetInstance().
 * The inline getters below forward directly to the Config object held in config_.
 */
class ServiceProfilerManager {
public:
    ServiceProfilerManager(const ServiceProfilerManager &) = delete;
    ServiceProfilerManager &operator=(const ServiceProfilerManager &) = delete;

    /** Returns a reference to the manager instance (defined out of line). */
    static ServiceProfilerManager &GetInstance();

    /**
     * Reports whether profiling is active for the requested level.
     * @param level Level to test against the configured level.
     * @return true when the config's enable flag is set and its level is >= `level`.
     */
    bool IsEnable(uint32_t level) const
    {
        return config_->GetEnable() && config_->GetLevel() >= level;
    }

    /** Returns the ACL task-time level string stored in the config. */
    const std::string &GetAclTaskTimeLevel() const
    {
        return config_->GetAclTaskTimeLevel();
    }

    /** Returns the config's ACL aicore-metrics setting converted to int. */
    int GetAclProfAicoreMetricsValue() const
    {
        return static_cast<int>(config_->GetAclProfAicoreMetrics());
    }

    /** Forwards Config::GetTorchProfStack(). */
    bool GetTorchProfStack() const
    {
        return config_->GetTorchProfStack();
    }

    /** Forwards Config::GetTorchProfModules(). */
    bool GetTorchProfModules() const
    {
        return config_->GetTorchProfModules();
    }

    /** Forwards Config::GetTorchProfStepNum(). */
    int GetTorchProfStepNum() const
    {
        return config_->GetTorchProfStepNum();
    }

    /** Forwards Config::GetTorchProfilerEnable(). */
    bool GetTorchProfilerEnable() const
    {
        return config_->GetTorchProfilerEnable();
    }

    /** Forwards Config::GetEnableDomainFilter(). */
    bool GetEnableDomainFilter() const
    {
        return config_->GetEnableDomainFilter();
    }

    /** Returns the set of valid domain names stored in the config. */
    const std::set<std::string> &GetValidDomain() const
    {
        return config_->GetValidDomain();
    }

    /** Sets the atomic notifyStarted flag to true. */
    void NotifyStartProfiler()
    {
        notifyStarted = true;
    }

    /** Sets the atomic notifyStarted flag to false. */
    void NotifyStopProfiler()
    {
        notifyStarted = false;
    }

    // The functions below are defined out of line; see the implementation file for their
    // exact behaviour.
    // NOTE(review): `isInit` presumably distinguishes the initial start from later restarts — confirm.
    void StartProfiler(bool isInit = false);
    void StartAclProfiler(const std::string &profPath, uint32_t deviceID);
    void StopProfiler();
    void NotifyDeviceID(uint32_t deviceID);
    void StopThread();
    static std::string ToSemName(const std::string &oriSemName);

    /**
     * Returns the configuration file path stored in the config.
     * Const-qualified (it only reads through config_) so it is callable on const
     * references, matching GetProfPath().
     */
    const std::string &GetConfigPath() const
    {
        return config_->GetConfigPath();
    }

    /** Returns the profiling output path stored in the config. */
    const std::string &GetProfPath() const
    {
        return config_->GetProfPath();
    }

    // Callback registration; each takes a plain function pointer with no arguments and no
    // return value. Defined out of line.
    void RegisterStartCallback(void (*callback)());
    void RegisterStopCallback(void (*callback)());
    void RegisterStartMetricCallback(void (*callback)());
    void RegisterStopMetricCallback(void (*callback)());

private:
    ServiceProfilerManager();
    ~ServiceProfilerManager();

    // Internal helpers, all defined out of line.
    void SetAclProfHostSysConfig() const;
    void DynamicControl();
    void LaunchThread();
    void ThreadFunction();
    void MarkFirstProcessAsMain();
    AclprofConfig *ProfCreateConfig(uint32_t deviceID);
    void StartMsptiProf(const std::string &profPath);
    void StartAclProf(const std::string &profPath, uint32_t deviceID);
    void StopAclProf();
    void RecordMemoryUsage(NpuMemoryUsage &npuMemoryUsage);
    void ProfTimerCtrl();
    void ProfStepCtrl();

private:
    // Declaration order is kept as-is because it fixes member initialization order.
    bool isMaster_ = true;
    bool started_ = false;
    bool aclProfStarted_ = false;
    bool msptiStarted_ = false;
    bool profilerStoppedByLimit_ = false;
    void *configHandle_ = nullptr;
    // Value-initialized so the handle never holds an indeterminate value.
    msptiSubscriberHandle msptiHandle_{};
    int lastUpdate_ = 0;
    // Time point captured when this object is constructed.
    std::chrono::high_resolution_clock::time_point initiate = std::chrono::high_resolution_clock::now();
    // NOTE(review): presumably the run flag polled by thread_ — confirm in the implementation.
    std::atomic<bool> threadRunFlag_{true};
    // Written by NotifyStartProfiler() / NotifyStopProfiler().
    std::atomic<bool> notifyStarted{false};
    // Starts as INVALID_DEVICE_ID.
    std::atomic<uint32_t> deviceID_{INVALID_DEVICE_ID};
    int stopTargetStep_ = -1;
    std::thread thread_;
    // Configuration object that every inline getter above forwards to.
    std::shared_ptr<Config> config_;
    // Function-pointer slots matching the Register*Callback() signatures; null by default.
    void (*startCallback_)(void) = nullptr;
    void (*stopCallback_)(void) = nullptr;
    void (*startMetricCallback_)(void) = nullptr;
    void (*stopMetricCallback_)(void) = nullptr;
    bool metricStartCallbackInvoked_ = false;
};
}
#endif