* Copyright (c) 2026 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#include "framework/runtime/dump/model_dump_manager.h"
#include "framework/runtime/dump/dump_config.h"
#include "framework/runtime/dump/dump_callback_manager.h"
#include "framework/runtime/dump/profiling_callback_manager.h"
#include "framework/runtime/dump/data_dump_impl.h"
#include "framework/runtime/dump/exception_dump_impl.h"
#include "framework/runtime/dump/overflow_dump_impl.h"
#include "framework/runtime/dump/profiling_impl.h"
#include "common/checker.h"
#include "framework/common/debug/ge_log.h"
namespace ge {
namespace dump {
// Process-wide initialization of the dump subsystem.
//
// Resets the shared DumpConfig first, then runs the global init of the dump
// callback manager followed by the profiling callback manager. Both init calls
// are wrapped in GE_ASSERT_SUCCESS (presumably aborts this function with an
// error status on a non-SUCCESS result — confirm against common/checker.h).
//
// Returns SUCCESS when every step above has completed.
Status ModelDumpManager::GlobalInit() {
  GELOGD("ModelDumpManager::GlobalInit start");

  // Start from a clean configuration before any callback is wired up.
  DumpConfig::Instance().Reset();

  GE_ASSERT_SUCCESS(DumpCallbackManager::GlobalInit());
  GE_ASSERT_SUCCESS(ProfilingCallbackManager::GlobalInit());

  return SUCCESS;
}
// Creates the per-model dump manager.
//
// Stores `model_id` and allocates one instance of each implementation object
// this manager delegates to: data dump, exception dump, overflow dump and
// profiling (created in that order).
ModelDumpManager::ModelDumpManager(uint32_t model_id) : model_id_(model_id) {
  GELOGD("ModelDumpManager constructed, model_id=%u", model_id_);

  data_dump_impl_ = std::make_unique<DataDumpImpl>();
  exception_impl_ = std::make_unique<ExceptionDumpImpl>();
  overflow_impl_ = std::make_unique<OverflowDumpImpl>();
  profiling_impl_ = std::make_unique<ProfilingImpl>();
}
// Tears the manager down: runs Clear() to release per-model dump state, then
// logs the destruction together with the model id.
ModelDumpManager::~ModelDumpManager() {
  Clear();

  GELOGD("ModelDumpManager destructed, model_id=%u", model_id_);
}
// Records the model-level dump information and wires it into the impl objects.
//
// Steps:
//   1. Copies `model_info` into this manager.
//   2. Passes the device id to the exception dump impl.
//   3. Only when overflow dump is enabled in DumpConfig: registers the model
//      handle with the overflow impl and forwards the resulting op-debug
//      task id / stream id / address to the data dump impl.
//
// @param model_info  Model description supplied by the caller; a null
//                    `model_name` is logged as an empty string.
// @return SUCCESS, or the status returned by the overflow registration when
//         that registration fails.
Status ModelDumpManager::SetModelDumpInfo(const ModelDumpInfo& model_info) {
  const char* name_for_log = "";
  if (model_info.model_name != nullptr) {
    name_for_log = model_info.model_name;
  }
  GELOGD("SetModelDumpInfo: model_id=%u, model_name=%s", model_info.model_id, name_for_log);

  model_info_ = model_info;
  exception_impl_->SetDeviceId(model_info.device_id);

  // Nothing more to do unless overflow detection was requested.
  if (!DumpConfig::Instance().IsOverflowDumpEnabled()) {
    return SUCCESS;
  }

  const Status register_status = overflow_impl_->RegisterForModel(model_info.rt_model_handle);
  if (register_status != SUCCESS) {
    GELOGE(register_status, "Overflow register failed for model_id=%u", model_info.model_id);
    return register_status;
  }

  // Hand the op-debug identifiers obtained from the overflow impl to the
  // data dump impl.
  data_dump_impl_->SetOpDebugInfo(overflow_impl_->GetOpDebugTaskId(),
                                  overflow_impl_->GetOpDebugStreamId(),
                                  overflow_impl_->GetOpDebugAddr());
  GELOGD("Set overflow debug info: task_id=%u, stream_id=%u, addr=%p",
         overflow_impl_->GetOpDebugTaskId(),
         overflow_impl_->GetOpDebugStreamId(),
         overflow_impl_->GetOpDebugAddr());

  return SUCCESS;
}
// Reports the start of model loading through the profiling impl.
//
// @return The profiling impl's status when one exists; SUCCESS when there is
//         no profiling impl to report to.
Status ModelDumpManager::ReportModelLoadBegin() const {
  if (profiling_impl_ != nullptr) {
    return profiling_impl_->ReportModelLoadBegin(model_info_);
  }
  return SUCCESS;
}
// Reports the end of model loading through the profiling impl.
//
// @return The profiling impl's status when one exists; SUCCESS when there is
//         no profiling impl to report to.
Status ModelDumpManager::ReportModelLoadEnd() const {
  if (profiling_impl_ != nullptr) {
    return profiling_impl_->ReportModelLoadEnd(model_info_);
  }
  return SUCCESS;
}
// Tells the caller whether data dump applies to the given operator.
//
// The answer is "yes" only when data dump is enabled in DumpConfig and
// DumpConfig also says this operator needs dumping; the per-op check is not
// evaluated when data dump is disabled.
//
// @param op_name       Operator name; a null pointer is treated as "".
// @param is_data_dump  Output flag, written as 1 (dump) or 0 (no dump).
// @return PARAM_INVALID when `is_data_dump` is null, otherwise SUCCESS.
Status ModelDumpManager::IsDataDumpEnabled(const char* op_name, uint8_t* is_data_dump) const {
  if (is_data_dump == nullptr) {
    GELOGW("is_data_dump is null, skip");
    return PARAM_INVALID;
  }

  const char* name_for_lookup = (op_name == nullptr) ? "" : op_name;

  bool dump_this_op = false;
  if (DumpConfig::Instance().IsDataDumpEnabled()) {
    dump_this_op = DumpConfig::Instance().IsOpNeedDump(name_for_lookup);
  }

  GELOGD("IsDataDumpEnabled: op_name=%s, need_data_dump=%u",
         name_for_lookup, static_cast<uint32_t>(dump_this_op));

  *is_data_dump = static_cast<uint8_t>(dump_this_op);
  return SUCCESS;
}
// Pre-processing hook for a task: forwards its L0 exception dump information
// to the exception dump impl when the task carries any.
//
// @param task_info  Task description; a null `op_name` is logged as "".
// @return SUCCESS when the task has no L0 exception dump info; otherwise the
//         status returned by the exception dump impl (failures are logged).
Status ModelDumpManager::PreprocessOm2TaskInfo(const Om2TaskInfo& task_info) {
  const char* name_for_log = (task_info.op_name == nullptr) ? "" : task_info.op_name;
  GELOGD("PreprocessOm2TaskInfo: op_name=%s, stream_id=%u", name_for_log, task_info.stream_id);

  // Tasks without L0 exception dump info need no pre-processing.
  if (task_info.l0_exception_dump_info == nullptr) {
    return SUCCESS;
  }

  const Status report_status = exception_impl_->ReportL0ExceptionDumpInfo(task_info);
  if (report_status != SUCCESS) {
    GELOGE(report_status, "Report L0 exception dump info failed, op_name=%s", name_for_log);
  }
  return report_status;
}
// Registers one task with the dump / exception / profiling implementations.
//
// Flow:
//   1. If data dump applies to this op (data dump enabled AND the op is
//      selected by DumpConfig) or overflow dump is enabled, the task is saved
//      in the data dump impl. A failure here is returned to the caller.
//   2. The task is always saved in the exception dump impl. A failure here is
//      returned to the caller.
//   3. If a profiling impl exists, the task is saved there too. A failure is
//      only logged as a warning and does not affect the return value.
//
// @param task_info  Task description; a null `op_name` is treated as "".
// @return SUCCESS, or the failing status from step 1 or step 2.
Status ModelDumpManager::AddOm2TaskInfo(const Om2TaskInfo& task_info) {
  const char* op_name = (task_info.op_name != nullptr) ? task_info.op_name : "";
  GELOGD("AddOm2TaskInfo: op_name=%s, task_id=%u, stream_id=%u",
         op_name, task_info.task_id, task_info.stream_id);

  const bool need_data_dump = DumpConfig::Instance().IsDataDumpEnabled() &&
                              DumpConfig::Instance().IsOpNeedDump(op_name);
  const bool need_overflow_dump = DumpConfig::Instance().IsOverflowDumpEnabled();
  const bool need_save_to_data_dump = need_data_dump || need_overflow_dump;

  // The flags are cast explicitly so the variadic arguments match the %u
  // conversions, the same way IsDataDumpEnabled() logs its flag.
  GELOGD("AddOm2TaskInfo: op_name=%s, need_data_dump=%u, need_overflow_dump=%u, "
         "need_save_to_data_dump=%u", op_name,
         static_cast<uint32_t>(need_data_dump),
         static_cast<uint32_t>(need_overflow_dump),
         static_cast<uint32_t>(need_save_to_data_dump));

  if (need_save_to_data_dump) {
    // The task type is only needed by the data dump impl, so convert it here.
    const ModelTaskType type = static_cast<ModelTaskType>(task_info.task_type);
    const Status save_ret = data_dump_impl_->SaveTask(task_info, type, task_info.stream,
                                                      overflow_impl_->IsOpDebugEnabled());
    if (save_ret != SUCCESS) {
      GELOGE(save_ret, "Save task dump info failed, op_name=%s", op_name);
      return save_ret;
    }
  }

  const Status exception_ret = exception_impl_->SaveOpInfo(task_info);
  if (exception_ret != SUCCESS) {
    GELOGE(exception_ret, "Save task exception info failed, op_name=%s", op_name);
    return exception_ret;
  }

  // Profiling is best-effort: a failure is reported but does not fail the call.
  if (profiling_impl_ != nullptr) {
    const Status prof_ret = profiling_impl_->SaveTaskInfo(task_info, model_info_);
    if (prof_ret != SUCCESS) {
      GELOGW("Save profiling task info failed, op_name=%s, ret=%u", op_name, prof_ret);
    }
  }
  return SUCCESS;
}
// Dispatches the collected dump information for this model.
//
// When exception dump is enabled the data dump dispatch is skipped entirely.
// Otherwise, if data dump or overflow dump is enabled, the data dump impl is
// asked to build and load the op mapping info for the stored model info.
//
// @return SUCCESS when nothing needs dispatching; otherwise the status
//         returned by the data dump impl.
Status ModelDumpManager::DispatchDumpInfo() {
  GELOGD("DispatchDumpInfo: model_id=%u", model_id_);

  if (DumpConfig::Instance().IsExceptionDumpEnabled()) {
    GELOGD("Exception dump enabled, skip data dump dispatch");
    return SUCCESS;
  }

  const bool dispatch_needed =
      DumpConfig::Instance().IsDataDumpEnabled() || DumpConfig::Instance().IsOverflowDumpEnabled();
  if (!dispatch_needed) {
    return SUCCESS;
  }

  return data_dump_impl_->BuildAndLoadOpMappingInfo(model_info_);
}
// Looks up operator description info by delegating to the exception dump impl.
//
// @param op_id    Identifier handed through unchanged to the exception impl.
// @param op_info  Output object handed through to the exception impl.
// @return Whatever the exception impl's GetOpDescInfo returns (presumably
//         true when a matching entry was found — confirm against the impl).
bool ModelDumpManager::GetOpDescInfo(const OpDescInfoId& op_id, OpDescInfo& op_info) const {
  const bool lookup_result = exception_impl_->GetOpDescInfo(op_id, op_info);
  return lookup_result;
}
// Releases the per-model dump state held by the impl objects.
//
// Clears the data dump impl and the exception dump impl when they exist, and
// unregisters the stored model handle from the overflow impl when both the
// impl and the handle are non-null. Also invoked from the destructor.
//
// NOTE(review): the stored rt_model_handle is not reset here, so calling
// Clear() explicitly and then destroying the object issues the unregister
// call twice — confirm UnregisterForModel tolerates repeated calls.
void ModelDumpManager::Clear() {
  GELOGD("Clear: model_id=%u", model_id_);

  if (data_dump_impl_) {
    data_dump_impl_->Clear();
  }

  if (exception_impl_) {
    exception_impl_->Clear();
  }

  if (overflow_impl_) {
    if (model_info_.rt_model_handle != nullptr) {
      overflow_impl_->UnregisterForModel(model_info_.rt_model_handle);
    }
  }
}
}
}