* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "profiler/device/data_saver.h"
#include <fstream>
#include <numeric>
#include "sys/stat.h"
#include "utils/ms_utils.h"
#include "utils/ms_context.h"
namespace mindspore {
namespace profiler {
// Builds a detail record for one operator from its raw profiling info.
//
// op_info:    profiling data of the operator; its op_name is stored as the full name.
// proportion: value stored unchanged in proportion_.
//
// The operator type is the part of the full name between the last '/' and the
// last '-'; the operator name is everything after the last '/'. The average
// time is op_host_cost_time divided by op_count. When op_info is null or
// op_count is 0 an error is logged and the remaining fields are not assigned.
OpDetailInfo::OpDetailInfo(const std::shared_ptr<OpInfo> op_info, float proportion)
    : op_info_(op_info), proportion_(proportion) {
  // Guard against a null pointer before the first dereference below.
  if (op_info == nullptr) {
    MS_LOG(ERROR) << "The operation information can not be null.";
    return;
  }
  op_full_name_ = op_info->op_name;
  // rfind yields npos when '/' is absent; npos + 1 wraps to 0, i.e. the start of the name.
  const auto type_begin = op_full_name_.rfind('/') + 1;
  const auto dash_pos = op_full_name_.rfind('-');
  // Without a '-' after the last '/', the type extends to the end of the name.
  const auto type_length =
    (dash_pos == std::string::npos || dash_pos < type_begin) ? std::string::npos : dash_pos - type_begin;
  op_type_ = op_full_name_.substr(type_begin, type_length);
  op_name_ = op_full_name_.substr(type_begin);
  if (op_info->op_count == 0) {
    MS_LOG(ERROR) << "The num of operations can not be 0.";
    return;
  }
  op_avg_time_ = op_info->op_host_cost_time / op_info->op_count;
}
// Converts the raw operator map into the saver's internal tables.
//
// For every entry of op_info_maps this records the entry's start_duration in
// op_timestamps_map_, appends an OpDetailInfo to op_detail_infos_ and folds it
// into op_type_infos_ via AddOpDetailInfoForType. Afterwards avg_time_ of each
// operator type is set to total_time_ / count_.
//
// Processing stops with an error log when the summed operator time is 0 or
// when an operator type has a count_ of 0.
void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) {
  op_detail_infos_.reserve(op_info_maps.size());
  const float total_time_sum = GetTotalOpTime(op_info_maps);
  // Iterate by const reference: a by-value loop variable would copy the whole
  // entry, including its start_duration collection, on every iteration.
  for (const auto &item : op_info_maps) {
    op_timestamps_map_[item.first] = item.second.start_duration;
    // Checked after the timestamp assignment so the statement order (and thus
    // what is already stored when bailing out) stays as it has always been.
    if (total_time_sum == 0.0) {
      MS_LOG(ERROR) << "The total operation times can not be 0.";
      return;
    }
    const float proportion = item.second.op_host_cost_time / total_time_sum;
    auto op_info = std::make_shared<OpInfo>(item.second);
    if (op_info == nullptr) {
      MS_LOG(ERROR) << "Create Operation information node failed when parse operation information.";
      return;
    }
    // Construct the record directly inside the container instead of copying a temporary.
    op_detail_infos_.emplace_back(op_info, proportion);
    AddOpDetailInfoForType(op_detail_infos_.back());
  }
  for (auto &op_type : op_type_infos_) {
    if (op_type.second.count_ == 0) {
      MS_LOG(ERROR) << "The num of operation type can not be 0.";
      return;
    }
    op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_;
  }
  MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items.";
  MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items.";
}
void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) {
OpType op_type = OpType{op_detail_info.op_type_,
op_detail_info.op_info_->op_count,
op_detail_info.op_info_->op_count,
op_detail_info.op_info_->op_host_cost_time,
0,
op_detail_info.proportion_};
std::string type_name = op_detail_info.op_type_;
auto iter = op_type_infos_.find(type_name);
if (iter == op_type_infos_.end()) {
op_type_infos_.emplace(type_name, op_type);
} else {
iter->second += op_type;
}
}
// Returns the sum of op_host_cost_time over all entries of op_info_maps
// (0 for an empty map) and logs the result at DEBUG level.
float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) const {
  // The entry is taken by const reference; taking it by value would copy
  // every map element just to read a single field.
  const float sum = std::accumulate(
    op_info_maps.begin(), op_info_maps.end(), 0.0f,
    [](float partial, const auto &entry) { return partial + entry.second.op_host_cost_time; });
  MS_LOG(DEBUG) << "The total op time is " << sum;
  return sum;
}
void DataSaver::WriteOpType(const std::string &saver_base_dir) const {
std::string file_path = saver_base_dir + "/" + op_side_ + "_op_type_info_" + device_id_ + ".csv";
std::ofstream ofs(file_path);
if (!ofs.is_open()) {
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
return;
}
try {
if (op_side_ == "cpu") {
ofs << OpType().GetCpuHeader() << std::endl;
for (auto op_type_info : op_type_infos_) {
op_type_info.second.OutputCpuOpTypeInfo(ofs);
}
}
if (op_side_ == "gpu") {
ofs << OpType().GetGpuHeader() << std::endl;
for (auto op_type_info : op_type_infos_) {
op_type_info.second.OutputGpuOpTypeInfo(ofs);
}
}
} catch (const std::exception &e) {
MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
}
ofs.close();
ChangeFileMode(file_path);
MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path;
}
void DataSaver::WriteOpDetail(const std::string &saver_base_dir) const {
std::string file_path = saver_base_dir + "/" + op_side_ + "_op_detail_info_" + device_id_ + ".csv";
std::ofstream ofs(file_path);
if (!ofs.is_open()) {
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
return;
}
try {
if (op_side_ == "cpu") {
ofs << OpDetailInfo().GetCpuHeader() << std::endl;
for (auto op_detail : op_detail_infos_) {
op_detail.OutputCpuOpDetailInfo(ofs);
}
}
if (op_side_ == "gpu") {
ofs << OpDetailInfo().GetGpuHeader() << std::endl;
for (auto op_detail : op_detail_infos_) {
op_detail.OutputGpuOpDetailInfo(ofs);
}
}
} catch (const std::exception &e) {
MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
}
ofs.close();
ChangeFileMode(file_path);
MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path;
}
void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) const {
std::string file_path = saver_base_dir + "/" + op_side_ + "_op_execute_timestamp_" + device_id_ + ".txt";
std::ofstream ofs(file_path);
if (!ofs.is_open()) {
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
return;
}
try {
for (const auto &op_timestamp_info : op_timestamps_map_) {
if (op_side_ == "cpu") {
ofs << op_timestamp_info.first << ";HostCpuOps;";
} else {
ofs << op_timestamp_info.first << ";GpuOps;";
}
for (auto start_end : op_timestamp_info.second) {
ofs << start_end.start_timestamp << "," << start_end.duration << " ";
}
ofs << std::endl;
}
} catch (const std::exception &e) {
MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
}
ofs.close();
ChangeFileMode(file_path);
}
// Sets the permissions of file_path to owner read + owner write
// (S_IRUSR | S_IWUSR) with chmod. A failing chmod is only reported
// through a warning log; nothing is returned to the caller.
void DataSaver::ChangeFileMode(const std::string &file_path) const {
  const int chmod_result = chmod(common::SafeCStr(file_path), S_IRUSR | S_IWUSR);
  if (chmod_result != -1) {
    return;
  }
  MS_LOG(WARNING) << "Modify file: " << file_path << " to rw fail.";
}
}
}