* Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved.
* libkperf licensed under the Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
* PURPOSE.
* See the Mulan PSL v2 for more details.
* Author: Mr.Gan
* Create: 2024-04-03
* Description: implementations for reading performance counters and initializing counting logic
* of PerfCounterDefault in the KUNPENG_PMU namespace.
******************************************************************************/
#include <cerrno>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <memory>
#include <poll.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/perf_event.h>
#include <linux/version.h>
#include "pmu.h"
#include "linked_list.h"
#include "pfm_event.h"
#include "pmu_event.h"
#include "pcerr.h"
#include "log.h"
#include "perf_counter_default.h"
#include "read_reg.h"
#include "common.h"
using namespace std;
using namespace pcerr;
/**
 * Buffer layout used by ReadGroupEvents() when reading a group leader's fd.
 * The field order follows the perf read layout for the read_format that MapPerfAttr() requests for
 * grouped events (PERF_FORMAT_GROUP | PERF_FORMAT_TOTAL_TIME_ENABLED |
 * PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID), so the members must not be reordered.
 */
struct GroupReadFormat {
// Number of entries stored in values[].
__u64 nr;
// Time the group was enabled.
__u64 timeEnabled;
// Time the group was actually running on the PMU.
__u64 timeRunning;
// One entry per event of the group; the array length is only known at run time (flexible array).
struct {
// Counter value of the event.
__u64 value;
// Event id (present because PERF_FORMAT_ID is requested); not consumed by ReadGroupEvents().
__u64 id;
} values[];
};
/**
 * Read pmu counter and deal with pmu multiplexing.
 * A standalone event is read on its own, a group is read once through its leader,
 * and group members return without reading. The PERF_FORMAT_ID values are ignored for now.
 */
int KUNPENG_PMU::PerfCounterDefault::Read(EventData &eventData)
{
    // Without a valid descriptor there is nothing to read; drop the stored baseline as well.
    if (__glibc_unlikely(this->fd < 0)) {
        this->accumCount.clear();
        return UNKNOWN_ERROR;
    }

    // Dispatch on the role this event plays inside a group.
    switch (groupStatus) {
        case GroupStatus::NO_GROUP:
            return ReadSingleEvent(eventData.data);
        case GroupStatus::GROUP_LEADER:
            return ReadGroupEvents(eventData.data);
        default:
            // Any other status (e.g. a group member) produces no data of its own.
            return SUCCESS;
    }
}
namespace KUNPENG_PMU {
/**
 * Read a counter from user space through the perf mmap page held by countMmap.
 *
 * @param countMmap       wrapper whose `base` points at the perf_event_mmap_page of the event
 * @param perfCountValue  receives value, timeEnabled and timeRunning on success
 * @return SUCCESS, or
 *         LIBPERF_ERR_COUNT_MMAP_IS_NULL when `base` is null,
 *         LIBPERF_ERR_ENABLE_USER_ACCESS_FAILED when the page does not advertise cap_user_rdpmc,
 *         LIBPERF_ERR_ALLOCATE_REGISTER_FAILED when the page reports index 0.
 *
 * NOTE: the whole body is compiled only when LINUX_VERSION_CODE >= 5.9.0. On older headers the
 * function returns SUCCESS without writing anything into perfCountValue.
 */
static int PerfMmapReadSelf(const std::shared_ptr<PerfMmap> &countMmap, struct ReadFormat &perfCountValue)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 9, 0)
uint32_t seq;
uint32_t idx;
uint32_t timeMult = 0;
uint32_t timeShift = 0;
uint64_t cnt = 0;
uint64_t cyc = 0;
uint64_t timeOffset = 0;
uint64_t timeCycles = 0;
// Default mask keeps every bit; only replaced when cap_user_time_short is set.
uint64_t timeMask = ~0ULL;
auto pc = countMmap->base;
if (!pc) {
return LIBPERF_ERR_COUNT_MMAP_IS_NULL;
}
if (!pc->cap_user_rdpmc) {
return LIBPERF_ERR_ENABLE_USER_ACCESS_FAILED;
}
// Sequence-checked read: sample pc->lock, read the fields, and retry if pc->lock changed meanwhile.
do {
seq = ReadOnce(&pc->lock);
Barrier();
perfCountValue.timeEnabled = ReadOnce(&pc->time_enabled);
perfCountValue.timeRunning = ReadOnce(&pc->time_running);
// The time-conversion parameters are only needed when enabled and running time differ.
if (pc->cap_user_rdpmc && perfCountValue.timeEnabled != perfCountValue.timeRunning) {
cyc = ReadTimestamp();
timeMult = ReadOnce(&pc->time_mult);
timeShift = ReadOnce(&pc->time_shift);
timeOffset = ReadOnce(&pc->time_offset);
if (pc->cap_user_time_short) {
timeCycles = ReadOnce(&pc->time_cycles);
timeMask = ReadOnce(&pc->time_mask);
}
}
idx = ReadOnce(&pc->index);
cnt = ReadOnce(&pc->offset);
if (pc->cap_user_rdpmc && idx) {
// The page index is one-based, hence idx - 1 for the register read.
int64_t eventCount = ReadPerfCounter(idx - 1);
uint16_t width = ReadOnce(&pc->pmc_width);
// Shift up and back down on a signed value to sign-extend the pmc_width-bit reading to 64 bits.
eventCount <<= 64 - width;
eventCount >>= 64 - width;
cnt += eventCount;
} else {
// index == 0 (or user access revoked): bail out straight from inside the retry loop.
return LIBPERF_ERR_ALLOCATE_REGISTER_FAILED;
}
Barrier();
} while (ReadOnce(&pc->lock) != seq);
// Add the time elapsed since the page was last updated, converted from the timestamp read above.
if (perfCountValue.timeEnabled != perfCountValue.timeRunning) {
uint64_t delta;
cyc = timeCycles + ((cyc - timeCycles) & timeMask);
delta = timeOffset + MulU64U32Shr(cyc, timeMult, timeShift);
perfCountValue.timeEnabled += delta;
// idx is always non-zero here because idx == 0 returned early inside the loop.
if (idx) {
perfCountValue.timeRunning += delta;
}
}
perfCountValue.value = cnt;
#endif
return SUCCESS;
}
}
/**
 * Read a standalone (non-grouped) event and append the increment since the previous read to data.
 *
 * With user access enabled the counter is read through the mmap page (PerfMmapReadSelf); while the
 * event is not collecting, a zero-increment record based on the stored baseline is emitted instead.
 * Without user access the counter is read from the event fd.
 *
 * @param data  output vector; one PmuData record is appended on success
 * @return SUCCESS, UNKNOWN_ERROR when read() fails, or the error reported by PerfMmapReadSelf /
 *         CountValueToData
 */
int KUNPENG_PMU::PerfCounterDefault::ReadSingleEvent(std::vector<PmuData> &data)
{
    // Value-initialize so no indeterminate field is consumed when read() delivers no data or when
    // the body of PerfMmapReadSelf is compiled out (kernel headers older than 5.9).
    ReadFormat perfCountValue{};

    // The baseline slot must exist before it is indexed: Enable() clears accumCount, so a read that
    // arrives after Disable() with no read in between would otherwise index an empty vector.
    if (accumCount.empty()) {
        accumCount.assign(1, 0);
    }

    if (this->evt->enableUserAccess) {
        if (!this->isCollect) {
            // Not collecting: replay the stored baseline, which yields a zero increment.
            CountValueToData(this->accumCount[0], this->enabled, this->running, this->accumCount[0], data);
            return SUCCESS;
        }
        const int mmapErr = PerfMmapReadSelf(this->countMmap, perfCountValue);
        if (mmapErr != SUCCESS) {
            return mmapErr;
        }
    } else {
        // read() returns ssize_t; keep the full width instead of narrowing to int.
        const ssize_t len = read(this->fd, &perfCountValue, sizeof(perfCountValue));
        if (len < 0) {
            New(UNKNOWN_ERROR, strerror(errno));
            return UNKNOWN_ERROR;
        }
    }

    const int err = CountValueToData(
        perfCountValue.value, perfCountValue.timeEnabled, perfCountValue.timeRunning, accumCount[0], data);
    if (err != SUCCESS) {
        return err;
    }
    // Remember the times of this read; they are the baseline for the next increment.
    this->enabled = perfCountValue.timeEnabled;
    this->running = perfCountValue.timeRunning;
    return SUCCESS;
}
int KUNPENG_PMU::PerfCounterDefault::ReadGroupEvents(std::vector<PmuData> &data)
{
static const unsigned MAX_GROUP_EVENTS = 14;
unsigned readSize = sizeof(__u64)*3 + sizeof(__u64)*2*MAX_GROUP_EVENTS;
GroupReadFormat *perfCountValue = static_cast<GroupReadFormat*>(malloc(readSize));
if (perfCountValue == NULL) {
return COMMON_ERR_NOMEM;
}
int len = read(this->fd, perfCountValue, readSize);
if (len < 0) {
free(perfCountValue);
New(UNKNOWN_ERROR, strerror(errno));
return UNKNOWN_ERROR;
}
if (accumCount.empty()) {
accumCount.assign(perfCountValue->nr, 0);
}
for (int i = 0;i < accumCount.size(); ++i) {
auto err = CountValueToData(perfCountValue->values[i].value,
perfCountValue->timeEnabled,
perfCountValue->timeRunning,
accumCount[i],
data
);
if (err != SUCCESS) {
free(perfCountValue);
return err;
}
}
this->enabled = perfCountValue->timeEnabled;
this->running = perfCountValue->timeRunning;
free(perfCountValue);
return SUCCESS;
}
int KUNPENG_PMU::PerfCounterDefault::CountValueToData(const __u64 value, const __u64 timeEnabled,
const __u64 timeRunning, __u64 &accumCount, vector<PmuData> &data)
{
if (value < accumCount || timeEnabled < enabled || timeRunning < running) {
return LIBPERF_ERR_COUNT_OVERFLOW;
}
double percent = 0.0;
uint64_t increCount;
if (this->evt->enableUserAccess) {
percent = 1;
increCount = static_cast<uint64_t>(value - accumCount);
} else if ((value == accumCount) || (timeRunning == running)) {
percent = -1;
increCount = 0;
} else {
percent = static_cast<double>(timeEnabled - enabled) / static_cast<double>(timeRunning - running);
increCount = static_cast<uint64_t>((value - accumCount)* percent);
}
accumCount = value;
data.emplace_back(PmuData{0});
auto& current = data.back();
current.count = increCount;
current.countPercent = 1.0 / percent;
current.cpu = this->cpu;
current.tid = this->pid;
auto findProc = procMap.find(current.tid);
if (findProc != procMap.end()) {
current.pid = findProc->second->pid;
}
if(this->evt->cgroupName.size() != 0) {
current.cgroupName = this->evt->cgroupName.c_str();
}
return SUCCESS;
}
/**
 * Initialize counting
 */
int KUNPENG_PMU::PerfCounterDefault::Init(const bool groupEnable, const int groupFd, const int resetOutputFd)
{
    // resetOutputFd is not used by this counter type.
    // Regular path: open the event, optionally as part of a group.
    if (!this->evt->enableUserAccess) {
        return this->MapPerfAttr(groupEnable, groupFd);
    }

    // User-access path: open the event and then map its page; stop at the first failure.
    const int openStatus = this->MapPerfAttrUserAccess();
    if (openStatus != SUCCESS) {
        return openStatus;
    }
    return this->Mmap();
}
int KUNPENG_PMU::PerfCounterDefault::MapPerfAttr(const bool groupEnable, const int groupFd)
{
* For now, we only implemented the logic for CORE type events. Support for UNCORE PMU events will be
* added soon
*/
struct perf_event_attr attr;
memset(&attr, 0, sizeof(attr));
attr.size = sizeof(struct perf_event_attr);
attr.type = this->evt->type;
attr.config = this->evt->config;
attr.config1 = this->evt->config1;
attr.config2 = this->evt->config2;
* We want to set the disabled and inherit bit to collect child processes
*/
attr.disabled = 1;
attr.inherit = 1;
attr.exclude_kernel = this->evt->excludeKernel;
attr.exclude_user = this->evt->excludeUser;
if (this->evt->enableOnExec) {
attr.enable_on_exec = 1;
}
* if no permission try setting exclude_kernel=1.
*/
if (this->needTryExcludeKernel) {
attr.exclude_kernel = 1;
}
unsigned flags = 0;
int pid = this->pid;
if (this->GetCgroupFd() != -1) {
flags = PERF_FLAG_PID_CGROUP | PERF_FLAG_FD_CLOEXEC;
pid = this->GetCgroupFd();
}
* For now we set the format id bit to implement grouping logic in the future
*/
attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID;
if (groupEnable) {
* when creating an event group, typically the group leader is initialized with disabled bit set to 1,
* and any child events are initialized with disabled bit set to 0. Despite disabled bit being set to 0,
* the child events will not start counting until the group leader is enabled.
*/
if (groupFd != -1) {
attr.disabled = 0;
groupStatus = GroupStatus::GROUP_MEMBER;
} else {
groupStatus = GroupStatus::GROUP_LEADER;
}
attr.read_format |= PERF_FORMAT_GROUP;
this->fd = PerfEventOpen(&attr, pid, this->cpu, groupFd, flags);
} else {
this->fd = PerfEventOpen(&attr, pid, this->cpu, groupFd, flags);
groupStatus = GroupStatus::NO_GROUP;
}
this->groupFd = groupFd;
DBG_PRINT("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n",
attr.type, cpu, attr.config, attr.config1, attr.config2, this->fd, groupFd);
if (__glibc_unlikely(this->fd < 0)) {
return MapErrno(errno);
}
return SUCCESS;
}
/**
 * Open the event for the user-access read path: a minimal, initially disabled perf_event_attr
 * with no group and no flags.
 *
 * @return SUCCESS, or the error mapped from errno when the event cannot be opened
 */
int KUNPENG_PMU::PerfCounterDefault::MapPerfAttrUserAccess()
{
    struct perf_event_attr attr;
    memset(&attr, 0, sizeof(attr));
    attr.size = sizeof(struct perf_event_attr);
    attr.type = this->evt->type;
    attr.config = this->evt->config;
    attr.config1 = this->evt->config1;
    attr.disabled = 1;

    this->fd = PerfEventOpen(&attr, this->pid, this->cpu, -1, 0);
    // Capture errno right away: the debug print below may overwrite it before it is mapped.
    const int openErrno = errno;
    DBG_PRINT("type: %d cpu: %d config: %llx config1: %llx myfd: %d \n",
        attr.type,
        this->cpu,
        attr.config,
        attr.config1,
        this->fd);
    if (__glibc_unlikely(this->fd < 0)) {
        return MapErrno(openErrno);
    }
    this->groupFd = -1;
    return SUCCESS;
}
/**
 * Map COUNT_PAGE_SIZE bytes of the event fd and store the mapping in a fresh PerfMmap object.
 *
 * @return SUCCESS, or LIBPERF_ERR_FAIL_MMAP when mmap fails; in that case the event fd is closed
 *         and invalidated
 */
int KUNPENG_PMU::PerfCounterDefault::Mmap()
{
    this->countMmap = std::make_shared<PerfMmap>();
    this->countMmap->prev = 0;
    this->countMmap->mask = -1;
    void *currentMap =
        mmap(nullptr, COUNT_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, this->fd, 0);
    if (__glibc_unlikely(currentMap == MAP_FAILED)) {
        this->countMmap->base = nullptr;
        close(this->fd);
        // Invalidate the descriptor so that a later Close() does not close it a second time
        // (by then the number may already belong to an unrelated file).
        this->fd = -1;
        return LIBPERF_ERR_FAIL_MMAP;
    }
    this->countMmap->base = static_cast<struct perf_event_mmap_page *>(currentMap);
    this->countMmap->fd = this->fd;
    return SUCCESS;
}
/**
 * Enable counting and reset the accumulated baseline
 */
int KUNPENG_PMU::PerfCounterDefault::Enable()
{
    // Events attached to a group leader are not enabled individually.
    const bool attachedToLeader = (groupFd != -1);
    if (attachedToLeader) {
        return SUCCESS;
    }

    const int status = PerfEvt::Enable();
    if (status != SUCCESS) {
        return status;
    }

    // For user-access reads the mapped page must report a non-zero counter index.
    if (this->evt->enableUserAccess) {
        if (this->countMmap->base->index == 0) {
            return LIBPERF_ERR_COUNTER_INDEX_IS_ZERO;
        }
    }

    // Start a fresh collection: forget every baseline from a previous run.
    this->accumCount.clear();
    this->enabled = 0;
    this->running = 0;
    this->isCollect = true;
    return SUCCESS;
}
int KUNPENG_PMU::PerfCounterDefault::Disable()
{
    // Events attached to a group leader are not disabled individually.
    if (groupFd != -1) {
        return SUCCESS;
    }

    const int status = PerfEvt::Disable();
    if (status != SUCCESS) {
        return status;
    }
    // Only mark the event as stopped once the base class reports success.
    this->isCollect = false;
    return SUCCESS;
}
int KUNPENG_PMU::PerfCounterDefault::Reset()
{
    // No counter-specific state is touched here; the base class does the whole reset.
    const int status = PerfEvt::Reset();
    return status;
}
int KUNPENG_PMU::PerfCounterDefault::Close()
{
    // Unmap the page only when a real mapping was established.
    const auto &mapping = this->countMmap;
    const bool hasMapping = mapping && mapping->base && mapping->base != MAP_FAILED;
    if (hasMapping) {
        munmap(mapping->base, COUNT_PAGE_SIZE);
    }

    // Release the event descriptor; this function always reports success.
    const bool hasDescriptor = (this->fd > 0);
    if (hasDescriptor) {
        close(this->fd);
    }
    return SUCCESS;
}