/******************************************************************************
 * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved.
 * libkperf licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *     http://license.coscl.org.cn/MulanPSL2
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
 * PURPOSE.
 * See the Mulan PSL v2 for more details.
 * Author: Mr.Gan
 * Create: 2024-04-03
 * Description: declarations and definitions of interfaces and data structures exposed by perf.so
 ******************************************************************************/
#ifndef PMU_DATA_STRUCT_H
#define PMU_DATA_STRUCT_H
#include <unistd.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#pragma GCC visibility push(default)
/**
 * Kind of collection task; passed to PmuOpen as its collectType argument.
 * MAX_TASK_TYPE is the sentinel that follows the last task type.
 */
enum PmuTaskType {
    COUNTING,           /* 0 */
    SAMPLING,           /* 1 */
    SPE_SAMPLING,       /* 2 */
    MAX_TASK_TYPE       /* 3 */
};
/**
 * Event category selector; passed to PmuEventList as its eventType argument.
 */
enum PmuEventType {
    CORE_EVENT   = 0,
    UNCORE_EVENT = 1,
    TRACE_EVENT  = 2,
    ALL_EVENT    = 3
};
/**
 * Bit flags for the dataFilter field of struct PmuAttr.
 *
 * The shifts use an unsigned long long operand so that the flags at bit 32
 * and above are well defined even where unsigned long is only 32 bits wide.
 * Note that enumerator values beyond the range of int rely on a compiler
 * extension in C before C23 (they are always valid in C++).
 */
enum SpeFilter {
    SPE_FILTER_NONE = 0,
    TS_ENABLE       = 1ULL << 0,
    PA_ENABLE       = 1ULL << 1,
    PCT_ENABLE      = 1ULL << 2,
    JITTER          = 1ULL << 16,
    BRANCH_FILTER   = 1ULL << 32,
    LOAD_FILTER     = 1ULL << 33,
    STORE_FILTER    = 1ULL << 34,
    /* Union of every flag above. */
    SPE_DATA_ALL    = TS_ENABLE | PA_ENABLE | PCT_ENABLE | JITTER | BRANCH_FILTER | LOAD_FILTER | STORE_FILTER
};
/**
 * Bit flags for the evFilter field of struct PmuAttr.
 * Each flag occupies a single bit.
 */
enum SpeEventFilter {
    SPE_EVENT_NONE         = 0,
    SPE_EVENT_RETIRED      = 1 << 1,
    SPE_EVENT_L1DMISS      = 1 << 3,
    SPE_EVENT_TLB_WALK     = 1 << 5,
    SPE_EVENT_MISPREDICTED = 1 << 7,
};
/**
 * Symbol resolution mode; type of the symbolMode field of struct PmuAttr.
 * With the two RESOLVE_DELAY_* modes, ResolvePmuDataSymbol can be called
 * after PmuRead to resolve symbols.
 */
enum SymbolMode {
    NO_SYMBOL_RESOLVE = 0,
    RESOLVE_ELF,            /* 1 */
    RESOLVE_ELF_DWARF,      /* 2 */
    RESOLVE_DELAY_ELF,      /* 3 */
    RESOLVE_DELAY_DWARF     /* 4 */
};
/**
 * Bit flags describing which branches to sample.
 * NOTE(review): presumably these are the values accepted by the
 * branchSampleFilter field of struct PmuAttr, and the names parallel the
 * Linux PERF_SAMPLE_BRANCH_* flags — confirm against the implementation.
 */
enum BranchSampleFilter {
    KPERF_NO_BRANCH_SAMPLE = 0,
    /**
     * The first part of the value is the privilege level, which is a combination of
     * one of the values listed below. If the user does not set privilege level explicitly,
     * the kernel will use the event's privilege level. Event and branch privilege levels do
     * not have to match.
     */
    KPERF_SAMPLE_BRANCH_USER = 1U << 0,
    KPERF_SAMPLE_BRANCH_KERNEL = 1U << 1,
    KPERF_SAMPLE_BRANCH_HV = 1U << 2,
    /* Remaining bits: one flag per bit position, 3 through 16. */
    KPERF_SAMPLE_BRANCH_ANY = 1U << 3,
    KPERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4,
    KPERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5,
    KPERF_SAMPLE_BRANCH_IND_CALL = 1U << 6,
    KPERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7,
    KPERF_SAMPLE_BRANCH_IN_TX = 1U << 8,
    KPERF_SAMPLE_BRANCH_NO_TX = 1U << 9,
    KPERF_SAMPLE_BRANCH_COND = 1U << 10,
    KPERF_SAMPLE_BRANCH_CALL_STACK = 1U << 11,
    KPERF_SAMPLE_BRANCH_IND_JUMP = 1U << 12,
    KPERF_SAMPLE_BRANCH_CALL = 1U << 13,
    /* Historical misspelling, kept so existing callers keep compiling. */
    KPERF_SAMPLE_BRANCH_NO_FLAGES = 1U << 14,
    /* Correctly spelled alias of KPERF_SAMPLE_BRANCH_NO_FLAGES. */
    KPERF_SAMPLE_BRANCH_NO_FLAGS = KPERF_SAMPLE_BRANCH_NO_FLAGES,
    KPERF_SAMPLE_BRANCH_NO_CYCLES = 1U << 15,
    KPERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << 16,
};
/**
 * Per-event attributes; struct PmuAttr refers to an array of these through
 * its evtAttr / numEvtAttr fields.
 * NOTE(review): field meanings below are inferred from the names — confirm.
 */
struct EvtAttr {
    int groupId;                        /* group identifier of the event */
    unsigned int period;                /* period for this event */
    unsigned int excludeUser : 1;       /* one-bit flag */
    unsigned int excludeKernel : 1;     /* one-bit flag */
};
/**
 * Settings of a collection task; passed to PmuOpen (and, read-only, to
 * PmuBeginWrite).
 * NOTE(review): the per-field notes below are inferred from field names and
 * types only — confirm against the implementation before relying on them.
 */
struct PmuAttr {
    /* Event name list and its length. */
    char **evtList;
    unsigned int numEvt;
    /* Process id list and its length. */
    int *pidList;
    unsigned int numPid;
    /* Cpu id list and its length. */
    int *cpuList;
    unsigned int numCpu;
    /* Per-event attribute array and its length. */
    struct EvtAttr *evtAttr;
    unsigned int numEvtAttr;
    /* period and freq share storage; presumably useFreq tells which one is meant. */
    union {
        unsigned int period;
        unsigned int freq;
    };
    unsigned int useFreq : 1;
    unsigned int excludeUser : 1;
    unsigned int excludeKernel : 1;
    /* Symbol resolution mode, see enum SymbolMode. */
    enum SymbolMode symbolMode;
    unsigned int callStack : 1;
    unsigned int blockedSample : 1;
    /* SPE filters, see enum SpeFilter and enum SpeEventFilter. */
    enum SpeFilter dataFilter;
    enum SpeEventFilter evFilter;
    unsigned long minLatency;
    unsigned int includeNewFork : 1;
    /* Presumably a combination of enum BranchSampleFilter bits. */
    unsigned long branchSampleFilter;
    /* Cgroup name list and its length. */
    char **cgroupNameList;
    unsigned int numCgroup;
    /* One-bit feature switches. */
    unsigned int enableUserAccess : 1;
    unsigned int enableBpf : 1;
    unsigned int enableHwMetric : 1;
    unsigned int enableOnExec : 1;
    unsigned int perThread : 1;
};
/**
 * Kind of trace collection task; passed to PmuTraceOpen as traceType.
 */
enum PmuTraceType {
    TRACE_SYS_CALL = 0,
};
/**
 * Settings of a trace collection task; passed to PmuTraceOpen.
 * Each pointer is paired with the count that follows it.
 * NOTE(review): field meanings are inferred from the names — confirm.
 */
struct PmuTraceAttr {
    const char **funcs;         /* function name list */
    unsigned int numFuncs;
    int *pidList;               /* process id list */
    unsigned int numPid;
    int *cpuList;               /* cpu id list */
    unsigned int numCpu;
};
/**
 * Core, numa and socket identifiers of one cpu; struct PmuData points to
 * one of these through its cpuTopo field.
 */
struct CpuTopology {
    int coreId;     /* core id */
    int numaId;     /* numa id */
    int socketId;   /* socket id */
};
/**
 * SPE event bit flags, one bit per event.
 * NOTE(review): presumably matched against the event field of
 * struct PmuDataExt — confirm.
 */
enum SPE_EVENTS {
    SPE_EV_EXCEPT        = 0x0000001,   /* bit 0 */
    SPE_EV_RETIRED       = 0x0000002,   /* bit 1 */
    SPE_EV_L1D_ACCESS    = 0x0000004,   /* bit 2 */
    SPE_EV_L1D_REFILL    = 0x0000008,   /* bit 3 */
    SPE_EV_TLB_ACCESS    = 0x0000010,   /* bit 4 */
    SPE_EV_TLB_WALK      = 0x0000020,   /* bit 5 */
    SPE_EV_NOT_TAKEN     = 0x0000040,   /* bit 6 */
    SPE_EV_MISPRED       = 0x0000080,   /* bit 7 */
    SPE_EV_LLC_ACCESS    = 0x0000100,   /* bit 8 */
    SPE_EV_LLC_MISS      = 0x0000200,   /* bit 9 */
    SPE_EV_REMOTE_ACCESS = 0x0000400,   /* bit 10 */
    SPE_EV_ALIGNMENT     = 0x0000800,   /* bit 11 */
    SPE_EV_PARTIAL_PRED  = 0x0020000,   /* bit 17 */
    SPE_EV_EMPTY_PRED    = 0x0040000,   /* bit 18 */
    SPE_FORWARD_HAZARD   = 0x1000000,   /* bit 24 */
    SPE_STRUCTURE_HAZARD = 0x2000000,   /* bit 25 */
};
/**
 * Data source codes.
 * NOTE(review): presumably the encoding of the source field of
 * struct PmuDataExt — confirm.
 */
enum HIP_DATA_SOURCE {
    HIP_PEER_CPU           = 0x00,
    HIP_PEER_CPU_HITM      = 0x01,
    HIP_L3                 = 0x02,
    HIP_L3_HITM            = 0x03,
    HIP_PEER_CLUSTER       = 0x04,
    HIP_PEER_CLUSTER_HITM  = 0x05,
    HIP_REMOTE_SOCKET      = 0x06,
    HIP_REMOTE_SOCKET_HITM = 0x07,
    HIP_LOCAL_MEM          = 0x08,
    HIP_REMOTE_MEM         = 0x09,
    HIP_NC_DEV             = 0x0d,
    HIP_L2                 = 0x10,
    HIP_L2_HITM            = 0x11,
    HIP_L1                 = 0x12,
};
/**
 * Load/store operation type bit flags, occupying bits 20 through 24.
 * NOTE(review): presumably matched against the op field of
 * struct PmuDataExt — confirm.
 */
enum SpeLdStOpType {
    SPE_OP_LD     = 0x0100000,  /* bit 20 */
    SPE_OP_ST     = 0x0200000,  /* bit 21 */
    SPE_OP_ATOMIC = 0x0400000,  /* bit 22 */
    SPE_OP_EXCL   = 0x0800000,  /* bit 23 */
    SPE_OP_AR     = 0x1000000,  /* bit 24 */
};
/**
 * One branch record; struct PmuDataExt refers to an array of these through
 * its branchRecords field.
 * NOTE(review): field meanings are inferred from the names — confirm.
 */
struct BranchSampleRecord {
    unsigned long fromAddr;     /* branch source address */
    unsigned long toAddr;       /* branch target address */
    unsigned long cycles;       /* cycle count attached to the record */
    uint8_t misPred;            /* mispredicted flag */
    uint8_t predicted;          /* predicted flag */
};
/**
 * Extension data attached to struct PmuData through its ext field.
 * The two anonymous structs share the same storage, so only one of the two
 * layouts is meaningful for a given sample.
 * NOTE(review): which layout applies to which task type is not visible in
 * this header — confirm against the implementation.
 */
struct PmuDataExt {
    union {
        /* Layout 1: addresses, event bits, latency, data source and operation. */
        struct {
            unsigned long pa;
            unsigned long va;
            unsigned long event;
            unsigned short lat;
            unsigned short source;
            uint32_t op;
        };
        /* Layout 2: branch record array and its length. */
        struct {
            unsigned long nr;
            struct BranchSampleRecord *branchRecords;
        };
    };
};
/**
 * Raw sample payload; passed to PmuGetField and PmuGetFieldExp and referenced
 * by struct PmuData through its rawData field.
 */
struct SampleRawData {
    char *data;     /* pointer to the raw bytes */
};
/**
 * Description of one field of a raw sample; returned by PmuGetFieldExp.
 * NOTE(review): field meanings are inferred from the names — confirm.
 */
struct SampleRawField {
    char *fieldName;            /* name of the field */
    char *fieldStr;             /* textual form of the field */
    unsigned int offset;        /* offset of the field */
    unsigned int size;          /* size of the field */
    unsigned int isSigned;      /* signedness flag */
};
/**
 * One collected record; PmuRead hands back an array of these.
 * NOTE(review): field meanings are inferred from names and types — confirm.
 */
struct PmuData {
    struct Stack *stack;            /* call stack of the record */
    const char *evt;                /* event name */
    int64_t ts;                     /* time stamp */
    pid_t pid;                      /* process id */
    int tid;                        /* thread id */
    int cpu;                        /* cpu id */
    int groupId;                    /* event group id */
    struct CpuTopology *cpuTopo;    /* topology of the cpu */
    const char *comm;               /* command name */
    uint64_t period;                /* period */
    uint64_t count;                 /* event count */
    double countPercent;            /* count percentage */
    struct PmuDataExt *ext;         /* extension data */
    struct SampleRawData *rawData;  /* raw sample data */
    const char *cgroupName;         /* cgroup name */
};
/**
 * One collected trace record; PmuTraceRead hands back an array of these.
 * NOTE(review): field meanings and units are inferred from the names — confirm.
 */
struct PmuTraceData {
    const char *funcs;      /* function name */
    int64_t startTs;        /* start time stamp */
    double elapsedTime;     /* elapsed time */
    pid_t pid;              /* process id */
    int tid;                /* thread id */
    int cpu;                /* cpu id */
    const char *comm;       /* command name */
};
/**
 * Minimum, maximum and average frequency of one core; PmuReadCpuFreqDetail
 * returns an array of these.
 */
struct PmuCpuFreqDetail {
    int cpuId;              /* core the figures belong to */
    uint64_t minFreq;       /* minimum frequency */
    uint64_t maxFreq;       /* maximum frequency */
    uint64_t avgFreq;       /* average frequency */
};
/**
 * @brief
 * Initialize the collection target.
 * On success, a task id is returned which is the unique identity for the task.
 * On error, -1 is returned.
 * Refer to comments of PmuAttr for details about settings.
 * @param collectType task type
 * @param attr settings of the current task
 * @return task id
 */
int PmuOpen(enum PmuTaskType collectType, struct PmuAttr *attr);
/**
 * @brief
 * Query all available events from the system.
 * @param eventType type of event chosen by user
 * @param numEvt length of event list
 * @return event list
 */
const char** PmuEventList(enum PmuEventType eventType, unsigned *numEvt);
/**
 * @brief
 * Enable counting or sampling of task <pd>.
 * On success, 0 is returned.
 * On error, -1 is returned.
 * @param pd task id
 * @return error code
 */
int PmuEnable(int pd);

/**
 * @brief
 * Disable counting or sampling of task <pd>.
 * On success, 0 is returned.
 * On error, -1 is returned.
 * @param pd task id
 * @return error code
 */
int PmuDisable(int pd);
/**
 * @brief
 * Collect <milliseconds> milliseconds. If <milliseconds> is equal to -1 and the PID list is not empty, the collection
 * is performed until all processes are complete.
 * @param pd task id
 * @param milliseconds collection time in milliseconds, or -1 (see above)
 * @param interval internal collect period. Unit: millisecond. Must be larger than or equal to 100.
 * @return int
 */
int PmuCollect(int pd, int milliseconds, unsigned interval);

/**
 * @brief
 * Similar to <PmuCollect>, and <PmuCollectV> accepts multiple pds.
 * @param pd array of task ids
 * @param len number of task ids in <pd>
 * @param milliseconds collection time in milliseconds
 * @return int
 */
int PmuCollectV(int *pd, unsigned len, int milliseconds);
/**
 * @brief stop a sampling task in asynchronous mode
 * @param pd pmu descriptor.
 */
void PmuStop(int pd);

/**
 * @brief exit the analysis phase, currently only symbol resolution is effective.
 * @param pd pmu descriptor.
 */
void PmuExit(int pd);
/**
 * @brief
 * Collect data.
 * Pmu data are collected starting from the last PmuEnable or PmuRead.
 * That is to say, for COUNTING, counts of all pmu event are reset to zero in PmuRead.
 * For SAMPLING and SPE_SAMPLING, samples collected are started from the last PmuEnable or PmuRead.
 * On success, length of data array is returned.
 * If <pmuData> is NULL and the error code is 0, no data is available in the current collection time.
 * If <pmuData> is NULL and the error code is not 0, an error occurs in the collection process and data cannot be read.
 * @param pd task id
 * @param pmuData pmu data which is a pointer to an array
 * @return length of pmu data
 */
int PmuRead(int pd, struct PmuData** pmuData);
/**
 * @brief
 * When symbol mode is RESOLVE_DELAY_ELF or RESOLVE_DELAY_DWARF, use this function after PmuRead
 * to resolve the symbols of PmuData.
 * @param pmuData the data from PmuRead
 * @return 0 indicates resolve success, otherwise return error code
 */
int ResolvePmuDataSymbol(struct PmuData* pmuData);
/**
 * @brief
 * Append data list <fromData> to another data list <*toData>.
 * The pointer of data list <*toData> will be refreshed after this function is called.
 * On success, length of <*toData> is returned.
 * On error, -1 is returned.
 * @param fromData data list which will be copied to <*toData>
 * @param toData pointer to target data list. If data list <*toData> is NULL, a new list will be created.
 * @return length of <toData>
 */
int PmuAppendData(struct PmuData *fromData, struct PmuData **toData);
/**
 * @brief
 * Dump pmu data to a specific file.
 * If file exists, then data will be appended to file.
 * If file does not exist, then file will be created.
 * Dump format: comm pid tid cpu period evt count addr symbolName offset module fileName lineNum
 * @param pmuData data list.
 * @param len data length.
 * @param filepath path of the output file.
 * @param dumpDwf if 0, source file and line number of symbols will not be dumped, otherwise, they will be dumped to file.
 */
int PmuDumpData(struct PmuData *pmuData, unsigned len, char *filepath, int dumpDwf);
/**
 * @brief
 * Close task with id <pd>.
 * After PmuClose is called, all pmu data related to the task become invalid.
 * @param pd task id
 */
void PmuClose(int pd);

/**
 * @brief Free PmuData pointer.
 * @param pmuData pointer to free
 */
void PmuDataFree(struct PmuData* pmuData);
/**
 * @brief Get the value of one raw field of a trace event.
 * @param rawData the raw data.
 * @param fieldName the field name of one field.
 * @param value the pointer of value.
 * @param vSize the memory size of value.
 * @return 0 on success, any other value on failure.
 */
int PmuGetField(struct SampleRawData *rawData, const char *fieldName, void *value, uint32_t vSize);

/**
 * @brief Get the SampleRawField explanation of one field.
 * @param rawData the raw data.
 * @param fieldName the field name of one field.
 * @return pointer to a SampleRawField (failure convention is not documented here)
 */
struct SampleRawField *PmuGetFieldExp(struct SampleRawData *rawData, const char *fieldName);
/**
 * Device metric selector; used by the metric field of struct PmuDeviceAttr
 * and struct PmuDeviceData.
 */
enum PmuDeviceMetric {
    PMU_DDR_READ_BW      = 0,
    PMU_DDR_WRITE_BW     = 1,
    PMU_L3_TRAFFIC       = 2,
    PMU_L3_MISS          = 3,
    PMU_L3_REF           = 4,
    PMU_L3_LAT           = 5,
    PMU_PCIE_RX_MRD_BW   = 6,
    PMU_PCIE_RX_MWR_BW   = 7,
    PMU_PCIE_TX_MRD_BW   = 8,
    PMU_PCIE_TX_MWR_BW   = 9,
    PMU_PCIE_RX_MRD_LAT  = 10,
    PMU_PCIE_RX_MWR_LAT  = 11,
    PMU_PCIE_TX_MRD_LAT  = 12,
    PMU_SMMU_TRAN        = 13,
    PMU_HHA_CROSS_NUMA   = 14,
    PMU_HHA_CROSS_SOCKET = 15
};
/**
 * One metric request; PmuDeviceOpen and PmuGetDevMetric take an array of these.
 * NOTE(review): which metrics require bdf or port is not visible in this
 * header — confirm against the implementation.
 */
struct PmuDeviceAttr {
    enum PmuDeviceMetric metric;    /* metric to collect */
    char *bdf;                      /* bdf string */
    char *port;                     /* port string */
};
/**
 * Kind of bdf to list; passed to PmuDeviceBdfList as bdfType.
 */
enum PmuBdfType {
    PMU_BDF_TYPE_PCIE = 0,
    PMU_BDF_TYPE_SMMU = 1
};
/**
 * Mode of a metric result; type of the mode field of struct PmuDeviceData.
 */
enum PmuMetricMode {
    PMU_METRIC_INVALID = 0,
    PMU_METRIC_CORE    = 1,
    PMU_METRIC_NUMA    = 2,
    PMU_METRIC_CLUSTER = 3,
    PMU_METRIC_BDF     = 4,
    PMU_METRIC_CHANNEL = 5
};
/**
 * @brief
 * Query all available bdf list from system.
 * @param bdfType type of bdf chosen by user
 * @param numBdf length of bdf list
 * @return bdf list
 */
const char** PmuDeviceBdfList(enum PmuBdfType bdfType, unsigned *numBdf);
/**
 * @brief
 * A high level interface for initializing pmu events for devices,
 * such as L3 cache, DDRC, PCIe, and SMMU, to collect metrics like bandwidth, latency, and others.
 * This interface is an alternative option for initializing events besides PmuOpen.
 * @param attr Array of metrics to collect
 * @param len Length of array
 * @return Task Id, similar with returned value of PmuOpen
 */
int PmuDeviceOpen(struct PmuDeviceAttr *attr, unsigned len);
/**
 * One metric result; PmuGetDevMetric outputs an array of these.
 * The members of the anonymous union share storage.
 * NOTE(review): presumably mode tells which union member is meaningful —
 * confirm against the implementation.
 */
struct PmuDeviceData {
    enum PmuDeviceMetric metric;    /* metric this value belongs to */
    double count;                   /* metric value */
    enum PmuMetricMode mode;        /* see enum PmuMetricMode */
    union {
        unsigned int coreId;
        unsigned int numaId;
        unsigned int clusterId;
        char *bdf;
        char *port;
        struct {
            unsigned int channelId;
            unsigned int ddrNumaId;
            unsigned int socketId;
        };
    };
};
/**
 * @brief
 * Query device metrics from pmuData and metric array.
 * @param pmuData pmuData read from PmuRead
 * @param len length of pmuData
 * @param attr metric array to query
 * @param attrLen length of metric array
 * @param data output metric data array, the length of array is the returned value
 * @return On success, length of metric data array is returned.
 * On fail, -1 is returned and use Perror to get error message.
 */
int PmuGetDevMetric(struct PmuData *pmuData, unsigned len,
                    struct PmuDeviceAttr *attr, unsigned attrLen,
                    struct PmuDeviceData **data);

/**
 * @brief Free PmuDeviceData pointer.
 * @param data pointer to free
 */
void DevDataFree(struct PmuDeviceData *data);
/**
 * @brief Get core list of a cluster.
 * @param clusterId cluster id
 * @param coreList core id list, malloced by this method.
 * @return length of core id list
 */
int PmuGetClusterCore(unsigned clusterId, unsigned **coreList);

/**
 * @brief Get core list of a numa node.
 * @param nodeId numa id
 * @param coreList core id list, malloced by this method.
 * @return length of core id list
 */
int PmuGetNumaCore(unsigned nodeId, unsigned **coreList);
/**
 * @brief
 * Initialize the trace collection target.
 * On success, a trace collect task id is returned which is the unique identity for the task.
 * On error, -1 is returned.
 * Refer to comments of PmuTraceAttr for details about settings.
 * @param traceType task type
 * @param traceAttr settings of the current trace collect task
 * @return trace collect task id
 */
int PmuTraceOpen(enum PmuTraceType traceType, struct PmuTraceAttr *traceAttr);
/**
 * @brief
 * Enable trace collection of task <pd>.
 * On success, 0 is returned.
 * On error, -1 is returned.
 * @param pd trace collect task id
 * @return error code
 */
int PmuTraceEnable(int pd);

/**
 * @brief
 * Disable trace collection of task <pd>.
 * On success, 0 is returned.
 * On error, -1 is returned.
 * @param pd trace collect task id
 * @return error code
 */
int PmuTraceDisable(int pd);
/**
 * @brief
 * Collect data.
 * Pmu trace data are collected starting from the last PmuTraceEnable or PmuTraceRead.
 * On success, length of data array is returned.
 * If <pmuData> is NULL and the error code is 0, no data is available in the current collection time.
 * If <pmuData> is NULL and the error code is not 0, an error occurs in the collection process and data cannot be read.
 * @param pd trace collect task id
 * @param pmuData pmu trace data which is a pointer to an array
 * @return length of pmu trace data
 */
int PmuTraceRead(int pd, struct PmuTraceData** pmuData);
/**
 * @brief
 * Close task with id <pd>.
 * After PmuTraceClose is called, all pmu trace data related to the task become invalid.
 * @param pd trace collect task id
 */
void PmuTraceClose(int pd);

/**
 * @brief Free PmuTraceData pointer.
 * @param pmuTraceData pointer to free
 */
void PmuTraceDataFree(struct PmuTraceData* pmuTraceData);
/**
 * @brief
 * Query all available system call functions from the system.
 * @param numFunc length of system call function list
 * @return system call function list
 */
const char** PmuSysCallFuncList(unsigned *numFunc);
/**
 * @brief
 * Get cpu frequency of cpu core.
 * @param core Index of core
 * @return On success, core frequency(Hz) is returned.
 * On error, -1 is returned and call Perrorno to get error.
 */
int64_t PmuGetCpuFreq(unsigned core);
/**
 * @brief get the maximum frequency, minimum frequency, and average frequency of each core
 * @param cpuNum output: number of entries in the returned array
 * @return array of PmuCpuFreqDetail
 */
struct PmuCpuFreqDetail* PmuReadCpuFreqDetail(unsigned* cpuNum);

/**
 * @brief open cpu core freq sampling
 * @param period sampling period, unit: ms
 * @return -1 or 0
 */
int PmuOpenCpuFreqSampling(unsigned period);

/**
 * @brief close cpu freq sampling
 */
void PmuCloseCpuFreqSampling(void);
/** Opaque handle of a perf.data file being written; obtained from PmuBeginWrite. */
typedef void* PmuFile;

/**
 * @brief Begin to write PmuData list to perf.data file.
 * It is a simplified perf.data only include basic fields for perf sample,
 * including id, tid, pid, addr and branch stack.
 * It also includes sample like mmap, mmap2, comm, fork and one feature section(build id).
 * @param path path of perf.data
 * @param pattr PmuAttr of collection task
 * @param addIdHdr add sample id for Non PERF_RECORD_SAMPLE samples
 * @return a handle of file to write. If error, return NULL and check Perrorno.
 */
PmuFile PmuBeginWrite(const char *path, const struct PmuAttr *pattr, const int addIdHdr);

/**
 * @brief Write PmuData list to file.
 * @param file file handle
 * @param data PmuData list
 * @param len length of data
 * @return On success, return SUCCESS. on error, return error code.
 */
int PmuWriteData(PmuFile file, struct PmuData *data, int len);

/**
 * @brief End to write file.
 * @param file file handle
 */
void PmuEndWrite(PmuFile file);
/**
 * Hardware metric bit flags, one bit per metric.
 * NOTE(review): presumably combined into the metric field of
 * struct PmuHwMetricAttr — confirm.
 * NOTE(review): ITLD and BRACH look like misspellings of ITLB and BRANCH;
 * the names are public API and are left untouched here.
 */
enum PmuHwMetric {
    PMU_HWM_CPI                 = 0x001,    /* bit 0 */
    PMU_HWM_CACHE_MISS          = 0x002,    /* bit 1 */
    PMU_HWM_L3_CACHE_MISS       = 0x004,    /* bit 2 */
    PMU_HWM_L2D_CACHE_MISS      = 0x008,    /* bit 3 */
    PMU_HWM_L1_DCACHE_MISS      = 0x010,    /* bit 4 */
    PMU_HWM_L1_ICACHE_LOAD_MISS = 0x020,    /* bit 5 */
    PMU_HWM_DTLB_LOAD_MISS      = 0x040,    /* bit 6 */
    PMU_HWM_ITLD_LOAD_MISS      = 0x080,    /* bit 7 */
    PMU_HWM_BRACH_LOADS_MISS    = 0x100,    /* bit 8 */
};
/**
 * Settings passed to PmuOpenWithHWMetric.
 * NOTE(review): field meanings, and the expected lengths of the two lists,
 * are inferred from the names — confirm against the implementation.
 */
struct PmuHwMetricAttr {
    unsigned long metric;               /* presumably a combination of enum PmuHwMetric bits */
    unsigned int *basePeriodList;       /* list of base periods */
    double *thresholdList;              /* list of thresholds */
    unsigned int pid;                   /* process id */
};
/**
 * @brief Open a task configured by a PmuHwMetricAttr.
 * NOTE(review): the return convention is not documented in this header;
 * presumably a task id like PmuOpen, with -1 on error — confirm.
 * @param hwMetricAttr hardware metric settings
 * @return int
 */
int PmuOpenWithHWMetric(struct PmuHwMetricAttr* hwMetricAttr);
/**
 * A module name / symbol name pair; struct UTraceAttr refers to an array of
 * these through its symSrc field.
 */
struct SymbolSource {
    char *moduleName;   /* name of the module */
    char *symbolName;   /* name of the symbol */
};
/**
 * Settings of a trace session; passed to UTraceOpen.
 * NOTE(review): field meanings are inferred from the names; the purpose of
 * fetchG is not visible in this header — confirm.
 */
struct UTraceAttr {
    struct SymbolSource *symSrc;    /* symbol source array */
    unsigned int numSym;            /* number of entries in symSrc */
    int *pidList;                   /* process id list */
    unsigned int numPid;            /* number of entries in pidList */
    unsigned int fetchG;
};
/**
 * One trace record; UTraceRead hands back a pointer to these.
 * NOTE(review): field meanings are inferred from the names; the meaning of
 * gPtr is not visible in this header — confirm.
 */
struct UTraceData {
    unsigned long addr;     /* address */
    const char *comm;       /* command name */
    int tid;                /* thread id */
    int cpu;                /* cpu id */
    int64_t timestamp;      /* time stamp */
    uint64_t gPtr;
    const char *module;     /* module name */
    const char *func;       /* function name */
    unsigned int isRet;     /* return-probe flag */
};
/**
 * @brief Open a new trace session
 *
 * Based on the provided UTraceAttr configuration, this function resolves ELF symbols,
 * installs probes, and registers PMU events. It returns a session descriptor.
 *
 * @param attr Pointer to a UTraceAttr structure containing symbol sources,
 *             PID list, and other configuration parameters.
 * @return int Returns the session descriptor (pd) on success, or -1 on failure.
 */
int UTraceOpen(struct UTraceAttr *attr);
/**
 * @brief Enable a trace session
 *
 * Calls the underlying PmuEnable interface to start sampling or event collection.
 *
 * @param pd Trace session descriptor
 * @return int Returns 0 on success, or an error code on failure.
 */
int UTraceEnable(int pd);

/**
 * @brief Disable a trace session
 *
 * Calls the underlying PmuDisable interface to stop sampling or event collection.
 *
 * @param pd Trace session descriptor
 * @return int Returns 0 on success, or an error code on failure.
 */
int UTraceDisable(int pd);
/**
 * @brief Read trace data
 *
 * Reads sampled data from the specified trace session and converts it
 * into a UTraceData structure.
 *
 * @param pd Trace session descriptor
 * @param traceData Output parameter, returns a pointer to UTraceData
 *                  (must be freed with UTraceDataFree)
 * @return int Returns the number of records read; -1 on failure.
 */
int UTraceRead(int pd, struct UTraceData **traceData);

/**
 * @brief Free trace data memory
 *
 * Releases the memory allocated for a UTraceData structure returned by UTraceRead.
 *
 * @param traceData Pointer to the UTraceData structure to be freed.
 */
void UTraceDataFree(struct UTraceData *traceData);
/**
 * @brief Close a trace session
 *
 * Closes the specified trace session, uninstalls probes,
 * and clears events and cached data.
 *
 * @param pd Trace session descriptor
 */
void UTraceClose(int pd);
#pragma GCC visibility pop
#ifdef __cplusplus
}
#endif
#endif