/*
* Copyright (c) 2021-2022 Huawei Device Co., Ltd.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SUBCOMMAND_RECORD_H
#define SUBCOMMAND_RECORD_H
// Compile-time switches for the record subcommand.
// The HIDEBUG_* switches are all defined as 0 here and are not referenced anywhere in this
// header; presumably the implementation file tests them to skip individual stages
// (record processing, saving, symbol handling) while debugging -- TODO confirm against the .cpp.
#define HIDEBUG_RECORD_NOT_PROCESS 0
#define HIDEBUG_RECORD_NOT_PROCESS_VM 0
#define HIDEBUG_RECORD_NOT_SAVE 0
#define HIDEBUG_SKIP_PROCESS_SYMBOLS 0
#define HIDEBUG_SKIP_MATCH_SYMBOLS 0
#define HIDEBUG_SKIP_LOAD_KERNEL_SYMBOLS 0
#define HIDEBUG_SKIP_SAVE_SYMBOLS 0
// Defined as 1: enables the `#if USE_COLLECT_SYMBOLIC` section of SubCommandRecord
// (the symbol-hit containers and SymbolicHits()).
#define USE_COLLECT_SYMBOLIC 1
#include <functional>
#include <thread>
#include <unordered_map>
#include <unordered_set>
#include <chrono>
#include "perf_event_record.h"
#include "perf_events.h"
#include "perf_file_writer.h"
#include "perf_pipe.h"
#include "subcommand.h"
#include "virtual_runtime.h"
namespace OHOS {
namespace Developtools {
namespace HiPerf {
// SubCommandRecord: the hiperf "record" subcommand.
//
// This class declares
//   - the numeric limits/defaults for the record options,
//   - the usage/help text handed to the SubCommand base class,
//   - the state filled in while parsing options, and
//   - the helper routines used to prepare, run and finalize a recording.
// Only declarations live here; apart from the (empty-bodied) constructor, every member function
// is defined outside this header.
//
// NOTE(review): member declaration order fixes construction/destruction order (e.g. the
// std::thread members, isRoot_ which calls IsRoot(), virtualRuntime_); do not reorder casually.
// NOTE(review): this header uses std::map, std::set, std::atomic_bool, std::unique_ptr,
// std::string and std::vector but does not include <map>, <set>, <atomic>, <memory>, <string>
// or <vector> itself -- it relies on the project headers included above to provide them.
class SubCommandRecord : public SubCommand {
public:
// Bounds and defaults for numeric options. The [min, max] pairs match the ranges quoted in the
// help text below for --cpu-limit, -m, --chkms, -d, --cmdline-size and --backtrack-sec.
static constexpr int DEFAULT_CPU_PERCENT = 25;
static constexpr int MIN_CPU_PERCENT = 1;
static constexpr int MAX_CPU_PERCENT = 100;
static constexpr int MIN_SAMPLE_FREQUENCY = 1;
static constexpr int MAX_SAMPLE_FREQUENCY = 100000;
// NOTE(review): DEFAULT_MMAP_PAGES is 256, but mmapPages_ below is initialized from
// MAX_PERF_MMAP_PAGE and the help text states "default is 1024"; this constant is not used
// anywhere in this header.
static constexpr int DEFAULT_MMAP_PAGES = 256;
static constexpr int MIN_PERF_MMAP_PAGE = 2;
static constexpr int MAX_PERF_MMAP_PAGE = 1024;
static constexpr int DEFAULT_CHECK_APP_MS = 10;
static constexpr int MIN_CHECK_APP_MS = 1;
static constexpr int MAX_CHECK_APP_MS = 200;
static constexpr float MIN_STOP_SECONDS = 0.100;
static constexpr float MAX_STOP_SECONDS = 10000.0;
static constexpr int MIN_SAVED_CMDLINES_SIZE = 512;
static constexpr int DEFAULT_SAVED_CMDLINES_SIZE = 2048;
static constexpr int MAX_SAVED_CMDLINES_SIZE = 4096;
static constexpr uint64_t MIN_BACKTRACK_TIME_SEC = 5;
static constexpr uint64_t DEFAULT_BACKTRACK_TIME_SEC = 10;
static constexpr uint64_t MAX_BACKTRACK_TIME_SEC = 30;
// Constructs the subcommand by passing three arguments to the SubCommand base: the literal
// "record", a one-line summary, and the full usage text assembled from the adjacent string
// literals below (presumably name / brief / help -- see subcommand.h). The body is empty.
// The usage text is runtime output: edit it only when the option behaviour changes.
SubCommandRecord()
: SubCommand("record", "Collect performance sample information",
"Usage: hiperf record [options] [command [command-args]]\n"
" Collect performance sampling information of running [command].\n"
" The default options are: -c <all cpu> --cpu-limit 25 -d 10000.0 -e hw-cpu-cycles\n"
" -f 4000 -m 1024 -o /data/local/tmp/perf.data.\n"
" -a\n"
" Collect system-wide information.\n"
" for measures all processes/threads\n"
" This requires CAP_PERFMON (since Linux 5.8) or CAP_SYS_ADMIN capability or a\n"
" /proc/sys/kernel/perf_event_paranoid value of less than 1.\n"
" --exclude-hiperf\n"
" Don't record events issued by hiperf itself.\n"
" -c <cpuid>[<,cpuid>]...\n"
" cpuid should be 0,1,2...\n"
" Limit the CPU that collects data.\n"
" 0 means cpu0, 1 means cpu1 ...\n"
" --cpu-limit <percent>\n"
" Set the max percent of cpu time used for recording.\n"
" percent is in range [1-100], default is 25.\n"
" -d <sec>\n"
" stop in <sec> seconds. floating point number. seconds is in range [0.100-10000.0]\n"
" default is 10000.0\n"
" -f <freq>\n"
" Set event sampling frequency. default is 4000 samples every second.\n"
" check /proc/sys/kernel/perf_event_max_sample_rate for maximum allowed frequency\n"
" --period <num>\n"
" Set event sampling period for tracepoint events. recording one sample when <num> events happen.\n"
" The default <num> is 1\n"
" --raw-data\n"
" Enable raw binary data collection for events, usually used to get detailed information\n"
" from events such as tracepoints.\n"
" -e <event1[:<u|k>]>[,event1[:<u|k>]]...\n"
" Customize the name of the event that needs to be sampled.\n"
" The name can use the names listed in the list parameter.\n"
" It can also be represented by the value of 0x<hex>.\n"
" u - monitor user space events only\n"
" k - monitor kernel space events only\n"
" -g <event1[:<u|k>]>[,event1[:<u|k>]]...\n"
" Put the events into a group, can set multiple groups by multiple -g\n"
" PMU is required to report data in designated groups\n"
// NOTE(review): the next help line ends with a ')' that has no matching '('.
" limited by HW capability, too many events cannot be reported in the same sampling)\n"
" --add-counter <event1[:<u|k>]>[,event1[:<u|k>]]...\n"
" Add extra counter events and store their values in each sample.\n"
" Must be used with --no-inherit.\n"
" --no-inherit\n"
" Don't trace child processes.\n"
" -p <pid1>[,pid2]...\n"
" Limit the process id of the collection target. Conflicts with the -a option.\n"
" -t <tid1>[,tid2]...\n"
" Limit the thread id of the collection target. Conflicts with the -a option.\n"
" --exclude-tid <tid1>[,tid2]...\n"
" Exclude threads of the collection target by thread ids. Conflicts with the -a option.\n"
" --exclude-thread <tname1>[,tname2]...\n"
" Exclude threads of the collection target by thread names. Conflicts with the -a option.\n"
" --exclude-process <pname1>[,pname2]...\n"
" Exclude processes by process names. Must be used with -a.\n"
" --offcpu\n"
" Trace when threads are scheduled off cpu.\n"
" -j <branch_filter1>[,branch_filter2]...\n"
" taken branch stack sampling, filter can be:\n"
" any: any type of branch\n"
" any_call: any function call or system call\n"
" any_ret: any function return or system call return\n"
" ind_call: any indirect branch\n"
" ind_jmp: any indirect jump\n"
" cond: conditional branches\n"
" call: direct calls, including far (to/from kernel) calls\n"
" u: only when the branch target is at the user level\n"
" k: only when the branch target is in the kernel\n"
" requires at least one of any, any_call, any_ret, ind_call, ind_jmp, cond, call\n"
" -s / --call-stack <fp|dwarf[,size]>\n"
" Setup and enable call stack (stack chain/backtrace) recording, Default is 'fp'.\n"
" the value can be:\n"
" fp: frame pointer\n"
" dwarf: DWARF's CFI - Call Frame Information\n"
" 'dwarf,size' set sample stack size, size should be in 8~65528 and 8 byte aligned. \n"
" as the method to collect the information used to show the call stacks.\n"
" --kernel-callchain\n"
" collect kernel callchain, must used with -s fp/dwarf simultaneously.\n"
" --callchain-useronly\n"
" collect only user callchain.\n"
" --delay-unwind\n"
" If '-s dwarf' used, stack will be unwind while recording, use this option to switch\n"
" to unwind after recording.\n"
" --disable-unwind\n"
" If '-s dwarf' is used, stack will be unwind while recording by default\n"
" use this option to disable unwinding.\n"
" --disable-callstack-expand\n"
" If '-s dwarf' is used, to break the 64k stack limit, callstack is merged by default\n"
" to build more complete call stack. that may not be correct sometimes.\n"
" --enable-debuginfo-symbolic\n"
" If '-s fp/dwarf' is used, symbols in .gnu_debugdata section of an elf, also called minidebuginfo\n"
" will be parsed, if not use this option, we will not parse minidebuginfo by default.\n"
" --clockid <clock_id>\n"
" Set the clock id to use for the various time fields in the perf_event_type records.\n"
" monotonic and monotonic_raw are supported,\n"
" some events might also allow boottime, realtime and clock_tai.\n"
" --pipe_input\n"
" Enable anonymous pipe for the client (calling process) to send control\n"
" commands to the server (hiperf executable program).\n"
" This parameter is designed for system-side C++ interface calls.\n"
" Application developers do not need to pay attention to this parameter\n"
" when using the hiperf command tool.\n"
" --pipe_output\n"
" Enable anonymous pipe for the server (hiperf executable program) to send\n"
" response to the client (calling process).\n"
" This parameter is designed for system-side C++ interface calls.\n"
" Application developers do not need to pay attention to this parameter\n"
" when using the hiperf command tool.\n"
" --symbol-dir <dir>\n"
" Set directory to look for symbol files, used for unwinding. \n"
" -m <mmap_pages>\n"
" Number of the mmap pages, used to receiving record data from kernel,\n"
// NOTE(review): "rang" in the next help line looks like a typo for "range".
" must be a power of two, rang[2,1024], default is 1024.\n"
" --app <package_name>\n"
" Collect profile info for an OHOS app, the app must be debuggable.\n"
" Record will exit if the process is not started within 20 seconds.\n"
" --chkms <millisec>\n"
" Set the interval of querying the <package_name>.\n"
" <millisec> is in range [1-200], default is 10.\n"
" --data-limit <SIZE[K|M|G]>\n"
" Stop recording after SIZE bytes of records. Default is unlimited.\n"
" --append-smo-data\n"
" Output information about the original shared libraries included in SOs\n"
" that support the SMO(Shared library Merge Optimization) feature. \n"
" The information mainly consists of a mapping table between the PC and\n"
" the original shared library names.\n"
" -o <output_file_name>\n"
// When is_sandbox_mapping is defined and non-zero, the advertised default path is built at run
// time from GetDefaultPathByEnv("perf.data"); otherwise the fixed /data/local/tmp path is shown.
// The same condition selects the initializer of outputFilename_ further down.
#if defined(is_sandbox_mapping) && is_sandbox_mapping
" Set output file name, default is " + GetDefaultPathByEnv("perf.data") + ".\n"
#else
" Set output file name, default is /data/local/tmp/perf.data.\n"
#endif
" -z\n"
" Compress record data.\n"
" --restart\n"
" Collect performance counter information of application startup.\n"
" Record will exit if the process is not started within 30 seconds.\n"
" --verbose\n"
" Show more detailed reports.\n"
" --control <command>\n"
" Control sampling by <command>, the <command> can be:\n"
" prepare: set arguments and prepare sampling\n"
" start: start sampling\n"
" pause: pause sampling\n"
" resume: resume sampling\n"
" output: output sampling data\n"
" stop: stop sampling\n"
" --dedup_stack\n"
" Remove duplicated stacks in perf record, conflicts with -a, only restrain using with -p\n"
" --cmdline-size <size>\n"
" set value to /sys/kernel/tracing/saved_cmdlines_size\n"
" the value should be between 512 and 4096\n"
" --report\n"
" Report with callstack after record. Conflicts with the -a option.\n"
" --backtrack\n"
" Collect data of the previous period. only restrain using with --control.\n"
" --backtrack-sec\n"
" If '--backtrack' is used, stop in <sec> seconds. seconds is in range [5-30]\n"
" default is 10\n"
" --dumpoptions\n"
" Dump command options.\n"
)
{
}
// Destructor is declared here and defined out of line.
~SubCommandRecord();
// Overrides of the SubCommand interface (all marked `override`).
HiperfError OnSubCommand(std::vector<std::string>& args) override;
bool ParseOption(std::vector<std::string> &args) override;
void DumpOptions(void) const override;
void AddReportArgs(CommandReporter& reporter) override;
// Static registration hook; presumably registers this subcommand with the command
// framework -- defined elsewhere, confirm there.
static bool RegisterSubCommandRecord(void);
// Public table of named option values, every entry starting at 0. The "spe" prefix together
// with GetSpeOptions()/HandleArmSpeEvent() below suggests these are Arm SPE event options --
// TODO confirm where the values are filled in.
std::map<const std::string, uint64_t> speOptMap_ = {
{"branch_filter", 0}, {"load_filter", 0},
{"store_filter", 0}, {"ts_enable", 0},
{"pa_enable", 0}, {"jitter", 0},
{"min_latency", 0}, {"event_filter", 0},
{"pct_enable", 0},
};
// Returns a reference typed as the SubCommand base, not as SubCommandRecord.
static SubCommand& GetInstance();
private:
PerfEvents perfEvents_;
PerfPipe perfPipe_;
// ---- Option state. The initializers are the values in effect before any option is parsed. ----
bool targetSystemWide_ = false;
bool compressData_ = false;
bool noInherit_ = false;
bool excludeHiperf_ = false;
bool appendSmoData_ = false;
bool offCPU_ = false;
bool delayUnwind_ = false;
bool disableUnwind_ = false;
bool disableCallstackExpend_ = false;
bool enableDebugInfoSymbolic_ = false;
bool verboseReport_ = false;
// Unlike the surrounding flags, this one starts out true.
bool kernelCallChain_ = true;
bool callChainUserOnly_ = false;
bool report_ = false;
bool sampleRaw_ = false;
// Default comes from PerfEvents::DEFAULT_TIMEOUT (declared elsewhere); the help text quotes
// 10000.0 seconds -- presumably the same value, confirm in perf_events.h.
float timeStopSec_ = PerfEvents::DEFAULT_TIMEOUT;
// Both start at 0 here; the "4000" frequency and "1" period defaults quoted in the help text
// are not applied in this header.
int frequency_ = 0;
int period_ = 0;
int cpuPercent_ = DEFAULT_CPU_PERCENT;
// Initialized to the maximum (1024), matching the help text, not to DEFAULT_MMAP_PAGES.
int mmapPages_ = MAX_PERF_MMAP_PAGE;
int cmdlinesSize_ = DEFAULT_SAVED_CMDLINES_SIZE;
int oldCmdlinesSize_ = 0;
std::vector<std::string> symbolDir_ = {};
// Default output path; chosen by the same is_sandbox_mapping condition used in the help text.
#if defined(is_sandbox_mapping) && is_sandbox_mapping
std::string outputFilename_ = GetDefaultPathByEnv("perf.data");
#else
std::string outputFilename_ = "/data/local/tmp/perf.data";
#endif
std::string appPackage_ = {};
int checkAppMs_ = DEFAULT_CHECK_APP_MS;
std::string clockId_ = {};
std::string strLimit_ = {};
std::string fifoFileC2S_;
std::string fifoFileS2C_;
// NOTE(review): CPU ids are stored in a vector of pid_t, the same element type as the
// pid/tid lists below.
std::vector<pid_t> selectCpus_ = {};
std::vector<pid_t> selectPids_ = {};
std::vector<pid_t> selectTids_ = {};
std::vector<pid_t> originalPids_ = {};
std::vector<pid_t> inputPidTidArgs_ = {};
bool restart_ = false;
std::vector<std::string> selectEvents_ = {};
std::vector<std::string> addCounters_ = {};
// One inner vector per event group.
std::vector<std::vector<std::string>> selectGroups_ = {};
std::vector<std::string> callStackType_ = {};
std::vector<std::string> vecBranchFilters_ = {};
std::vector<std::string> trackedCommand_ = {};
// ---- Exclusion lists: raw argument values first, then the pid/tid sets. ----
std::vector<pid_t> excludeTidArgs_ = {};
std::vector<std::string> excludeThreadNameArgs_ = {};
std::vector<std::string> excludeProcessNameArgs_ = {};
std::set<pid_t> excludePids_ = {};
std::set<pid_t> excludeTids_ = {};
std::unordered_set<uint32_t> rootPids_;
void CollectExcludeThread();
void SetExcludeHiperf();
bool IsThreadExcluded(const pid_t pid, const pid_t tid);
// ---- Backtrack state and record-file output steps. ----
bool backtrack_ = false;
uint64_t backtrackTime_ = DEFAULT_BACKTRACK_TIME_SEC;
bool outputEnd_ = false;
bool PreOutputRecordFile();
void OutputRecordFile();
bool PostOutputRecordFile(const bool output);
// Only compiled when CONFIG_HAS_CCM is defined: a config file path, a key name, and the
// function that (judging by its name) reads the mmap page count from that config.
#ifdef CONFIG_HAS_CCM
static constexpr char PRODUCT_CONFIG_PATH[] = "etc/hiperf/hiperf_cfg.json";
static constexpr char CFG_MAP_PAGES[] = "MmapPages";
void GetMmapPagesCfg();
#endif
// ---- Option parsing and validation helpers (defined elsewhere). ----
bool GetOptions(std::vector<std::string> &args);
bool CheckArgsRange();
bool CheckExcludeArgs();
bool CheckOptions();
bool GetSpeOptions();
bool CheckDataLimitOption();
bool CheckSelectCpuPidOption();
bool GetOptionFrequencyAndPeriod(std::vector<std::string> &args);
bool isCallStackDwarf_ = false;
bool isCallStackFp_ = false;
// MAX_SAMPLE_STACK_SIZE is not declared in this header; it comes from an included file.
uint32_t callStackDwarfSize_ = MAX_SAMPLE_STACK_SIZE;
uint64_t branchSampleType_ = 0;
uint64_t dataSizeLimit_ = 0;
bool isDataSizeLimitStop_ = false;
// Owned writer for the record file; null until created.
std::unique_ptr<PerfFileWriter> fileWriter_ = nullptr;
// ---- Client control channel. All descriptors start at -1. ----
int clientPipeInput_ = -1;
int clientPipeOutput_ = -1;
int readFd_ = -1;
int writeFd_ = -1;
int nullFd_ = -1;
std::thread clientCommandHandle_;
std::thread replyCommandHandle_;
std::atomic_bool clientRunning_ = true;
bool isHiperfClient_ = false;
// Pair of callbacks attached to one control command.
// Defaults: preProcess returns false; postProcess accepts a bool and does nothing.
struct ControlCommandHandler {
std::function<bool()> preProcess = []() -> bool {
return false;
};
std::function<void(bool)> postProcess = [](bool) {};
};
// String-keyed handler table, initially empty; presumably keyed by the --control command
// name and filled by InitControlCommandHandlerMap() -- confirm in the implementation.
std::unordered_map<std::string, ControlCommandHandler> controlCommandHandlerMap_ = {};
inline void CreateClientThread();
inline void CreateReplyThread();
void ClientCommandHandle();
void ReplyCommandHandle();
void InitControlCommandHandlerMap();
void DispatchControlCommand(const std::string& command);
// Response helpers come in bool and string overloads.
bool ClientCommandResponse(const bool response);
bool ClientCommandResponse(const std::string& str);
bool ChildResponseToMain(const bool response);
bool ChildResponseToMain(const std::string& str);
bool IsSamplingRunning();
// ---- --control / FIFO handling. ----
bool allowIpc_ = true;
std::string controlCmd_ = {};
bool isFifoServer_ = false;
bool isFifoClient_ = false;
bool dedupStack_ = false;
std::map<pid_t, std::vector<pid_t>> mapPids_;
bool ProcessControl();
bool CreateFifoServer();
bool MainRecvFromChild(const int fd, std::string& reply);
// pipeFd is a two-element descriptor array (declared as int[2]).
void HandleChildProcess(int pipeFd[2]);
bool HandleParentProcess(int pipeFd[2], pid_t pid);
// isSuccess and shouldPrintReply are output parameters (non-const references).
bool HandleReply(bool recvSuccess, const std::string& reply, bool& isSuccess, bool& shouldPrintReply);
bool HandleFinalResult(bool isSuccess, pid_t pid, bool shouldPrintReply);
void CloseClientThread();
void CloseReplyThread();
// ---- Preparation steps (defined elsewhere). ----
bool PreparePerfEvent();
bool PrepareSysKernel();
void PrepareKernelMaps();
bool PrepareVirtualRuntime();
// Counters, both starting at 0.
size_t recordSamples_ = 0;
size_t recordNoSamples_ = 0;
bool isNeedSetPerfHarden_ = false;
bool isSpe_ = false;
// Evaluated once, when the object is constructed, by calling IsRoot().
const bool isRoot_ = IsRoot();
uint32_t offset_ = 0;
// ---- Per-record handling. ----
bool ProcessRecord(PerfEventRecord& record);
bool SaveRecord(const PerfEventRecord& record);
uint32_t GetOffsetNum();
void UpdateDevHostMaps(PerfEventRecord& record);
void UpdateDevHostCallChains(PerfEventRecord& record);
void UpdateDevHostMapsAndIPs(PerfEventRecord& record);
uint32_t GetCountFromFile(const std::string &fileName);
std::string GetCpuDescFromFile();
// ---- Feature sections added to the record file (one Add*Feature per section). ----
bool AddCpuFeature();
void AddMemTotalFeature();
void AddEventDescFeature();
void AddRecordTimeFeature();
void AddWorkloadCmdFeature();
void AddCommandLineFeature();
void AddCpuOffFeature();
void AddAddCounterFeature();
void AddDevhostFeature();
bool AddFeatureRecordFile();
// ---- Record file lifecycle. ----
bool CreateInitRecordFile(const bool compressData = false);
bool FinishWriteRecordFile();
bool PostProcessRecordFile();
bool RecordCompleted();
// Only declared when HIPERF_DEBUG_TIME is defined.
#ifdef HIPERF_DEBUG_TIME
void ReportTime();
#endif
bool CollectionSymbol(PerfEventRecord& record);
void CollectSymbol(PerfRecordSample *sample);
// ---- Kernel perf limit setters. ----
// SetPerfLimit takes a file name, an int value, a binary int predicate `cmd`, and a parameter
// string; how they are combined is defined in the implementation.
bool SetPerfLimit(const std::string& file, const int value, std::function<bool (int, int)> const& cmd,
const std::string& param);
bool SetPerfCpuMaxPercent();
bool SetPerfMaxSampleRate();
bool SetPerfEventMlock();
bool SetPerfHarden();
bool TraceOffCpu();
// ---- Parsers for individual option values. ----
bool ParseCallStackOption(const std::vector<std::string> &callStackType);
bool ParseDataLimitOption(const std::string &str);
bool ParseBranchSampleType(const std::vector<std::string> &vecBranchSampleTypes);
// NOTE(review): takes the command string by const value, unlike the const-reference string
// parameters of the sibling parsers above.
bool ParseControlCmd(const std::string cmd);
// ---- Cross-option consistency checks. ----
bool CheckTargetProcessOptions();
bool CheckTargetPids();
bool CheckReportOption();
bool CheckBacktrackOption();
bool CheckSpeOption();
// ---- Target application / process tracking. ----
bool IsAppRestarted();
bool CheckAppRestart();
pid_t GetPidFromAppPackage(const pid_t oldPid, const uint64_t waitAppTimeOut);
bool IsAppRunning();
bool IsPidAndTidExist();
void MsgPrintAndTrans(const bool isTrans, const std::string& msg);
void WriteCommEventBeforeSampling();
void RemoveVdsoTmpFile();
void RemoveFifoFile();
void UpdateMapPids();
VirtualRuntime virtualRuntime_;
// Compiled in because USE_COLLECT_SYMBOLIC is defined as 1 at the top of this file.
// kSymbolsHits / uSymbolsHits are type names declared in another header.
#if USE_COLLECT_SYMBOLIC
std::unordered_map<pid_t, std::unordered_set<uint64_t>> kernelThreadSymbolsHits_;
kSymbolsHits kernelSymbolsHits_;
uSymbolsHits userSymbolsHits_;
void SymbolicHits();
#endif
// Timing accumulators, present only when HIPERF_DEBUG_TIME is defined; each starts at zero.
#ifdef HIPERF_DEBUG_TIME
std::chrono::microseconds prcessRecordTimes_ = std::chrono::microseconds::zero();
std::chrono::microseconds saveRecordTimes_ = std::chrono::microseconds::zero();
std::chrono::microseconds saveFeatureTimes_ = std::chrono::microseconds::zero();
#endif
// Steady-clock time point; default-constructed here and assigned elsewhere.
std::chrono::time_point<std::chrono::steady_clock> startSaveFileTimes_;
void SetHM();
void CollectRootPids();
void HandleRootProcess(const pid_t& pid);
void SetSavedCmdlinesSize();
void RecoverSavedCmdlinesSize();
bool OnlineReportData();
// ---- Staged helpers returning HiperfError; presumably the phases OnSubCommand() runs in
// sequence -- confirm in the implementation. ----
HiperfError CheckTargetAndApp();
HiperfError PrepareSystemAndRecorder();
HiperfError PrepareRuntimeAndThreads();
HiperfError StartSamplingAndFile();
bool AddEventsAndHandleOffCpu();
bool ConfigureStackAndBranch();
bool HandleArmSpeEvent();
bool ProcessSymbolsIfNeeded();
bool ProcessUserSymbols();
void ConfigureBasicParams();
void ConfigureSamplingAndBacktrack();
void CleanupForBacktrack();
void UpdateKernelRelatedSymbols();
// Optional observer that receives a const PerfEventRecord&; null by default and installed via
// SetCheckRecordCallback(). When it is invoked is decided by the implementation.
using CheckRecordCallBack = std::function<void(const PerfEventRecord&)>;
void SetCheckRecordCallback(CheckRecordCallBack callback);
CheckRecordCallBack checkCallback_ = nullptr;
};
}
}
}
#endif