/* **********************************************************
* Copyright (c) 2016-2023 Google, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of Google, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* analyzer: the top-level trace analysis driver, which operates only
* on a file. We separate this from analyzer_multi, which can operate online
* or on a raw trace file, to avoid needing to link in DR itself.
*/
#ifndef _ANALYZER_H_
#define _ANALYZER_H_ 1
/**
* @file drmemtrace/analyzer.h
* @brief DrMemtrace top-level trace analysis driver.
*/
#include <stdint.h>
#include <iterator>
#include <memory>
#include <queue>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "analysis_tool.h"
#include "memref.h"
#include "reader.h"
#include "record_file_reader.h"
#include "scheduler.h"
#include "trace_entry.h"
namespace dynamorio {
namespace drmemtrace {
/**
* An analyzer is the top-level driver of a set of trace analysis tools.
* It supports two different modes of operation: either it iterates over the
* trace and calls the process_memref() routine of each tool, or it exposes
* an iteration interface to external control code.
*
* RecordType is the type of entry to be analyzed: #dynamorio::drmemtrace::memref_t or
* #dynamorio::drmemtrace::trace_entry_t. ReaderType is the reader that allows reading
* entries of type RecordType: #dynamorio::drmemtrace::reader_t or
* #dynamorio::drmemtrace::record_reader_t respectively.
*
* #dynamorio::drmemtrace::analyzer_tmpl_t<#dynamorio::drmemtrace::memref_t,
* #dynamorio::drmemtrace::reader_t> is the primary type of analyzer, which is used for
* most purposes. It uses tools of type
* #dynamorio::drmemtrace::analysis_tool_tmpl_t<#dynamorio::drmemtrace::memref_t>. This
* analyzer provides various features to support trace analysis, e.g. processing the
* instruction encoding entries and making it available to the tool inside
* #dynamorio::drmemtrace::memref_t.
*
* #dynamorio::drmemtrace::analyzer_tmpl_t<#dynamorio::drmemtrace::trace_entry_t,
* #dynamorio::drmemtrace::record_reader_t> is used in special cases where an offline
* trace needs to be observed exactly as stored on disk, without hiding any internal
* entries. It uses tools of type
* #dynamorio::drmemtrace::analysis_tool_tmpl_t<#dynamorio::drmemtrace::trace_entry_t>.
*
* TODO i#5727: When we convert #dynamorio::drmemtrace::reader_t into a template on
* RecordType, we can remove the second template parameter to
* #dynamorio::drmemtrace::analyzer_tmpl_t, and simply use reader_tmpl_t<RecordType>
* instead.
*/
template <typename RecordType, typename ReaderType> class analyzer_tmpl_t {
public:
* Usage: errors encountered during a constructor will set a flag that should
* be queried via operator!(). If operator!() returns true, get_error_string()
* can be used to try to obtain more information.
*/
analyzer_tmpl_t();
virtual ~analyzer_tmpl_t();
virtual bool
operator!();
virtual std::string
get_error_string();
* The analyzer usage model supports multiple tools, with the trace iteration
* performed by analyzer_t. It supports parallel trace analysis. The analyzer will
* reference the tools array passed in during its lifetime: it does not make a copy.
* The user must free them afterward. The analyzer calls the initialize_stream()
* function on each tool before use.
*/
analyzer_tmpl_t(const std::string &trace_path,
analysis_tool_tmpl_t<RecordType> **tools, int num_tools,
int worker_count = 0, uint64_t skip_instrs = 0,
uint64_t interval_microseconds = 0, int verbosity = 0);
virtual bool
run();
virtual bool
print_stats();
protected:
typedef scheduler_tmpl_t<RecordType, ReaderType> sched_type_t;
struct analyzer_tool_shard_data_t {
analyzer_tool_shard_data_t()
: shard_data(nullptr)
{
}
analyzer_tool_shard_data_t(analyzer_tool_shard_data_t &&src)
{
shard_data = src.shard_data;
interval_snapshot_data = std::move(src.interval_snapshot_data);
}
void *shard_data;
std::queue<typename analysis_tool_tmpl_t<RecordType>::interval_state_snapshot_t *>
interval_snapshot_data;
private:
analyzer_tool_shard_data_t(const analyzer_tool_shard_data_t &) = delete;
analyzer_tool_shard_data_t &
operator=(const analyzer_tool_shard_data_t &) = delete;
};
struct analyzer_shard_data_t {
analyzer_shard_data_t()
: cur_interval_index(0)
, cur_interval_init_instr_count(0)
, shard_id(0)
{
}
uint64_t cur_interval_index;
uint64_t cur_interval_init_instr_count;
int64_t shard_id;
std::vector<analyzer_tool_shard_data_t> tool_data;
private:
analyzer_shard_data_t(const analyzer_shard_data_t &) = delete;
analyzer_shard_data_t &
operator=(const analyzer_shard_data_t &) = delete;
};
struct analyzer_worker_data_t {
analyzer_worker_data_t(int index, typename sched_type_t::stream_t *stream)
: index(index)
, stream(stream)
{
}
analyzer_worker_data_t(analyzer_worker_data_t &&src)
{
index = src.index;
stream = src.stream;
shard_data = std::move(src.shard_data);
error = std::move(src.error);
}
int index;
typename scheduler_tmpl_t<RecordType, ReaderType>::stream_t *stream;
std::string error;
std::unordered_map<int, analyzer_shard_data_t> shard_data;
private:
analyzer_worker_data_t(const analyzer_worker_data_t &) = delete;
analyzer_worker_data_t &
operator=(const analyzer_worker_data_t &) = delete;
};
bool
init_scheduler(const std::string &trace_path,
memref_tid_t only_thread = INVALID_THREAD_ID, int verbosity = 0,
typename sched_type_t::scheduler_options_t *options = nullptr);
bool
init_scheduler(
std::unique_ptr<ReaderType> reader = std::unique_ptr<ReaderType>(nullptr),
std::unique_ptr<ReaderType> reader_end = std::unique_ptr<ReaderType>(nullptr),
int verbosity = 0, typename sched_type_t::scheduler_options_t *options = nullptr);
bool
init_scheduler_common(typename sched_type_t::input_workload_t &workload,
typename sched_type_t::scheduler_options_t *options);
void
process_tasks(analyzer_worker_data_t *worker);
void
process_serial(analyzer_worker_data_t &worker);
bool
record_has_tid(RecordType record, memref_tid_t &tid);
bool
record_is_thread_final(RecordType record);
bool
record_is_timestamp(const RecordType &record);
bool
process_interval(uint64_t interval_id, uint64_t interval_init_instr_count,
analyzer_worker_data_t *worker, bool parallel, int shard_idx = 0);
uint64_t
compute_interval_id(uint64_t first_timestamp, uint64_t latest_timestamp);
uint64_t
compute_interval_end_timestamp(uint64_t first_timestamp, uint64_t interval_id);
bool
advance_interval_id(
typename scheduler_tmpl_t<RecordType, ReaderType>::stream_t *stream,
analyzer_shard_data_t *shard, uint64_t &prev_interval_index,
uint64_t &prev_interval_init_instr_count);
virtual bool
collect_and_maybe_merge_shard_interval_results();
bool
merge_shard_interval_results(
std::vector<std::queue<
typename analysis_tool_tmpl_t<RecordType>::interval_state_snapshot_t *>>
&intervals,
std::vector<typename analysis_tool_tmpl_t<RecordType>::interval_state_snapshot_t
*> &merged_intervals,
int tool_idx);
bool
combine_interval_snapshots(
const std::vector<
const typename analysis_tool_tmpl_t<RecordType>::interval_state_snapshot_t *>
&latest_shard_snapshots,
uint64_t interval_end_timestamp, int tool_idx,
typename analysis_tool_tmpl_t<RecordType>::interval_state_snapshot_t *&result);
uint64_t
get_current_microseconds();
bool success_;
scheduler_tmpl_t<RecordType, ReaderType> scheduler_;
std::string error_string_;
std::vector<analyzer_worker_data_t> worker_data_;
int num_tools_;
analysis_tool_tmpl_t<RecordType> **tools_;
std::vector<std::vector<
typename analysis_tool_tmpl_t<RecordType>::interval_state_snapshot_t *>>
merged_interval_snapshots_;
bool parallel_;
int worker_count_;
const char *output_prefix_ = "[analyzer]";
uint64_t skip_instrs_ = 0;
uint64_t interval_microseconds_ = 0;
int verbosity_ = 0;
shard_type_t shard_type_ = SHARD_BY_THREAD;
bool sched_by_time_ = false;
private:
bool
serial_mode_supported();
};
/** Analyzer over #dynamorio::drmemtrace::memref_t records read via reader_t. */
using analyzer_t = analyzer_tmpl_t<memref_t, reader_t>;
/** Analyzer over #dynamorio::drmemtrace::trace_entry_t records read via record_reader_t. */
using record_analyzer_t =
    analyzer_tmpl_t<trace_entry_t, dynamorio::drmemtrace::record_reader_t>;
}
}
#endif