/* **********************************************************
 * Copyright (c) 2022-2023 Google, Inc. All rights reserved.
 * **********************************************************/
/*
 * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of Google, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* record_reader_t and record_file_reader_t: readers that provide the
 * stream of trace_entry_t exactly as present in a stored offline trace.
 */
#ifndef _RECORD_FILE_READER_H_
#define _RECORD_FILE_READER_H_ 1
#include <assert.h>
#include <iterator>
#include <memory>
#include "memtrace_stream.h"
#include "reader.h"
#include "trace_entry.h"
/* Expands to nothing. Presumably an annotation for output parameters; it is
 * not used by the code visible in this header.
 */
#define OUT
#ifdef DEBUG
/* Debug builds: when the reader's verbosity_ is at least "level", prints the
 * reader's output_prefix_ followed by the printf-style message to stderr.
 * "reader" must be a pointer to an object exposing verbosity_ and
 * output_prefix_. Wrapped in do/while(0) so it acts as a single statement.
 */
# define VPRINT(reader, level, ...) \
do { \
if ((reader)->verbosity_ >= (level)) { \
fprintf(stderr, "%s ", (reader)->output_prefix_); \
fprintf(stderr, __VA_ARGS__); \
} \
} while (0)
/* Debug builds: expands to nothing. */
# define UNUSED(x)
#else
/* Non-debug builds: expands to nothing, so the arguments are never evaluated. */
# define VPRINT(reader, level, ...)
/* Non-debug builds: evaluates x and discards it. Used below on a variable
 * that is otherwise only read inside assert().
 */
# define UNUSED(x) ((void)(x))
#endif
namespace dynamorio {
namespace drmemtrace {
/**
 * Trace reader that provides the stream of #dynamorio::drmemtrace::trace_entry_t exactly
* as present in an offline trace stored on disk. The public API is similar to
* #dynamorio::drmemtrace::reader_t, except that it is an iterator over
* #dynamorio::drmemtrace::trace_entry_t entries instead of
* #dynamorio::drmemtrace::memref_t. This does not yet support iteration over a serialized
* stream of multiple traces.
*
* TODO i#5727: Convert #dynamorio::drmemtrace::reader_t and file_reader_t into templates:
* `reader_tmpl_t<RecordType>` and file_reader_tmpl_t<T, RecordType> where T is one of
* gzip_reader_t, zipfile_reader_t, snappy_reader_t, std::ifstream*, and RecordType is one
* of #dynamorio::drmemtrace::memref_t, #dynamorio::drmemtrace::trace_entry_t. Then,
* typedef the #dynamorio::drmemtrace::trace_entry_t specializations as record_reader_t
* and record_file_reader_t<T> respectively. This will allow significant code reuse,
* particularly for serializing multiple thread traces into a single stream.
*
* Since the current file_reader_t is already a template on T, adding the second template
* parameter RecordType is complex. Note that we cannot have partial specialization of
* member functions in C++. This complicates implementation of various
* file_reader_tmpl_t<T, RecordType> specializations for T, as we would need to duplicate
* the implementation for each candidate of RecordType.
*
* We have two options:
* 1. For each member function specialized for some T, duplicate the definition for
* file_reader_tmpl_t<T, #dynamorio::drmemtrace::memref_t> and file_reader_tmpl_t<T,
* trace_entry_t>. This has the obvious disadvantage of code duplication, which can be
* mitigated to some extent by extracting common logic in static routines.
* 2. For each specialization of T, create a subclass templatized on RecordType that
* inherits from file_reader_tmpl_t<_, RecordType>. E.g. for T = gzip_reader_t, create
* `class gzip_file_reader_t<RecordType>: public file_reader_tmpl_t<gzip_reader_t,
* RecordType>` This has the disadvantage of breaking backward-compatibility of the
* existing reader interface. Users that define their own readers outside DR will need
* to adapt to this change. The advantage of this approach is that it is somewhat
* cleaner to have proper classes instead of template specializations for file readers.
*
* We prefer Option 2, since it has higher merit.
*
* Currently we do not have any use-case that needs this design, but when we need to
* support serial iteration over #dynamorio::drmemtrace::trace_entry_t, we would want to
* do this to reuse the existing multiple trace serialization code in file_reader_t.
* file_reader_t hides some #dynamorio::drmemtrace::trace_entry_t entries today (like
* TRACE_TYPE_THREAD, TRACE_TYPE_PID, etc); we would also need to avoid doing that since
* record_reader_t is expected to provide the exact stream of
* #dynamorio::drmemtrace::trace_entry_t as stored on disk.
*/
class record_reader_t : public std::iterator<std::input_iterator_tag, trace_entry_t>,
                        public memtrace_stream_t {
public:
    /**
     * Constructs a reader that still needs init() to be called before use.
     *
     * @param verbosity  Threshold compared against the level of each VPRINT call.
     * @param prefix     String printed in front of each VPRINT message. Only the
     *                   pointer is stored, so it must outlive this object.
     */
    record_reader_t(int verbosity, const char *prefix)
        : verbosity_(verbosity)
        , output_prefix_(prefix)
        , eof_(false)
    {
    }
    /**
     * Constructs a reader whose eof_ flag is already set, so that it compares
     * equal to any reader that has hit the end of its input.
     */
    record_reader_t()
    {
    }
    virtual ~record_reader_t() = default;

    /**
     * Opens the input through open_input_file() and reads the first entry.
     * This is done here rather than in the constructor; presumably because it
     * dispatches to virtual functions implemented by subclasses.
     *
     * @return false if the input could not be opened, true otherwise.
     */
    bool
    init()
    {
        if (!open_input_file())
            return false;
        ++*this;
        return true;
    }

    /** Returns the entry most recently read by operator++. */
    const trace_entry_t &
    operator*()
    {
        return cur_entry_;
    }

    /**
     * Reads the next entry and updates the stream statistics. Nothing is
     * counted once eof_ is set.
     */
    record_reader_t &
    operator++()
    {
        bool res = read_next_entry();
        // A failed read is only acceptable when it is the end of the input.
        assert(res || eof_);
        UNUSED(res);
        if (eof_)
            return *this;

        ++cur_ref_count_;
        if (type_is_instr(static_cast<trace_type_t>(cur_entry_.type))) {
            ++cur_instr_count_;
        } else if (cur_entry_.type == TRACE_TYPE_MARKER) {
            // For marker entries, the size field selects the marker type and the
            // addr field carries the marker value.
            switch (cur_entry_.size) {
            case TRACE_MARKER_TYPE_VERSION: version_ = cur_entry_.addr; break;
            case TRACE_MARKER_TYPE_FILETYPE: filetype_ = cur_entry_.addr; break;
            case TRACE_MARKER_TYPE_CACHE_LINE_SIZE:
                cache_line_size_ = cur_entry_.addr;
                break;
            case TRACE_MARKER_TYPE_PAGE_SIZE: page_size_ = cur_entry_.addr; break;
            case TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT:
                chunk_instr_count_ = cur_entry_.addr;
                break;
            case TRACE_MARKER_TYPE_TIMESTAMP:
                last_timestamp_ = cur_entry_.addr;
                // A zero first_timestamp_ means no timestamp has been seen yet.
                if (first_timestamp_ == 0)
                    first_timestamp_ = last_timestamp_;
                break;
            default: break; // Other markers do not affect the tracked state.
            }
        }
        return *this;
    }

    /** Returns the number of entries read so far. */
    uint64_t
    get_record_ordinal() const override
    {
        return cur_ref_count_;
    }
    /** Returns the number of instruction entries read so far. */
    uint64_t
    get_instruction_ordinal() const override
    {
        return cur_instr_count_;
    }
    /** Returns the value of the latest timestamp marker read, or 0 if none. */
    uint64_t
    get_last_timestamp() const override
    {
        return last_timestamp_;
    }
    /** Returns the value of the first non-zero timestamp marker read, or 0 if none. */
    uint64_t
    get_first_timestamp() const override
    {
        return first_timestamp_;
    }
    /** Returns the value of the latest version marker read, or 0 if none. */
    uint64_t
    get_version() const override
    {
        return version_;
    }
    /** Returns the value of the latest filetype marker read, or 0 if none. */
    uint64_t
    get_filetype() const override
    {
        return filetype_;
    }
    /** Returns the value of the latest cache line size marker read, or 0 if none. */
    uint64_t
    get_cache_line_size() const override
    {
        return cache_line_size_;
    }
    /** Returns the value of the latest chunk instruction count marker read, or 0 if none. */
    uint64_t
    get_chunk_instr_count() const override
    {
        return chunk_instr_count_;
    }
    /** Returns the value of the latest page size marker read, or 0 if none. */
    uint64_t
    get_page_size() const override
    {
        return page_size_;
    }

    /**
     * Compares only the end-of-input state of the two readers, via BOOLS_MATCH
     * (defined elsewhere; expected to compare the two flags as booleans).
     */
    virtual bool
    operator==(const record_reader_t &rhs) const
    {
        return BOOLS_MATCH(eof_, rhs.eof_);
    }
    /** Negation of operator==. */
    virtual bool
    operator!=(const record_reader_t &rhs) const
    {
        return !BOOLS_MATCH(eof_, rhs.eof_);
    }

    /**
     * Advances until instruction_count + 1 further instruction entries have been
     * read, which leaves the reader on the instruction that follows the skipped
     * ones. Stops early if the end of the input is reached.
     *
     * @param instruction_count  Number of instructions to skip.
     * @return this reader.
     */
    virtual record_reader_t &
    skip_instructions(uint64_t instruction_count)
    {
        const uint64_t start_count = cur_instr_count_;
        // Comparing the distance travelled avoids overflowing a precomputed stop
        // count. The eof_ check is required because operator++ stops advancing
        // cur_instr_count_ at the end of the input, which would otherwise make
        // this loop spin forever.
        while (!eof_ && cur_instr_count_ - start_count <= instruction_count) {
            ++(*this);
        }
        return *this;
    }

protected:
    /**
     * Reads the next entry into cur_entry_. Implementations are expected to
     * set eof_ when they return false because the input is exhausted, as
     * operator++ asserts on exactly that.
     */
    virtual bool
    read_next_entry() = 0;
    /** Opens the single input file at input_path. */
    virtual bool
    open_single_file(const std::string &input_path) = 0;
    /** Opens the reader's input; called by init(). */
    virtual bool
    open_input_file() = 0;

    // Entry most recently read; value-initialized until the first read.
    trace_entry_t cur_entry_ = {};
    // Both fields below are read by VPRINT, so they are given defaults for the
    // default constructor, which does not set them.
    int verbosity_ = 0;
    const char *output_prefix_ = "[record_reader_t]";
    // True by default so that a default-constructed reader is at end-of-input.
    bool eof_ = true;

private:
    uint64_t cur_ref_count_ = 0;
    uint64_t cur_instr_count_ = 0;
    uint64_t last_timestamp_ = 0;
    uint64_t first_timestamp_ = 0;
    uint64_t version_ = 0;
    uint64_t filetype_ = 0;
    uint64_t cache_line_size_ = 0;
    uint64_t chunk_instr_count_ = 0;
    uint64_t page_size_ = 0;
};
/**
 * Similar to file_reader_t, templatizes on the file type for specializing
* for compression and different file types.
*/
template <typename T> class record_file_reader_t : public record_reader_t {
public:
    /**
     * Constructs a reader for the file at path. The file is not opened here;
     * that happens in init().
     *
     * @param path       Path of the input file.
     * @param verbosity  Forwarded to record_reader_t.
     * @param prefix     Forwarded to record_reader_t.
     */
    record_file_reader_t(const std::string &path, int verbosity = 0,
                         const char *prefix = "[record_file_reader_t]")
        : record_reader_t(verbosity, prefix)
        , input_path_(path)
    {
    }
    /** Constructs a reader with an empty input path. */
    record_file_reader_t()
    {
    }
    /**
     * Returns the part of the input path after the last DIRSEP, or the whole
     * path when it contains no DIRSEP.
     */
    std::string
    get_stream_name() const override
    {
        const size_t last_sep = input_path_.find_last_of(DIRSEP);
        // A bare file name has no separator and is itself the stream name.
        if (last_sep == std::string::npos)
            return input_path_;
        return input_path_.substr(last_sep + 1);
    }
    // Declared here and defined outside this class body.
    virtual ~record_file_reader_t();

private:
    /**
     * Opens input_path_ through open_single_file() and logs a message on
     * failure. An empty path is not treated as an error: nothing is opened and
     * true is returned.
     */
    bool
    open_input_file() override
    {
        if (input_path_.empty())
            return true;
        if (!open_single_file(input_path_)) {
            ERRMSG("Failed to open %s\n", input_path_.c_str());
            return false;
        }
        return true;
    }

    // Handle for the opened input; null until a file is opened.
    std::unique_ptr<T> input_file_ = nullptr;
    std::string input_path_;

    // The two functions below are declared here and defined outside this
    // class body, presumably once per file type T.
    bool
    open_single_file(const std::string &path) override;
    bool
    read_next_entry() override;
};
}
}
#endif