/* ******************************************************************************
 * Copyright (c) 2011-2023 Google, Inc. All rights reserved.
 * Copyright (c) 2010 Massachusetts Institute of Technology All rights reserved.
 * ******************************************************************************/
/*
 * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of Google, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* DrMemtrace trace data output logic.
 */
#include "output.h"
#include <sys/types.h>
#include <atomic>
#include <cstdint>
#include <cstring>
#include <string>
#include "dr_api.h"
#include "drmemtrace.h"
#include "drmgr.h"
#include "droption.h"
#include "drx.h"
#include "instru.h"
#include "instr_counter.h"
#include "named_pipe.h"
#include "options.h"
#include "physaddr.h"
#include "raw2trace.h"
#include "trace_entry.h"
#include "tracer.h"
#include "utils.h"
#ifdef HAS_SNAPPY
# include <snappy.h>
# include "snappy_file_writer.h"
#endif
#ifdef HAS_ZLIB
# include <zlib.h>
#endif
#ifdef HAS_LZ4
# include <lz4frame.h>
#endif
namespace dynamorio {
namespace drmemtrace {
/* Trace thresholds.
 */
/* Instruction counts are accumulated in thread-local counters to avoid
 * synchronization costs and only add to the global every N counts.
 */
/* Process-wide count of instructions traced in the current window.  Threads add
 * their local counts to it in batches (see count_traced_instrs()) and it is
 * reset to 0 when a window ends (see reached_traced_instrs_threshold()).
 */
static std::atomic<uint64> cur_window_instr_count;
/* Returns the tracing-window value stored in this thread's
 * MEMTRACE_TLS_OFFS_WINDOW TLS slot.
 */
static ptr_int_t
get_local_window(per_thread_t *data)
{
    ptr_int_t *window_slot =
        (ptr_int_t *)TLS_SLOT(data->seg_base, MEMTRACE_TLS_OFFS_WINDOW);
    return *window_slot;
}
/* Returns the tracing-mode value stored in this thread's
 * MEMTRACE_TLS_OFFS_MODE TLS slot.
 */
static ptr_int_t
get_local_mode(per_thread_t *data)
{
    ptr_int_t *mode_slot = (ptr_int_t *)TLS_SLOT(data->seg_base, MEMTRACE_TLS_OFFS_MODE);
    return *mode_slot;
}
/* Stores "mode" into this thread's MEMTRACE_TLS_OFFS_MODE TLS slot. */
static void
set_local_mode(per_thread_t *data, ptr_int_t mode)
{
    ptr_int_t *mode_slot = (ptr_int_t *)TLS_SLOT(data->seg_base, MEMTRACE_TLS_OFFS_MODE);
    *mode_slot = mode;
}
/* Returns how many instructions a thread accumulates locally before adding them
 * to the global counter: INSTR_COUNT_LOCAL_UNIT for windows larger than ten such
 * units, and otherwise one tenth of the window size.
 */
static uint64
local_instr_count_threshold(uint64 trace_for_instrs)
{
    const bool is_large_window = trace_for_instrs > INSTR_COUNT_LOCAL_UNIT * 10;
    return is_large_window ? INSTR_COUNT_LOCAL_UNIT : trace_for_instrs / 10;
}
static bool
count_traced_instrs(void *drcontext, uintptr_t toadd, uint64 trace_for_instrs)
{
per_thread_t *data = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx);
data->cur_window_instr_count += toadd;
if (data->cur_window_instr_count >= local_instr_count_threshold(trace_for_instrs)) {
uint64 newval = cur_window_instr_count.fetch_add(data->cur_window_instr_count,
std::memory_order_release) +
data->cur_window_instr_count;
data->cur_window_instr_count = 0;
if (newval >= trace_for_instrs)
return true;
}
return false;
}
static void
reached_traced_instrs_threshold(void *drcontext)
{
per_thread_t *data = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx);
dr_mutex_lock(mutex);
if (get_local_window(data) != tracing_window.load(std::memory_order_acquire)) {
dr_mutex_unlock(mutex);
return;
}
NOTIFY(0, "Hit tracing window #%zd limit: disabling tracing.\n",
tracing_window.load(std::memory_order_acquire));
tracing_window.fetch_add(1, std::memory_order_release);
DR_ASSERT(tracing_mode.load(std::memory_order_acquire) == BBDUP_MODE_TRACE);
tracing_mode.store(BBDUP_MODE_COUNT, std::memory_order_release);
cur_window_instr_count.store(0, std::memory_order_release);
dr_mutex_unlock(mutex);
}
/* Buffer writing to disk.
 */
/* Incremented via DR's atomics in process_and_output_buffer() so that the
 * "Hit -max_global_trace_refs" notice is only acted on by the first incrementer.
 */
static int notify_beyond_global_max_once;
/* Set (while holding mutex) by the thread that calls dr_exit_process() for
 * -exit_after_tracing, so that other threads do not also try to exit.
 */
static volatile bool exited_process;
/* Computes the OFFLINE_FILE_TYPE_* bitmask describing the trace being produced,
 * based on the current option values, the build configuration, and the target
 * architecture.
 */
static offline_file_type_t
get_file_type()
{
    offline_file_type_t result = OFFLINE_FILE_TYPE_DEFAULT;
    /* ORs one flag into the accumulated result. */
    auto add_flag = [&result](offline_file_type_t flag) {
        result = static_cast<offline_file_type_t>(result | flag);
    };

    if (op_L0I_filter.get_value())
        add_flag(OFFLINE_FILE_TYPE_IFILTERED);
    if (op_L0D_filter.get_value())
        add_flag(OFFLINE_FILE_TYPE_DFILTERED);
    if (op_L0_filter_until_instrs.get_value())
        add_flag(OFFLINE_FILE_TYPE_BIMODAL_FILTERED_WARMUP);
    if (op_disable_optimizations.get_value())
        add_flag(OFFLINE_FILE_TYPE_NO_OPTIMIZATIONS);
    /* A zero-sized data filter also counts as an instruction-only trace. */
    if (op_instr_only_trace.get_value() ||
        (op_L0D_filter.get_value() && op_L0D_size.get_value() == 0))
        add_flag(OFFLINE_FILE_TYPE_INSTRUCTION_ONLY);
    if (op_instr_encodings.get_value())
        add_flag(OFFLINE_FILE_TYPE_ENCODINGS);
#ifdef BUILD_PT_TRACER
    if (op_enable_kernel_tracing.get_value())
        add_flag(OFFLINE_FILE_TYPE_KERNEL_SYSCALLS);
#endif
    /* Architecture flag, selected at compile time. */
    add_flag(IF_X86_ELSE(
        IF_X64_ELSE(OFFLINE_FILE_TYPE_ARCH_X86_64, OFFLINE_FILE_TYPE_ARCH_X86_32),
        IF_X64_ELSE(OFFLINE_FILE_TYPE_ARCH_AARCH64, OFFLINE_FILE_TYPE_ARCH_ARM32)));
    if (!op_L0I_filter.get_value())
        add_flag(OFFLINE_FILE_TYPE_SYSCALL_NUMBERS);
#ifdef LINUX
    add_flag(OFFLINE_FILE_TYPE_BLOCKING_SYSCALLS);
#endif
    return result;
}
#ifdef HAS_LZ4
/* LZ4 frame preferences used for both sizing the lz4 output buffer
 * (LZ4F_compressBound) and starting each frame (LZ4F_compressBegin).
 */
static const LZ4F_preferences_t lz4_ops = {
/* Frame info: 256KB max block size, linked blocks, no content checksum and
 * no block checksum.
 */
{ LZ4F_max256KB, LZ4F_blockLinked, LZ4F_noContentChecksum, LZ4F_frame,
0, 0, LZ4F_noBlockChecksum },
/* NOTE(review): the remaining members are left at 0, presumably selecting
 * lz4's defaults (compression level, auto-flush, etc.) -- confirm against
 * the LZ4F_preferences_t declaration in lz4frame.h.
 */
0,
0,
0,
{ 0, 0, 0 },
};
#endif
#ifdef HAS_SNAPPY
/* Returns whether -raw_compress selects one of the snappy variants
 * ("snappy" or "snappy_nocrc").
 */
static inline bool
snappy_enabled()
{
    const auto &codec = op_raw_compress.get_value();
    if (codec == "snappy")
        return true;
    return codec == "snappy_nocrc";
}
#endif
#ifdef HAS_ZLIB
/* Allocator callback handed to zlib (zstream.zalloc): allocates
 * items * per_size bytes through dr_custom_alloc().
 * The total allocation size is stored in a size_t header placed immediately
 * before the returned pointer so that redirect_free() can pass it to
 * dr_custom_free().
 * Returns Z_NULL if the requested size overflows size_t or the allocation fails.
 */
static void *
redirect_malloc(void *drcontext, uint items, uint per_size)
{
    /* Widen before multiplying: a uint * uint product would wrap at 32 bits
     * and silently under-allocate.
     */
    size_t size = static_cast<size_t>(items) * per_size;
    /* Where size_t is itself 32 bits the widened product can still wrap. */
    if (per_size != 0 && size / per_size != items)
        return Z_NULL;
    /* Leave room for the size header without wrapping. */
    if (size > SIZE_MAX - sizeof(size_t))
        return Z_NULL;
    size += sizeof(size_t);
    void *mem = dr_custom_alloc(nullptr, static_cast<dr_alloc_flags_t>(0), size,
                                DR_MEMPROT_READ | DR_MEMPROT_WRITE, nullptr);
    if (mem == NULL)
        return Z_NULL;
    *((size_t *)mem) = size;
    return (byte *)mem + sizeof(size_t);
}
static void
redirect_free(void *drcontext, void *ptr)
{
if (ptr != NULL) {
byte *mem = (byte *)ptr;
mem -= sizeof(size_t);
dr_custom_free(nullptr, static_cast<dr_alloc_flags_t>(0), mem, *((size_t *)mem));
}
}
#endif
int
append_unit_header(void *drcontext, byte *buf_ptr, thread_id_t tid, ptr_int_t window)
{
int size_added = instru->append_unit_header(buf_ptr, tid, window);
if (op_L0I_filter.get_value()) {
uintptr_t icount = 0;
if (drcontext != NULL) {
per_thread_t *data = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx);
icount = *(uintptr_t *)TLS_SLOT(data->seg_base, MEMTRACE_TLS_OFFS_ICOUNT);
}
size_added += instru->append_marker(buf_ptr + size_added,
TRACE_MARKER_TYPE_INSTRUCTION_COUNT, icount);
}
return size_added;
}
/* When -split_windows is enabled, creates the subdirectory for window
 * "window_num" underneath logsubdir; aborts via FATAL if creation fails.
 * Does nothing when -split_windows is off.
 */
void
open_new_window_dir(ptr_int_t window_num)
{
    if (op_split_windows.get_value()) {
        DR_ASSERT(op_offline.get_value());
        char window_path[MAXIMUM_PATH];
        dr_snprintf(window_path, BUFFER_SIZE_ELEMENTS(window_path),
                    "%s%s" WINDOW_SUBDIR_FORMAT, logsubdir, DIRSEP, window_num);
        NULL_TERMINATE_BUFFER(window_path);
        if (!file_ops_func.create_dir(window_path))
            FATAL("Failed to create window subdir %s\n", window_path);
        NOTIFY(2, "Created new window dir %s\n", window_path);
    }
}
/* Finishes any active compression stream for the calling thread's offline
 * output file, writes out the trailing compressed bytes, closes the file, and
 * sets data->file to INVALID_FILE.
 */
static void
close_thread_file(void *drcontext)
{
per_thread_t *data = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx);
#ifdef HAS_SNAPPY
if (op_offline.get_value() && snappy_enabled()) {
/* The writer was built with placement new in dr_custom_alloc memory (see
 * open_new_thread_file()), so it is destroyed and freed by hand.
 */
data->snappy_writer->~snappy_file_writer_t();
dr_custom_free(nullptr, static_cast<dr_alloc_flags_t>(0), data->snappy_writer,
sizeof(*data->snappy_writer));
data->snappy_writer = nullptr;
}
#endif
#ifdef HAS_ZLIB
if (op_offline.get_value() &&
(op_raw_compress.get_value() == "zlib" ||
op_raw_compress.get_value() == "gzip")) {
/* No new input: we only ask deflate to emit what it still holds. */
data->zstream.next_in = (Bytef *)BUF_PTR(data->seg_base);
data->zstream.avail_in = 0;
int res, iters = 0;
const int MAX_ITERS = 32;
/* Call deflate(Z_FINISH) repeatedly, writing out each filled output buffer,
 * until it stops returning Z_OK/Z_BUF_ERROR or MAX_ITERS is reached.
 */
do {
data->zstream.next_out = (Bytef *)data->buf_compressed;
data->zstream.avail_out = static_cast<uInt>(max_buf_size);
res = deflate(&data->zstream, Z_FINISH);
NOTIFY(3, "final deflate => %d in=%d out=%d => in=%d, out=%d, wrote=%d\n",
res, 0, max_buf_size, data->zstream.avail_in, data->zstream.avail_out,
max_buf_size - data->zstream.avail_out);
/* NOTE(review): the return value of this write is not checked. */
file_ops_func.write_file(data->file, data->buf_compressed,
max_buf_size - data->zstream.avail_out);
} while ((res == Z_OK || res == Z_BUF_ERROR) && ++iters < MAX_ITERS);
DR_ASSERT(res == Z_STREAM_END);
deflateEnd(&data->zstream);
}
#endif
#ifdef HAS_LZ4
if (op_offline.get_value() && op_raw_compress.get_value() == "lz4") {
/* Emit the lz4 frame end data, then release the compression context. */
size_t res =
LZ4F_compressEnd(data->lzcxt, data->buf_lz4, data->buf_lz4_size, nullptr);
DR_ASSERT(!LZ4F_isError(res));
file_ops_func.write_file(data->file, data->buf_lz4, res);
res = LZ4F_freeCompressionContext(data->lzcxt);
DR_ASSERT(!LZ4F_isError(res));
}
#endif
file_ops_func.close_file(data->file);
data->file = INVALID_FILE;
}
static bool
open_new_thread_file(void *drcontext, ptr_int_t window_num)
{
per_thread_t *data = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx);
bool opened_new_file = false;
DR_ASSERT(op_offline.get_value());
const char *dir = logsubdir;
char windir[MAXIMUM_PATH];
if (has_tracing_windows()) {
if (op_split_windows.get_value()) {
dr_snprintf(windir, BUFFER_SIZE_ELEMENTS(windir), "%s%s" WINDOW_SUBDIR_FORMAT,
logsubdir, DIRSEP, window_num);
NULL_TERMINATE_BUFFER(windir);
dir = windir;
} else if (data->file != INVALID_FILE)
return false;
}
* Since we're now in a subdir we could make the name simpler but this
* seems nice and complete.
*/
char buf[MAXIMUM_PATH];
int i;
const int NUM_OF_TRIES = 10000;
uint flags =
IF_UNIX(DR_FILE_CLOSE_ON_FORK |) DR_FILE_ALLOW_LARGE | DR_FILE_WRITE_REQUIRE_NEW;
* file name for creation. Retry if the same name file already exists.
* Abort if we fail too many times.
*/
const char *suffix = OUTFILE_SUFFIX;
#ifdef HAS_SNAPPY
if (snappy_enabled())
suffix = OUTFILE_SUFFIX_SZ;
#endif
#ifdef HAS_ZLIB
if (op_raw_compress.get_value() == "zlib")
suffix = OUTFILE_SUFFIX_ZLIB;
else if (op_raw_compress.get_value() == "gzip")
suffix = OUTFILE_SUFFIX_GZ;
#endif
#ifdef HAS_LZ4
if (op_raw_compress.get_value() == "lz4")
suffix = OUTFILE_SUFFIX_LZ4;
#endif
for (i = 0; i < NUM_OF_TRIES; i++) {
drx_open_unique_appid_file(dir, dr_get_thread_id(drcontext), subdir_prefix,
suffix, DRX_FILE_SKIP_OPEN, buf,
BUFFER_SIZE_ELEMENTS(buf));
NULL_TERMINATE_BUFFER(buf);
file_t new_file = file_ops_func.call_open_file(
buf, flags, dr_get_thread_id(drcontext), window_num);
if (new_file == INVALID_FILE)
continue;
if (new_file == data->file)
FATAL("Failed to create new thread file for window %s\n", buf);
NOTIFY(2, "Created thread trace file %s\n", buf);
opened_new_file = true;
if (data->file != INVALID_FILE)
close_thread_file(drcontext);
data->file = new_file;
#ifdef HAS_SNAPPY
if (snappy_enabled()) {
void *placement = dr_custom_alloc(
nullptr, static_cast<dr_alloc_flags_t>(0), sizeof(*data->snappy_writer),
DR_MEMPROT_READ | DR_MEMPROT_WRITE, nullptr);
data->snappy_writer = new (placement)
snappy_file_writer_t(data->file, file_ops_func.write_file,
op_raw_compress.get_value() != "snappy_nocrc");
data->snappy_writer->write_file_header();
}
#endif
#ifdef HAS_ZLIB
if (op_offline.get_value() && op_raw_compress.get_value() == "zlib") {
memset(&data->zstream, 0, sizeof(data->zstream));
data->zstream.zalloc = redirect_malloc;
data->zstream.zfree = redirect_free;
data->zstream.opaque = drcontext;
int res = deflateInit(&data->zstream, Z_BEST_SPEED);
DR_ASSERT(res == Z_OK);
} else if (op_offline.get_value() && op_raw_compress.get_value() == "gzip") {
memset(&data->zstream, 0, sizeof(data->zstream));
data->zstream.zalloc = redirect_malloc;
data->zstream.zfree = redirect_free;
data->zstream.opaque = drcontext;
const int ZLIB_WINDOW_SIZE = 15;
const int ZLIB_REQUEST_GZIP = 16;
const int ZLIB_MAX_MEM = 9;
int res = deflateInit2(&data->zstream, Z_BEST_SPEED, Z_DEFLATED,
ZLIB_WINDOW_SIZE + ZLIB_REQUEST_GZIP, ZLIB_MAX_MEM,
Z_DEFAULT_STRATEGY);
DR_ASSERT(res == Z_OK);
}
#endif
#ifdef HAS_LZ4
if (op_offline.get_value() && op_raw_compress.get_value() == "lz4") {
size_t res = LZ4F_createCompressionContext(&data->lzcxt, LZ4F_VERSION);
DR_ASSERT(!LZ4F_isError(res));
res = LZ4F_compressBegin(data->lzcxt, data->buf_lz4, data->buf_lz4_size,
&lz4_ops);
DR_ASSERT(!LZ4F_isError(res));
ssize_t wrote = file_ops_func.write_file(data->file, data->buf_lz4, res);
DR_ASSERT(static_cast<size_t>(wrote) == res);
}
#endif
break;
}
if (i == NUM_OF_TRIES) {
FATAL("Fatal error: failed to create trace file %s\n", buf);
}
return opened_new_file;
}
* Returns the size of the added thread header.
*/
static size_t
prepend_offline_thread_header(void *drcontext)
{
DR_ASSERT(op_offline.get_value());
per_thread_t *data = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx);
size_t size = reinterpret_cast<offline_instru_t *>(instru)->append_thread_header(
data->buf_base, dr_get_thread_id(drcontext), get_file_type());
BUF_PTR(data->seg_base) = data->buf_base + size;
data->has_thread_header = true;
return size;
}
/* Writes [pipe_start, pipe_end) to the IPC pipe in one write, aborting via
 * FATAL on a short write.  If more than one entry was written, a thread-id
 * entry is then placed in the final entry slot before pipe_end and a pointer
 * to that slot is returned as the start of the next write; otherwise
 * pipe_start is returned unchanged.
 * "window" is not used by this function.
 */
static inline byte *
atomic_pipe_write(void *drcontext, byte *pipe_start, byte *pipe_end, ptr_int_t window)
{
    ssize_t write_len = pipe_end - pipe_start;
    DR_ASSERT(write_len <= ipc_pipe.get_atomic_write_size() && write_len > 0);
    if (ipc_pipe.write((void *)pipe_start, write_len) < (ssize_t)write_len) {
        FATAL("Fatal error: failed to write to pipe\n");
    }
    byte *tid_slot = pipe_end - instru->sizeof_entry();
    if (tid_slot <= pipe_start)
        return pipe_start;
    /* Re-emit the thread id so that the next chunk is self-identifying. */
    size_t added = instru->append_tid(tid_slot, dr_get_thread_id(drcontext));
    DR_ASSERT(added == instru->sizeof_entry());
    return tid_slot;
}
/* Writes the trace data in [towrite_start, towrite_end) to this thread's output.
 * Offline: hands the buffer to file_ops_func.handoff_buf if that is set;
 * otherwise writes to data->file, compressing first when -raw_compress selects
 * snappy, zlib/gzip, or lz4 (and support was built in).  Aborts via FATAL on a
 * failed hand-off or short write.  Returns towrite_start.
 * Online: forwards to atomic_pipe_write() and returns its result.
 */
static inline byte *
write_trace_data(void *drcontext, byte *towrite_start, byte *towrite_end,
ptr_int_t window)
{
if (op_offline.get_value()) {
per_thread_t *data = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx);
ssize_t size = towrite_end - towrite_start;
DR_ASSERT(data->file != INVALID_FILE);
if (file_ops_func.handoff_buf != NULL) {
if (!file_ops_func.handoff_buf(data->file, towrite_start, size,
max_buf_size)) {
FATAL("Fatal error: failed to hand off trace\n");
}
} else {
/* "wrote" is compared against the uncompressed size below, so each
 * compressing branch sets it to "size" on success.
 */
ssize_t wrote;
#ifdef HAS_SNAPPY
if (op_offline.get_value() && snappy_enabled())
wrote = data->snappy_writer->compress_and_write(towrite_start, size);
else
#endif
#ifdef HAS_ZLIB
if (op_offline.get_value() &&
(op_raw_compress.get_value() == "zlib" ||
op_raw_compress.get_value() == "gzip")) {
data->zstream.next_in = (Bytef *)towrite_start;
data->zstream.avail_in = static_cast<uInt>(size);
int res;
/* Keep deflating and writing until deflate leaves room in the output
 * buffer, which is taken to mean that all input was consumed.
 */
do {
data->zstream.next_out = (Bytef *)data->buf_compressed;
data->zstream.avail_out = static_cast<uInt>(max_buf_size);
res = deflate(&data->zstream, Z_NO_FLUSH);
NOTIFY(3, "deflate => %d in=%d out=%d => in=%d, out=%d, write=%d\n",
res, size, size, data->zstream.avail_in,
data->zstream.avail_out,
max_buf_size - data->zstream.avail_out);
DR_ASSERT(res != Z_STREAM_ERROR);
/* NOTE(review): this per-chunk result is overwritten below, so a
 * short write of compressed data is not detected.
 */
wrote =
file_ops_func.write_file(data->file, data->buf_compressed,
max_buf_size - data->zstream.avail_out);
} while (data->zstream.avail_out == 0);
DR_ASSERT(data->zstream.avail_in == 0);
wrote = size;
} else
#endif
#ifdef HAS_LZ4
if (op_offline.get_value() && op_raw_compress.get_value() == "lz4") {
size_t res =
LZ4F_compressUpdate(data->lzcxt, data->buf_lz4, data->buf_lz4_size,
towrite_start, size, nullptr);
DR_ASSERT(!LZ4F_isError(res));
wrote = file_ops_func.write_file(data->file, data->buf_lz4, res);
DR_ASSERT(static_cast<size_t>(wrote) == res);
wrote = size;
} else
#endif
/* Uncompressed: this statement is the final "else" of the chain above. */
wrote = file_ops_func.write_file(data->file, towrite_start, size);
if (wrote < size) {
FATAL("Fatal error: failed to write trace for T%d window %zd: wrote %zd "
"of %zd\n",
dr_get_thread_id(drcontext), get_local_window(data), wrote, size);
}
}
return towrite_start;
} else {
#ifdef HAS_SNAPPY
#endif
return atomic_pipe_write(drcontext, towrite_start, towrite_end, window);
}
}
/* Sets the calling thread's local tracing-window value to "value".
 * For offline traces, when the window advances (or "value" is 0):
 *  - with -split_windows, writes a header-and-exit-only file for each window
 *    between the old and new values;
 *  - prepends a fresh thread header to the buffer when starting a new
 *    split-window file or when no header size has been recorded yet;
 *  - with -split_windows and value > 0, closes the currently open file.
 * Returns true if a thread header was prepended to the thread's buffer.
 */
static bool
set_local_window(void *drcontext, ptr_int_t value)
{
per_thread_t *data = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx);
bool prepended = false;
NOTIFY(3, "%s: T%d %zd (old: %zd)\n", __FUNCTION__, dr_get_thread_id(drcontext),
value, get_local_window(data));
if (op_offline.get_value()) {
ptr_int_t old_val = get_local_window(data);
if (old_val < value || value == 0) {
/* Visit each window strictly between the old and new values. */
while (++old_val < value && op_split_windows.get_value()) {
NOTIFY(2, "Writing empty file for T%d window %zd\n",
dr_get_thread_id(drcontext), old_val);
if (!open_new_thread_file(drcontext, old_val)) {
continue;
}
/* Build thread header + unit header + thread exit in a local buffer. */
byte buf[sizeof(offline_entry_t) * 32];
byte *entry = buf;
entry +=
reinterpret_cast<offline_instru_t *>(instru)->append_thread_header(
entry, dr_get_thread_id(drcontext), get_file_type());
entry += append_unit_header(drcontext, entry, dr_get_thread_id(drcontext),
old_val);
entry += instru->append_thread_exit(entry, dr_get_thread_id(drcontext));
DR_ASSERT(BUFFER_SIZE_BYTES(buf) >= (size_t)(entry - buf));
write_trace_data(drcontext, (byte *)buf, entry, old_val);
close_thread_file(drcontext);
}
if ((value > 0 && op_split_windows.get_value()) ||
data->init_header_size == 0) {
size_t header_size = prepend_offline_thread_header(drcontext);
/* Record the header size the first time; afterwards it must not change. */
if (data->init_header_size == 0)
data->init_header_size = header_size;
else
DR_ASSERT(header_size == data->init_header_size);
prepended = true;
}
if (data->file != INVALID_FILE && value > 0 && op_split_windows.get_value())
close_thread_file(drcontext);
}
}
*(ptr_int_t *)TLS_SLOT(data->seg_base, MEMTRACE_TLS_OFFS_WINDOW) = value;
return prepended;
}
/* Allocates a new trace buffer of max_buf_size bytes for the thread and fills
 * its redzone (the redzone_size bytes after trace_buf_size) with 0xff bytes.
 * On allocation failure, falls back to the thread's reserve buffer and caps
 * -max_trace_size below the bytes already written so that no further data is
 * written out; aborts via FATAL if there is no reserve buffer.
 * The reserve buffer itself is allocated when the thread's second buffer is
 * created.
 */
static void
create_buffer(per_thread_t *data)
{
    data->buf_base =
        (byte *)dr_raw_mem_alloc(max_buf_size, DR_MEMPROT_READ | DR_MEMPROT_WRITE, NULL);
    if (data->buf_base == NULL) {
        if (data->reserve_buf == NULL) {
            FATAL("Fatal error: out of memory and cannot recover.\n");
        }
        NOTIFY(0, "Out of memory: truncating further tracing.\n");
        data->buf_base = data->reserve_buf;
        /* Force the size-cap check to consider this thread beyond its limit. */
        op_max_trace_size.set_value(data->bytes_written - 1);
        return;
    }
    /* Set a sentinel (non-zero) value in the redzone. */
    memset(data->buf_base + trace_buf_size, -1, redzone_size);
    data->num_buffers++;
    if (data->num_buffers == 2) {
        /* Create a reserve buffer so we can continue on out-of-memory.
         * It is much simpler to keep running the same instru that writes to a
         * buffer and just never write it out, similarly to how we handle
         * -max_trace_size.  This costs us some memory (not for idle threads: that's
         * why we wait for the 2nd buffer) but we gain simplicity.
         */
        data->reserve_buf = (byte *)dr_raw_mem_alloc(
            max_buf_size, DR_MEMPROT_READ | DR_MEMPROT_WRITE, NULL);
        if (data->reserve_buf != NULL)
            memset(data->reserve_buf + trace_buf_size, -1, redzone_size);
    }
}
/* Returns the size in bytes of the virtual-to-physical marker buffer, which is
 * one page.
 */
static size_t
get_v2p_buffer_size()
{
    const size_t one_page = dr_page_size();
    return one_page;
}
/* Allocates the thread's virtual-to-physical marker buffer
 * (get_v2p_buffer_size() bytes); aborts via FATAL if the allocation fails.
 */
static void
create_v2p_buffer(per_thread_t *data)
{
    void *mem = dr_raw_mem_alloc(get_v2p_buffer_size(),
                                 DR_MEMPROT_READ | DR_MEMPROT_WRITE, NULL);
    data->v2p_buf = (byte *)mem;
    if (data->v2p_buf == NULL) {
        FATAL("Failed to allocate virtual-to-physical buffer.\n");
    }
}
/* Returns whether a buffer may be split into separate pipe writes immediately
 * before an entry of the given type and size.  That is the case when:
 *  - the entry begins an instruction: an encoding entry when
 *    -instr_encodings is on, otherwise an instruction-type entry;
 *  - it is a marker whose size field is not TRACE_MARKER_TYPE_CPU_ID;
 *  - it is a thread exit entry; or
 *  - -L0I_filter is enabled.
 */
static bool
is_ok_to_split_before(trace_type_t type, size_t size)
{
    bool begins_instr;
    if (op_instr_encodings.get_value())
        begins_instr = (type == TRACE_TYPE_ENCODING);
    else
        begins_instr = (type_is_instr(type) || type == TRACE_TYPE_INSTR_MAYBE_FETCH);
    if (begins_instr)
        return true;
    if (type == TRACE_TYPE_MARKER && size != TRACE_MARKER_TYPE_CPU_ID)
        return true;
    if (type == TRACE_TYPE_THREAD_EXIT)
        return true;
    return op_L0I_filter.get_value();
}
/* Writes the entries in [buf_base, buf_ptr) to the thread's output.
 * Online: the data is written to the pipe in chunks no larger than the pipe's
 * atomic write size, splitting only at entries accepted by
 * is_ok_to_split_before().
 * Offline: the whole span is passed to write_trace_data().
 * Also updates the thread's reference, byte, and write-out counters and, when a
 * buffer hand-off function is in use, allocates a replacement buffer.
 * "header_size" is the size of the header(s) at the start of the buffer.
 * Returns the number of entries in the span.
 */
static uint
output_buffer(void *drcontext, per_thread_t *data, byte *buf_base, byte *buf_ptr,
size_t header_size)
{
byte *pipe_start = buf_base;
byte *pipe_end = pipe_start;
if (!op_offline.get_value()) {
byte *post_header = buf_base + header_size;
/* From here on header_size is the size of the single tid entry that
 * atomic_pipe_write() re-emits at the start of each later chunk.
 */
header_size = instru->sizeof_entry();
for (byte *mem_ref = post_header; mem_ref < buf_ptr;
mem_ref += instru->sizeof_entry()) {
if (is_ok_to_split_before(instru->get_entry_type(mem_ref),
instru->get_entry_size(mem_ref))) {
/* Remember the latest legal split point. */
pipe_end = mem_ref;
/* Flush if this entry plus the maximum number of delay entries would
 * no longer fit in one atomic write.
 */
if ((mem_ref + (1 + MAX_NUM_DELAY_ENTRIES) * instru->sizeof_entry() -
pipe_start) > ipc_pipe.get_atomic_write_size()) {
DR_ASSERT(is_ok_to_split_before(
instru->get_entry_type(pipe_start + header_size),
instru->get_entry_size(pipe_start + header_size)));
pipe_start = atomic_pipe_write(drcontext, pipe_start, pipe_end,
get_local_window(data));
}
}
}
/* The remainder may itself exceed the atomic write size, in which case it
 * takes two writes: up to the last split point, and then the rest.
 */
if ((buf_ptr - pipe_start) > ipc_pipe.get_atomic_write_size()) {
DR_ASSERT(
is_ok_to_split_before(instru->get_entry_type(pipe_start + header_size),
instru->get_entry_size(pipe_start + header_size)));
pipe_start = atomic_pipe_write(drcontext, pipe_start, pipe_end,
get_local_window(data));
}
/* Only write the tail if it holds more than just header slots. */
if ((buf_ptr - pipe_start) > (ssize_t)buf_hdr_slots_size) {
DR_ASSERT(
is_ok_to_split_before(instru->get_entry_type(pipe_start + header_size),
instru->get_entry_size(pipe_start + header_size)));
atomic_pipe_write(drcontext, pipe_start, buf_ptr, get_local_window(data));
}
} else {
write_trace_data(drcontext, pipe_start, buf_ptr, get_local_window(data));
}
auto span = buf_ptr - buf_base;
DR_ASSERT(span % instru->sizeof_entry() == 0);
uint current_num_refs = (uint)(span / instru->sizeof_entry());
data->num_refs += current_num_refs;
/* Bytes written while in L0-filter mode are not counted. */
uintptr_t mode = tracing_mode.load(std::memory_order_acquire);
if (mode != BBDUP_MODE_L0_FILTER)
data->bytes_written += buf_ptr - pipe_start;
/* Determine which of the thread's buffers this was by address range. */
bool is_v2p = false;
if (buf_base >= data->v2p_buf && buf_base < data->v2p_buf + get_v2p_buffer_size())
is_v2p = true;
if (is_v2p)
++data->num_v2p_writeouts;
else
++data->num_writeouts;
/* With a hand-off function in use, a replacement buffer is allocated here. */
if (file_ops_func.handoff_buf != NULL) {
if (is_v2p)
create_v2p_buffer(data);
else
create_buffer(data);
}
return current_num_refs;
}
/* Translates the virtual address "virt" to a physical address and, unless the
 * translation came from the cache, appends the corresponding marker(s) to the
 * thread's v2p buffer at "v2p_ptr".
 * On the first emission for a trace buffer (*emitted false), the unit header
 * from the main buffer (preceded by a thread header when header_size exceeds
 * buf_hdr_slots_size) is first copied into the v2p buffer; *emitted is then set
 * and, if a thread header was added, *skip is set to its size.
 * If the v2p buffer is nearly full it is written out and restarted.
 * Returns the updated v2p buffer write pointer.
 */
static byte *
process_entry_for_physaddr(void *drcontext, per_thread_t *data, size_t header_size,
byte *v2p_ptr, byte *mem_ref, addr_t virt, trace_type_t type,
bool *emitted, size_t *skip)
{
bool from_cache = false;
addr_t phys = 0;
bool success = data->physaddr.virtual2physical(drcontext, virt, &phys, &from_cache);
ASSERT(emitted != NULL && skip != NULL, "invalid input parameters");
NOTIFY(4, "%s: type=%s (%2d) virt=%p phys=%p\n", __FUNCTION__, trace_type_names[type],
type, virt, phys);
if (!success) {
NOTIFY(1, "virtual2physical translation failure for type=%s (%2d) addr=%p\n",
trace_type_names[type], type, virt);
phys = virt;
}
/* Translations served from the cache produce no marker. */
if (from_cache)
return v2p_ptr;
if (!*emitted) {
/* A header larger than the unit-header slots means the main buffer starts
 * with a thread header, so the v2p buffer gets one too.
 */
if (header_size > buf_hdr_slots_size) {
size_t size =
reinterpret_cast<offline_instru_t *>(instru)->append_thread_header(
data->v2p_buf, dr_get_thread_id(drcontext), get_file_type());
ASSERT(size == data->init_header_size, "inconsistent header");
*skip = data->init_header_size;
v2p_ptr += size;
}
/* Copy the main buffer's unit header. */
memcpy(v2p_ptr, data->buf_base + header_size - buf_hdr_slots_size,
buf_hdr_slots_size);
v2p_ptr += buf_hdr_slots_size;
*emitted = true;
}
/* Keep room for the two markers appended below; otherwise flush first. */
if (v2p_ptr + 2 * instru->sizeof_entry() - data->v2p_buf >=
static_cast<ssize_t>(get_v2p_buffer_size())) {
NOTIFY(1, "Reached v2p buffer limit: emitting multiple times\n");
data->num_phys_markers +=
output_buffer(drcontext, data, data->v2p_buf, v2p_ptr, header_size);
v2p_ptr = data->v2p_buf;
memcpy(v2p_ptr, data->buf_base + header_size - buf_hdr_slots_size,
buf_hdr_slots_size);
v2p_ptr += buf_hdr_slots_size;
}
if (success) {
v2p_ptr +=
instru->append_marker(v2p_ptr, TRACE_MARKER_TYPE_PHYSICAL_ADDRESS, phys);
v2p_ptr +=
instru->append_marker(v2p_ptr, TRACE_MARKER_TYPE_VIRTUAL_ADDRESS, virt);
} else {
v2p_ptr += instru->append_marker(
v2p_ptr, TRACE_MARKER_TYPE_PHYSICAL_ADDRESS_NOT_AVAILABLE, virt);
}
return v2p_ptr;
}
/* Walks the entries of the thread's trace buffer between
 * data->buf_base + header_size and buf_ptr and, for each entry with an address,
 * records virtual-to-physical markers in the v2p buffer via
 * process_entry_for_physaddr().  An entry whose extent may cross into the next
 * page is also processed for that next page.  If any markers were emitted the
 * v2p buffer is written out.
 * Requires -use_physical to be enabled.
 * Returns the "skip" value set by process_entry_for_physaddr(): the size of the
 * thread header duplicated into the v2p buffer, or 0 if there was none.
 */
static size_t
process_buffer_for_physaddr(void *drcontext, per_thread_t *data, size_t header_size,
byte *buf_ptr)
{
ASSERT(op_use_physical.get_value(),
"Caller must check for use_physical being enabled");
byte *v2p_ptr = data->v2p_buf;
size_t skip = 0;
bool emitted = false;
for (byte *mem_ref = data->buf_base + header_size; mem_ref < buf_ptr;
mem_ref += instru->sizeof_entry()) {
trace_type_t type = instru->get_entry_type(mem_ref);
DR_ASSERT(type != TRACE_TYPE_INSTR_BUNDLE);
if (!type_has_address(type))
continue;
addr_t virt = instru->get_entry_addr(drcontext, mem_ref);
v2p_ptr = process_entry_for_physaddr(drcontext, data, header_size, v2p_ptr,
mem_ref, virt, type, &emitted, &skip);
size_t page_size = dr_page_size();
addr_t virt_page = ALIGN_BACKWARD(virt, page_size);
size_t mem_ref_size = instru->get_entry_size(mem_ref);
/* For offline traces the entry's own size is replaced by a fixed upper bound
 * (presumably because the exact size is not available in the offline
 * entry -- TODO confirm).
 */
if (type_is_instr(type) || type == TRACE_TYPE_INSTR_NO_FETCH ||
type == TRACE_TYPE_INSTR_MAYBE_FETCH) {
int instr_count = instru->get_instr_count(mem_ref);
if (op_offline.get_value()) {
static constexpr size_t PREDICT_INSTR_SIZE_BOUND = IF_X86_ELSE(8, 4);
mem_ref_size = instr_count * PREDICT_INSTR_SIZE_BOUND;
} else
ASSERT(instr_count <= 1, "bundles are disabled");
} else if (op_offline.get_value()) {
static constexpr size_t PREDICT_DATA_SIZE_BOUND = sizeof(void *);
mem_ref_size = PREDICT_DATA_SIZE_BOUND;
}
/* If the last byte of the (estimated) extent lies on a different page, emit
 * a translation for the following page as well.
 */
if (ALIGN_BACKWARD(virt + mem_ref_size - 1 , page_size) !=
virt_page) {
NOTIFY(2, "Emitting physaddr for next page %p for type=%s (%2d), addr=%p\n",
virt_page + page_size, trace_type_names[type], type, virt);
v2p_ptr =
process_entry_for_physaddr(drcontext, data, header_size, v2p_ptr, mem_ref,
virt_page + page_size, type, &emitted, &skip);
}
}
if (emitted) {
data->num_phys_markers +=
output_buffer(drcontext, data, data->v2p_buf, v2p_ptr, header_size);
}
return skip;
}
/* Scans the offline entries in [start, end) backward from the last entry until
 * the first PC entry is found, counting memref entries along the way.
 * Returns that PC entry if it covers more than one instruction, or covers
 * exactly one instruction and at least one memref entry was seen after it.
 * Returns NULL otherwise, including when no PC entry is found.
 */
offline_entry_t *
find_unfiltered_record(byte *start, byte *end)
{
    offline_entry_t *const first = (offline_entry_t *)start;
    offline_entry_t *cur = (offline_entry_t *)(end - sizeof(offline_entry_t));
    int memrefs_seen = 0;
    for (; cur >= first; --cur) {
        if (cur->pc.type == OFFLINE_TYPE_PC) {
            NOTIFY(4, "PC: instr count = %d, num_memrefs = %d\n", cur->pc.instr_count,
                   memrefs_seen);
            const bool is_unfiltered = cur->pc.instr_count > 1 ||
                (cur->pc.instr_count == 1 && memrefs_seen > 0);
            if (!is_unfiltered)
                return NULL;
            NOTIFY(4, "Found unfiltered entry=%d\n", cur - (offline_entry_t *)start);
            return cur;
        }
        if (cur->addr.type == OFFLINE_TYPE_MEMREF ||
            cur->addr.type == OFFLINE_TYPE_MEMREF_HIGH) {
            ++memrefs_seen;
        }
    }
    return NULL;
}
void
process_and_output_buffer(void *drcontext, bool skip_size_cap)
{
per_thread_t *data = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx);
byte *mem_ref, *buf_ptr;
byte *redzone;
bool do_write = true;
uint current_num_refs = 0;
if (op_offline.get_value() && data->file == INVALID_FILE) {
DR_ASSERT(has_tracing_windows() || op_trace_after_instrs.get_value() > 0 ||
attached_midway);
open_new_thread_file(drcontext, get_local_window(data));
}
size_t header_size = buf_hdr_slots_size;
if (data->has_thread_header && op_offline.get_value())
header_size += data->init_header_size;
size_t stamp_offs =
header_size > buf_hdr_slots_size ? header_size - buf_hdr_slots_size : 0;
uint64 min_timestamp;
if (align_attach_detach_endpoints()) {
min_timestamp = attached_timestamp.load(std::memory_order_acquire);
if (min_timestamp == 0) {
NOTIFY(1, "Dropping too-early data for T%zd\n", dr_get_thread_id(drcontext));
BUF_PTR(data->seg_base) = data->buf_base + header_size;
return;
}
instru->clamp_unit_header_timestamp(data->buf_base + stamp_offs, min_timestamp);
}
if (has_tracing_windows()) {
min_timestamp = retrace_start_timestamp.load(std::memory_order_acquire);
instru->clamp_unit_header_timestamp(data->buf_base + stamp_offs, min_timestamp);
}
buf_ptr = BUF_PTR(data->seg_base);
if (buf_ptr == data->buf_base + header_size) {
ptr_int_t window = -1;
if (has_tracing_windows()) {
window = tracing_window.load(std::memory_order_acquire);
if (set_local_window(drcontext, window))
header_size = data->init_header_size;
}
append_unit_header(drcontext, data->buf_base + header_size - buf_hdr_slots_size,
dr_get_thread_id(drcontext), window);
return;
}
data->has_thread_header = false;
bool window_changed = false;
if (has_tracing_windows() &&
get_local_window(data) != tracing_window.load(std::memory_order_acquire)) {
DR_ASSERT(get_local_window(data) <
tracing_window.load(std::memory_order_acquire));
data->cur_window_instr_count = 0;
window_changed = true;
if (op_offline.get_value() && op_split_windows.get_value())
buf_ptr += instru->append_thread_exit(buf_ptr, dr_get_thread_id(drcontext));
}
ptr_int_t mode = tracing_mode.load(std::memory_order_acquire);
if (get_local_mode(data) != mode) {
if (get_local_mode(data) == BBDUP_MODE_L0_FILTER) {
NOTIFY(0, "Thread %d: filter mode changed\n", dr_get_thread_id(drcontext));
byte *end =
(byte *)find_unfiltered_record(data->buf_base + header_size, buf_ptr);
if (end == NULL) {
buf_ptr +=
instru->append_marker(buf_ptr, TRACE_MARKER_TYPE_FILTER_ENDPOINT, 0);
} else {
output_buffer(drcontext, data, data->buf_base, end, 0);
offline_entry_t marker[2];
byte *marker_buf = (byte *)&marker[0];
int size = instru->append_marker(marker_buf,
TRACE_MARKER_TYPE_FILTER_ENDPOINT, 0);
DR_ASSERT(size <= (int)sizeof(marker));
output_buffer(drcontext, data, marker_buf, marker_buf + size, 0);
data->buf_base = end;
}
}
set_local_mode(data, mode);
}
if (!skip_size_cap && mode != BBDUP_MODE_L0_FILTER &&
(is_bytes_written_beyond_trace_max(data) || is_num_refs_beyond_global_max())) {
* beyond. We also don't put much effort into reducing overhead once
* beyond the limit: we still instrument and come here.
*/
do_write = false;
if (is_num_refs_beyond_global_max()) {
* but to avoid any risk we use DR's atomics.
* Update: we are now using std::atomic for some new variables.
*/
if (dr_atomic_load32(¬ify_beyond_global_max_once) == 0) {
int count = dr_atomic_add32_return_sum(¬ify_beyond_global_max_once, 1);
if (count == 1) {
NOTIFY(0, "Hit -max_global_trace_refs: disabling tracing.\n");
instru->set_frozen_timestamp(instru_t::get_timestamp());
}
}
}
}
if (do_write) {
if (op_L0_filter_until_instrs.get_value() && mode == BBDUP_MODE_L0_FILTER) {
uintptr_t toadd =
*(uintptr_t *)TLS_SLOT(data->seg_base, MEMTRACE_TLS_OFFS_ICOUNT);
bool reached_L0_filter_until_instrs_limit = count_traced_instrs(
drcontext, toadd, op_L0_filter_until_instrs.get_value());
if (reached_L0_filter_until_instrs_limit) {
NOTIFY(0, "Adding filter endpoint marker for -L0_filter_until_instrs\n");
size_t add =
instru->append_marker(buf_ptr, TRACE_MARKER_TYPE_FILTER_ENDPOINT, 0);
buf_ptr += add;
NOTIFY(0,
"Hit tracing window #%zd filter limit: switching to full trace.\n",
tracing_window.load(std::memory_order_acquire));
tracing_mode.store(BBDUP_MODE_TRACE, std::memory_order_release);
set_local_mode(data, BBDUP_MODE_TRACE);
}
} else if (op_trace_for_instrs.get_value() > 0) {
bool hit_window_end = false;
for (mem_ref = data->buf_base + header_size; mem_ref < buf_ptr;
mem_ref += instru->sizeof_entry()) {
if (!window_changed && !hit_window_end &&
op_trace_for_instrs.get_value() > 0) {
hit_window_end =
count_traced_instrs(drcontext, instru->get_instr_count(mem_ref),
op_trace_for_instrs.get_value());
}
}
if (hit_window_end) {
if (op_offline.get_value() && op_split_windows.get_value()) {
size_t add =
instru->append_thread_exit(buf_ptr, dr_get_thread_id(drcontext));
buf_ptr += add;
}
reached_traced_instrs_threshold(drcontext);
}
}
size_t skip = 0;
if (op_use_physical.get_value()) {
skip = process_buffer_for_physaddr(drcontext, data, header_size, buf_ptr);
}
current_num_refs +=
output_buffer(drcontext, data, data->buf_base + skip, buf_ptr, header_size);
}
if (file_ops_func.handoff_buf == NULL) {
memset(data->buf_base, 0, trace_buf_size);
redzone = data->buf_base + trace_buf_size;
if (buf_ptr > redzone) {
memset(redzone, -1, buf_ptr - redzone);
}
}
BUF_PTR(data->seg_base) = data->buf_base;
ptr_int_t window = -1;
if (has_tracing_windows()) {
window = tracing_window.load(std::memory_order_acquire);
set_local_window(drcontext, window);
}
BUF_PTR(data->seg_base) += append_unit_header(drcontext, BUF_PTR(data->seg_base),
dr_get_thread_id(drcontext), window);
num_refs_racy += current_num_refs;
if (mode == BBDUP_MODE_L0_FILTER) {
num_filter_refs_racy += current_num_refs;
}
if (mode != BBDUP_MODE_L0_FILTER && op_exit_after_tracing.get_value() > 0 &&
(num_refs_racy - num_filter_refs_racy) > op_exit_after_tracing.get_value()) {
dr_mutex_lock(mutex);
if (!exited_process) {
exited_process = true;
dr_mutex_unlock(mutex);
NOTIFY(0, "Exiting process after ~" UINT64_FORMAT_STRING " references.\n",
num_refs_racy - num_filter_refs_racy);
dr_exit_process(0);
}
dr_mutex_unlock(mutex);
}
}
/* Allocates the thread's trace buffer and, when -use_physical is enabled, its
 * virtual-to-physical marker buffer as well.
 */
void
init_buffers(per_thread_t *data)
{
    create_buffer(data);
    if (!op_use_physical.get_value())
        return;
    create_v2p_buffer(data);
}
/* Per-thread setup of the trace output state kept in the thread's per_thread_t
 * (fetched from the drmgr TLS slot tls_idx).
 *
 * Steps, in order:
 *  - Allocates the compression buffer(s) required by -raw_compress (offline only).
 *  - Initializes physical address translation when -use_physical is set.
 *  - Records the thread-local tracing window and tracing mode.
 *  - Offline: opens the thread's output file if tracing is active and writes the
 *    initial headers into the thread's trace buffer.
 *  - Online: writes the thread header directly via write_trace_data() and then
 *    places a unit header at the start of the trace buffer.
 *
 * Aborts via FATAL if a buffer allocation or the physaddr initialization fails.
 */
void
init_thread_io(void *drcontext)
{
    per_thread_t *data = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx);

    NOTIFY(2, "T" TIDFMT " in init_thread_io.\n", dr_get_thread_id(drcontext));
#ifdef HAS_ZLIB
    if (op_offline.get_value() &&
        (op_raw_compress.get_value() == "zlib" ||
         op_raw_compress.get_value() == "gzip")) {
        data->buf_compressed = static_cast<byte *>(
            dr_raw_mem_alloc(max_buf_size, DR_MEMPROT_READ | DR_MEMPROT_WRITE, nullptr));
        /* dr_raw_mem_alloc() returns NULL on failure: fail loudly here rather
         * than crash later on the first use of the buffer.
         */
        if (data->buf_compressed == nullptr) {
            FATAL("Fatal error: failed to allocate compression buffer in thread "
                  "T" TIDFMT ".\n",
                  dr_get_thread_id(drcontext));
        }
    }
#endif
#ifdef HAS_LZ4
    if (op_offline.get_value() && op_raw_compress.get_value() == "lz4") {
        /* Size the output buffer for the worst case of compressing max_buf_size
         * bytes, and make sure an lz4 frame header fits in it.
         */
        data->buf_lz4_size = LZ4F_compressBound(max_buf_size, &lz4_ops);
        DR_ASSERT(data->buf_lz4_size >= LZ4F_HEADER_SIZE_MAX);
        data->buf_lz4 = static_cast<byte *>(dr_raw_mem_alloc(
            data->buf_lz4_size, DR_MEMPROT_READ | DR_MEMPROT_WRITE, nullptr));
        if (data->buf_lz4 == nullptr) {
            FATAL("Fatal error: failed to allocate lz4 buffer in thread "
                  "T" TIDFMT ".\n",
                  dr_get_thread_id(drcontext));
        }
    }
#endif

    if (op_use_physical.get_value()) {
        if (!data->physaddr.init()) {
            FATAL("Unable to open pagemap for physical addresses in thread "
                  "T" TIDFMT ": check "
                  "privileges.\n",
                  dr_get_thread_id(drcontext));
        }
    }

    /* Start from the "no window" value (-1) and then, if windows are in use,
     * switch to the current global window.
     */
    set_local_window(drcontext, -1);
    if (has_tracing_windows())
        set_local_window(drcontext, tracing_window.load(std::memory_order_acquire));
    set_local_mode(data, tracing_mode.load(std::memory_order_acquire));

    if (op_offline.get_value()) {
        /* Only open an output file when we are actually tracing right now. */
        if (is_in_tracing_mode(tracing_mode.load(std::memory_order_acquire))) {
            open_new_thread_file(drcontext, get_local_window(data));
        }
        if (!has_tracing_windows()) {
            data->init_header_size = prepend_offline_thread_header(drcontext);
        }
        /* NOTE(review): with tracing windows the thread header is not prepended
         * here; presumably that is handled when the local window is set
         * above -- confirm against set_local_window().
         */
        BUF_PTR(data->seg_base) +=
            append_unit_header(drcontext, BUF_PTR(data->seg_base),
                               dr_get_thread_id(drcontext), get_local_window(data));
        if (op_L0_filter_until_instrs.get_value()) {
            /* If the global mode is already full tracing when this thread starts,
             * add the filter endpoint marker (value 0) to its buffer.
             */
            uintptr_t mode = tracing_mode.load(std::memory_order_acquire);
            if (mode == BBDUP_MODE_TRACE) {
                BUF_PTR(data->seg_base) += instru->append_marker(
                    BUF_PTR(data->seg_base), TRACE_MARKER_TYPE_FILTER_ENDPOINT, 0);
            }
        }
    } else {
        /* Online: build the thread header in a stack buffer and write it out
         * immediately.
         */
        char buf[MAXIMUM_PATH];
        byte *proc_info = (byte *)buf;
        proc_info += reinterpret_cast<online_instru_t *>(instru)->append_thread_header(
            proc_info, dr_get_thread_id(drcontext), get_file_type());
        DR_ASSERT(BUFFER_SIZE_BYTES(buf) >= (size_t)(proc_info - (byte *)buf));
        write_trace_data(drcontext, (byte *)buf, proc_info, get_local_window(data));

        /* The trace buffer itself then starts with a unit header, whose size is
         * remembered as the initial header size.
         */
        data->init_header_size =
            append_unit_header(drcontext, data->buf_base, dr_get_thread_id(drcontext),
                               get_local_window(data));
        BUF_PTR(data->seg_base) = data->buf_base + data->init_header_size;
    }
}
/* Per-thread teardown of the trace output state.
 *
 * Emits the thread exit record and flushes the thread's buffer when there is
 * trace data to finish, closes the thread's offline output file if one is open,
 * and frees the compression buffers allocated for -raw_compress.
 */
void
exit_thread_io(void *drcontext)
{
    per_thread_t *data = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx);

#ifdef UNIX
    /* i#2384:
     * On Linux, the thread exit event may be invoked twice for the same thread
     * if that thread is alive during a process fork, but doesn't call the fork
     * itself.  The first time the event callback is executed from the fork child
     * immediately after the fork, the second time it is executed during the
     * regular thread exit.
     * data->file could already be closed, in which case the file write would
     * fail and trigger an assert, so we skip the call whose drcontext belongs
     * to a different process.
     */
    if (dr_get_process_id() != dr_get_process_id_from_drcontext(drcontext)) {
        return;
    }
#endif

    /* Write the thread exit and flush the buffer in any of these cases:
     *  - we are currently in a tracing mode;
     *  - tracing windows are in use and either windows are not split, or this
     *    thread's window is behind the global one and its new-window buffer is
     *    not empty;
     *  - no tracing windows, attach/detach endpoints are aligned, and the thread
     *    has either written bytes already or buffered more than its initial
     *    header plus buf_hdr_slots_size.
     */
    if (is_in_tracing_mode(tracing_mode.load(std::memory_order_acquire)) ||
        (has_tracing_windows() &&
         (!op_split_windows.get_value() ||
          (get_local_window(data) < tracing_window.load(std::memory_order_acquire) &&
           !is_new_window_buffer_empty(data)))) ||
        (!has_tracing_windows() && align_attach_detach_endpoints() &&
         (data->bytes_written > 0 ||
          BUF_PTR(data->seg_base) - data->buf_base >
              static_cast<ssize_t>(data->init_header_size + buf_hdr_slots_size)))) {
        BUF_PTR(data->seg_base) += instru->append_thread_exit(
            BUF_PTR(data->seg_base), dr_get_thread_id(drcontext));

        process_and_output_buffer(drcontext,
                                  /* If this thread already wrote some data, include
                                   * its exit even if we're over a size limit.
                                   */
                                  data->bytes_written > 0);
    }

    if (op_offline.get_value() && data->file != INVALID_FILE)
        close_thread_file(drcontext);

    /* The conditions below mirror the allocation conditions used at thread
     * init.  The pointers are cleared after freeing so no stale address remains
     * in the per-thread data.
     */
#ifdef HAS_ZLIB
    if (op_offline.get_value() &&
        (op_raw_compress.get_value() == "zlib" ||
         op_raw_compress.get_value() == "gzip")) {
        dr_raw_mem_free(data->buf_compressed, max_buf_size);
        data->buf_compressed = nullptr;
    }
#endif
#ifdef HAS_LZ4
    if (op_offline.get_value() && op_raw_compress.get_value() == "lz4") {
        dr_raw_mem_free(data->buf_lz4, data->buf_lz4_size);
        data->buf_lz4 = nullptr;
    }
#endif
}
void
init_io()
{
if (op_raw_compress.get_value() == "none"
#ifdef HAS_SNAPPY
|| op_raw_compress.get_value() == "snappy" ||
op_raw_compress.get_value() == "snappy_nocrc"
#endif
#ifdef HAS_ZLIB
|| op_raw_compress.get_value() == "gzip" || op_raw_compress.get_value() == "zlib"
#endif
#ifdef HAS_LZ4
|| op_raw_compress.get_value() == "lz4"
#endif
) {
} else {
FATAL("Usage error: unknown -raw_compress type %s.",
op_raw_compress.get_value().c_str());
}
#ifdef HAS_SNAPPY
if (op_offline.get_value() && snappy_enabled()) {
* allocator, meaning we cannot support it for static linking, so we override
* the DR_DISALLOW_UNSAFE_STATIC declaration.
* XXX: Send a patch to libsnappy to parameterize the allocator.
*/
dr_allow_unsafe_static_behavior();
# ifdef DRMEMTRACE_STATIC
NOTIFY(0, "-raw_compress snappy is unsafe with statically linked clients\n");
# endif
}
#endif
#ifdef HAS_LZ4
if (op_offline.get_value() && op_raw_compress.get_value() == "lz4") {
dr_allow_unsafe_static_behavior();
# ifdef DRMEMTRACE_STATIC
NOTIFY(0, "-raw_compress lz4 is unsafe with statically linked clients\n");
# endif
}
#endif
DR_ASSERT(cur_window_instr_count.is_lock_free());
}
/* Process-wide teardown of the trace output logic: resets
 * notify_beyond_global_max_once back to 0.
 * NOTE(review): presumably this lets the one-time notification fire again if
 * tracing is re-initialized within the same process -- confirm.
 */
void
exit_io()
{
    notify_beyond_global_max_once = 0;
}
}
}