/* **********************************************************
* Copyright (c) 2011-2023 Google, Inc. All rights reserved.
* Copyright (c) 2001-2010 VMware, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/*
* interp.c - interpreter used for native trace selection
*/
#include "../globals.h"
#include "../link.h"
#include "../fragment.h"
#include "../emit.h"
#include "../dispatch.h"
#include "../fcache.h"
#include "../monitor.h"
#include "arch.h"
#include "instr.h"
#include "instr_create_shared.h"
#include "instrlist.h"
#include "decode.h"
#include "decode_fast.h"
#include "disassemble.h"
#include "instrument.h"
#include "../hotpatch.h"
#ifdef RETURN_AFTER_CALL
# include "../rct.h"
#endif
#ifdef WINDOWS
# include "ntdll.h"
# include "../nudge.h"
#endif
#include "../perscache.h"
#include "../native_exec.h"
#include "../jit_opt.h"
#ifdef CHECK_RETURNS_SSE2
# include <setjmp.h>
#endif
#ifdef VMX86_SERVER
# include "vmkuw.h"
#endif
#ifdef ANNOTATIONS
# include "../annotations.h"
#endif
#ifdef AARCH64
# include "build_ldstex.h"
#endif
/* Length of a direct control transfer instruction.
 * NOTE(review): presumably the 5-byte x86 encoding of a direct call/jmp with a
 * 32-bit displacement -- confirm against the users of this constant.
 */
enum { DIRECT_XFER_LENGTH = 5 };
/* Forward declarations of trace-related routines.  Their definitions are not
 * visible in this part of the file.
 */
static void
process_nops_for_trace(dcontext_t *dcontext, instrlist_t *ilist,
uint flags _IF_DEBUG(bool recreating));
static int
fixup_last_cti(dcontext_t *dcontext, instrlist_t *trace, app_pc next_tag, uint next_flags,
uint trace_flags, fragment_t *prev_f, linkstub_t *prev_l,
bool record_translation, uint *num_exits_deleted ,
instr_t *start_instr, instr_t *end_instr);
/* Not static: NOTE(review): presumably also declared in a header for other
 * modules -- confirm.
 */
bool
mangle_trace(dcontext_t *dcontext, instrlist_t *ilist, monitor_data_t *md);
/* We use a branch limit of 1 to make it easier for the trace
 * creation mechanism to stitch basic blocks together
 */
#define BRANCH_LIMIT 1
/* We limit the total bb size to handle cases like an infinite loop or a long sequence
 * of calls.
 * also, we have a limit on fragment body sizes, which should be impossible
 * to break since x86 instrs are max 17 bytes and we only modify ctis.
 * Although...selfmod mangling does really expand fragments!
 * -selfmod_max_writes helps for selfmod bbs (case 7893/7909).
 * System call mangling is also large, for degenerate cases like tests/linux/infinite.
 * PR 215217: also client additions: we document and assert.
 * FIXME: need better way to know how big will get, b/c we can construct
 * cases that will trigger the size assertion!
 */
/* Lock associated with bb building; interp_exit() destroys it via DELETE_LOCK. */
DECLARE_CXTSWPROT_VAR(mutex_t bb_building_lock, INIT_LOCK_FREE(bb_building_lock));
/* NOTE(review): no reader or writer of this flag is visible in this part of
 * the file; presumably it tracks bb_building_lock acquisition -- confirm.
 */
volatile bool bb_lock_start;
/* Log file opened by interp_init() when the bbdump_tags internal option is set;
 * stays INVALID_FILE otherwise.
 */
static file_t bbdump_file = INVALID_FILE;
#ifdef DEBUG
/* NOTE(review): presumably a debug-build count of bbs built; it is not
 * updated in this part of the file.
 */
DECLARE_NEVERPROT_VAR(uint debug_bb_count, 0);
#endif
/* Module initialization: opens the "bbs" log file when the bbdump_tags
 * internal option is enabled.  Nothing else is set up here.
 */
void
interp_init()
{
    if (!INTERNAL_OPTION(bbdump_tags))
        return;
    bbdump_file = open_log_file("bbs", NULL, 0);
    ASSERT(bbdump_file != INVALID_FILE);
}
#ifdef CUSTOM_TRACES_RET_REMOVAL
# ifdef DEBUG
/* Debug-only counter whose total is logged by interp_exit().  It is not
 * incremented in this part of the file.
 */
static int num_rets_removed;
# endif
#endif
/* Module teardown: closes the bbdump log (if the option that opened it is set),
 * destroys bb_building_lock and logs summary statistics.
 */
void
interp_exit()
{
    if (INTERNAL_OPTION(bbdump_tags))
        close_log_file(bbdump_file);

    DELETE_LOCK(bb_building_lock);

    LOG(GLOBAL, LOG_INTERP | LOG_STATS, 1, "Total application code seen: %d KB\n",
        GLOBAL_STAT(app_code_seen) / 1024);
#if defined(CUSTOM_TRACES_RET_REMOVAL) && defined(DEBUG)
    LOG(GLOBAL, LOG_INTERP | LOG_STATS, 1, "Total rets removed: %d\n", num_rets_removed);
#endif
}
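/****************************************************************************
 ****************************************************************************
 *
 * B A S I C   B L O C K   B U I L D I N G
 */
/* The state needed while building a bb is packaged in the struct below
 * so we can have separate routines for readability
 */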
typedef struct {
app_pc start_pc;
bool app_interp;
* translation or figuring out what pages a bb touches? */
bool for_cache;
bool record_vmlist;
bool mangle_ilist;
bool record_translation;
bool has_bb_building_lock;
bool checked_start_vmarea;
file_t outf;
* we use this mainly for dumping trace origins */
app_pc stop_pc;
* Only checked for full_decode.
*/
bool pass_to_client;
* we store this up front to avoid race conditions
* between full_decode setting and hook calling time.
*/
bool post_client;
bool for_trace;
overlap_info_t *overlap_info;
* caller must initialize region_start and region_end */
instrlist_t *ilist;
uint flags;
void *vmlist;
app_pc end_pc;
bool native_exec;
bool native_call;
instrlist_t **unmangled_ilist;
bool full_decode;
bool follow_direct;
bool check_vm_area;
uint num_elide_jmp;
uint num_elide_call;
app_pc last_page;
app_pc cur_pc;
app_pc instr_start;
app_pc checked_end;
cache_pc exit_target;
uint exit_type;
ibl_branch_type_t ibl_branch_type;
instr_t *instr;
int eflags;
app_pc pretend_pc;
#ifdef ARM
dr_pred_type_t svc_pred;
#endif
DEBUG_DECLARE(bool initialized;)
} build_bb_t;
/* Forward declaration: called from bb_process_ubr(); the definition is not
 * visible in this part of the file.
 */
static inline bool
bb_process_syscall(dcontext_t *dcontext, build_bb_t *bb);
/* Initializes *bb for a new build starting at start_pc.
 * The struct is zeroed first, so every field not assigned here starts out as
 * 0/NULL/false.  known_flags seeds bb->flags, and direct-cti elision
 * (follow_direct) is disabled when FRAG_SELFMOD_SANDBOXED is among them.
 */
static void
init_build_bb(build_bb_t *bb, app_pc start_pc, bool app_interp, bool for_cache,
              bool mangle_ilist, bool record_translation, file_t outf, uint known_flags,
              overlap_info_t *overlap_info)
{
    memset(bb, 0, sizeof(*bb));
#if defined(LINUX) && defined(X86_32)
    /* We can be asked to start at the vsyscall sysenter return point, i.e., code
     * whose fall-through hits our hook. We avoid interpreting our own hook
     * by shifting it to the displaced pc.
     */
    if (DYNAMO_OPTION(hook_vsyscall) && start_pc == vsyscall_sysenter_return_pc) {
        if (vsyscall_sysenter_displaced_pc != NULL)
            start_pc = vsyscall_sysenter_displaced_pc;
        else {
            ASSERT(should_syscall_method_be_sysenter());
        }
    }
#endif
    bb->check_vm_area = true;
    bb->start_pc = start_pc;
    bb->app_interp = app_interp;
    bb->for_cache = for_cache;
    if (bb->for_cache)
        bb->record_vmlist = true;
    bb->mangle_ilist = mangle_ilist;
    bb->record_translation = record_translation;
    bb->outf = outf;
    bb->overlap_info = overlap_info;
    bb->follow_direct = !TEST(FRAG_SELFMOD_SANDBOXED, known_flags);
    bb->flags = known_flags;
    bb->ibl_branch_type = IBL_GENERIC;
#ifdef ARM
    bb->svc_pred = DR_PRED_NONE;
#endif
    DODEBUG(bb->initialized = true;);
}
/* Resets bb->overlap_info so that it describes an empty, contiguous,
 * non-overlapping range anchored at bb->start_pc.
 * bb->overlap_info must be non-NULL; region_start/region_end are left alone.
 */
static void
reset_overlap_info(dcontext_t *dcontext, build_bb_t *bb)
{
    overlap_info_t *info = bb->overlap_info;
    app_pc start = bb->start_pc;

    info->start_pc = start;
    info->min_pc = start;
    info->max_pc = start;
    info->contiguous = true;
    info->overlap = false;
}
/* Folds new_pc into bb->overlap_info: widens [min_pc, max_pc], and for a
 * non-jmp step records an overlap when (last_page, new_pc) intersects the
 * caller-supplied region.  A jmp step clears the contiguous flag.
 * bb->overlap_info must be non-NULL.
 */
static void
update_overlap_info(dcontext_t *dcontext, build_bb_t *bb, app_pc new_pc, bool jmp)
{
    overlap_info_t *info = bb->overlap_info;

    if (new_pc < info->min_pc)
        info->min_pc = new_pc;
    if (new_pc > info->max_pc)
        info->max_pc = new_pc;
    LOG(THREAD, LOG_ALL, 5, "\t app_bb_overlaps " PFX ".." PFX " %s\n", bb->last_page,
        new_pc, jmp ? "jmp" : "");
    if (jmp) {
        /* A direct transfer can never be a contiguous step. */
        info->contiguous = false;
    } else if (!info->overlap && bb->last_page < info->region_end &&
               new_pc > info->region_start) {
        LOG(THREAD_GET, LOG_ALL, 5, "\t it overlaps!\n");
        info->overlap = true;
    }
}
/* BBPRINT(bb, level, fmt, ...): bb-building diagnostics.
 * - DEBUG: logs to the thread log at the given level and, when bb->outf is a
 *   valid file other than the thread log, also prints there.
 * - INTERNAL (non-DEBUG): only prints to bb->outf when it is valid.
 * - otherwise: expands to nothing.
 * The do/while(0) wrapper deliberately has no trailing semicolon so that
 * "if (x) BBPRINT(...); else ..." parses as a single statement; callers
 * supply the semicolon.  The bb argument is parenthesized so that any pointer
 * expression may be passed.
 */
#ifdef DEBUG
#    define BBPRINT(bb, level, ...)                                       \
        do {                                                              \
            LOG(THREAD, LOG_INTERP, level, __VA_ARGS__);                  \
            if ((bb)->outf != INVALID_FILE && (bb)->outf != (THREAD))     \
                print_file((bb)->outf, __VA_ARGS__);                      \
        } while (0)
#else
#    ifdef INTERNAL
#        define BBPRINT(bb, level, ...)                   \
            do {                                          \
                if ((bb)->outf != INVALID_FILE)           \
                    print_file((bb)->outf, __VA_ARGS__);  \
            } while (0)
#    else
#        define BBPRINT(bb, level, ...)
#    endif
#endif
#ifdef WINDOWS
/* Routines defined elsewhere; leave_call_native() compares call targets
 * against their addresses.
 */
extern void
intercept_load_dll(void);
extern void
intercept_unload_dll(void);
# ifdef INTERNAL
extern void
DllMainThreadAttach(void);
# endif
#endif
/* Forward declarations; the definitions are not visible in this part of the file. */
static bool
mangle_bb_ilist(dcontext_t *dcontext, build_bb_t *bb);
static void
build_native_exec_bb(dcontext_t *dcontext, build_bb_t *bb);
static bool
at_native_exec_gateway(dcontext_t *dcontext, app_pc start,
bool *is_call _IF_DEBUG(bool xfer_target));
#ifdef DEBUG
static void
report_native_module(dcontext_t *dcontext, app_pc modpc);
#endif
* Image entry
*/
static bool reached_image_entry = false;
/* Returns true exactly when bb_start is the image entry point and the entry
 * had not been reached before; in that case the reached flag is set as a
 * side effect.  get_image_entry() is only consulted while the flag is unset.
 */
static INLINE_FORCED bool
check_for_image_entry(app_pc bb_start)
{
    if (reached_image_entry || bb_start != get_image_entry())
        return false;
    LOG(THREAD_GET, LOG_ALL, 1, "Reached image entry point " PFX "\n", bb_start);
    set_reached_image_entry();
    return true;
}
/* Records that the image entry point has been reached.
 * The flag is written between an unprotect/protect pair on
 * DATASEC_RARELY_PROT, so the order of these three statements matters.
 */
void
set_reached_image_entry()
{
    SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT);
    reached_image_entry = true;
    SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT);
}
/* Accessor: returns whether set_reached_image_entry() has been called. */
bool
reached_image_entry_yet()
{
    return reached_image_entry;
}
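/***************************************************************************
 * Whether to inline or elide callees
 */
/* Returns true if pc is a target that must not be entered (i.e., elided into
 * the current bb) but should
 * still be mangled.
 */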
/* Returns true if a direct transfer to pc must not be followed (elided) by the
 * bb builder.  With DR_APP_EXPORTS the only such target is
 * dr_app_running_under_dynamorio; without it this always returns false.
 */
static inline bool
must_not_be_entered(app_pc pc)
{
    return false
#ifdef DR_APP_EXPORTS
        /* DR applies special handling
         * on seeing a bb starting at dr_app_running_under_dynamorio.
         * NOTE(review): presumably it changes that routine's return value, which
         * requires the routine to begin its own bb -- confirm.
         */
        || pc == (app_pc)dr_app_running_under_dynamorio
#endif
        ;
}
/* Returns true if a direct call to pc should be left as a native call instead
 * of being inlined or mangled.  With INTERNAL defined this is true for every
 * call when the inline_calls option is off; beyond that only a fixed set of
 * DR routines qualifies (platform dependent, see below).
 */
static inline bool
leave_call_native(app_pc pc)
{
    return (
#ifdef INTERNAL
        !dynamo_options.inline_calls
#else
        0
#endif
#ifdef WINDOWS
        || pc == (app_pc)intercept_load_dll || pc == (app_pc)intercept_unload_dll
    /* We expect to see direct calls to the next routine since our
     * own DllMain calls it! */
#    ifdef INTERNAL
        || pc == (app_pc)DllMainThreadAttach
#    endif
        || (pc == (app_pc)generic_nudge_handler)
#else
        /* (The beginning of this comment is missing from the file; the surviving
         * text starts mid-sentence with "libs".)
         * ...libs, but for now we have to let the loader call _fini()
         * in the client, which may end up calling __wrap_free().
         * It's simpler to let those be interpreted and make a native
         * call to the real heap routine here as this is a direct
         * call whereas we'd need native_exec for the others:
         */
        || pc == (app_pc)global_heap_free
#endif
    );
}
/* Returns true if a direct jmp to pc must be kept as a jmp rather than being
 * elided into the current bb.  Only Windows has such targets; elsewhere this
 * always returns false.
 */
static inline bool
must_not_be_elided(app_pc pc)
{
#ifdef WINDOWS
    /* Only the return path may have its jmp elided, since we
     * interpret the return path from trampolines. The forward jump leads to
     * the trampoline and shouldn't be elided. */
    if (is_on_interception_initial_route(pc))
        return true;
#endif
    return (0
#ifdef WINDOWS
            /* Our hooks are direct jmps into the interception buffer, and
             * we don't want to interpret the code in that buffer, as it may swap to the
             * dstack and mess up a return-from-fcache.
             * N.B.: if use this routine anywhere else, pay attention to the
             * hack for is_syscall_trampoline() in the use here!
             */
            || (is_in_interception_buffer(pc))
#endif
    );
}
#ifdef DR_APP_EXPORTS
/* Used when DR was started automatically, to recognize the
 * dynamo API routines that would really mess things up
 */
/* Returns true if pc is the entry of one of the DR start/stop API routines
 * listed below and automatic_startup is set.  In DEBUG builds the matching
 * routine name is logged.
 */
static inline bool
must_escape_from(app_pc pc)
{
    /* If we ever find ourselves at the top of one of these, we immediately issue
     * a ret instruction...haven't set up frame yet so stack fine, only
     * problem is return value, go ahead and overwrite xax, it's caller-saved
     * FIXME: is this ok?
     */
    /* Note that we can't just look for direct calls to these functions
     * because of stubs, etc. that end up doing indirect jumps to them!
     */
    bool res = false
#    ifdef DR_APP_EXPORTS
        || (automatic_startup &&
            (pc == (app_pc)dynamorio_app_init || pc == (app_pc)dr_app_start ||
             pc == (app_pc)dynamo_thread_init || pc == (app_pc)dynamorio_app_exit ||
             pc == (app_pc)dynamo_thread_exit))
#    endif
        ;
#    ifdef DEBUG
    if (res) {
#        ifdef DR_APP_EXPORTS
        LOG(THREAD_GET, LOG_INTERP, 3, "must_escape_from: found ");
        if (pc == (app_pc)dynamorio_app_init)
            LOG(THREAD_GET, LOG_INTERP, 3, "dynamorio_app_init\n");
        else if (pc == (app_pc)dr_app_start)
            LOG(THREAD_GET, LOG_INTERP, 3, "dr_app_start\n");
        else if (pc == (app_pc)dynamo_thread_init)
            LOG(THREAD_GET, LOG_INTERP, 3, "dynamo_thread_init\n");
        else if (pc == (app_pc)dynamorio_app_exit)
            LOG(THREAD_GET, LOG_INTERP, 3, "dynamorio_app_exit\n");
        else if (pc == (app_pc)dynamo_thread_exit)
            LOG(THREAD_GET, LOG_INTERP, 3, "dynamo_thread_exit\n");
#        endif
    }
#    endif
    return res;
}
#endif
/* Adds bb->instr, which must be a direct call or jmp, to bb->ilist for native
 * execution. Makes sure its target is reachable from the code cache, which
 * is critical for jmps b/c they're native for our hooks of app code which may
 * not be reachable from the code cache. Also needed for calls b/c in the future
 * (i#774) the DR lib (and thus our leave_call_native() calls) won't be reachable
 * from the cache.
 */
/* Arranges for the direct jmp/call in bb->instr to execute natively.
 * "appended" says whether bb->instr is already on bb->ilist.
 * - x86-64: bb->instr is replaced by meta instructions that store the 64-bit
 *   target into a TLS slot and transfer indirectly through it; bb->instr is
 *   destroyed and set to NULL.
 * - ARM: not implemented.
 * - otherwise: bb->instr itself is kept on the list and marked meta.
 */
static void
bb_add_native_direct_xfer(dcontext_t *dcontext, build_bb_t *bb, bool appended)
{
#if defined(X86) && defined(X64)
    /* This cti is going to run from our code cache, so we have to
     * make sure it still reaches its target. We could try to check
     * reachability from the likely code cache slot, but these should be
     * rare enough that making them indirect won't matter and then we have
     * fewer reachability dependences.
     * We do this here rather than in d_r_mangle() b/c we'd have a hard time
     * distinguishing native jmp/call due to DR's own operations from a
     * client's inserted meta jmp/call.
     */
    /* The target is written to the TLS slot as two 32-bit immediates.
     * Alternative would be to embed the target into the code stream.
     * We don't need to set translation b/c these are meta instrs and they
     * won't fault.
     */
    ptr_uint_t tgt = (ptr_uint_t)opnd_get_pc(instr_get_target(bb->instr));
    opnd_t tls_slot = opnd_create_sized_tls_slot(os_tls_offset(TLS_XAX_SLOT), OPSZ_4);
    /* Low 32 bits of the target. */
    instrlist_meta_append(
        bb->ilist, INSTR_CREATE_mov_imm(dcontext, tls_slot, OPND_CREATE_INT32((int)tgt)));
    /* High 32 bits go into the upper half of the same slot. */
    opnd_set_disp(&tls_slot, opnd_get_disp(tls_slot) + 4);
    instrlist_meta_append(
        bb->ilist,
        INSTR_CREATE_mov_imm(dcontext, tls_slot, OPND_CREATE_INT32((int)(tgt >> 32))));
    if (instr_is_ubr(bb->instr)) {
        instrlist_meta_append(
            bb->ilist,
            INSTR_CREATE_jmp_ind(dcontext,
                                 opnd_create_tls_slot(os_tls_offset(TLS_XAX_SLOT))));
        bb->exit_type |= instr_branch_type(bb->instr);
    } else {
        ASSERT(instr_is_call_direct(bb->instr));
        instrlist_meta_append(
            bb->ilist,
            INSTR_CREATE_call_ind(dcontext,
                                  opnd_create_tls_slot(os_tls_offset(TLS_XAX_SLOT))));
    }
    if (appended)
        instrlist_remove(bb->ilist, bb->instr);
    instr_destroy(dcontext, bb->instr);
    bb->instr = NULL;
#elif defined(ARM)
    ASSERT_NOT_IMPLEMENTED(false);
#else
    if (appended) {
        /* The instr is about to become meta, so drop its translation. */
        instr_set_translation(bb->instr, NULL);
    } else
        instrlist_append(bb->ilist, bb->instr);
    /* Indicate that the relative target must be
     * re-encoded, and that it is not an exit cti.
     * However, we must mangle this to ensure it reaches (i#992)
     * which we special-case in d_r_mangle().
     */
    instr_set_meta(bb->instr);
    instr_set_raw_bits_valid(bb->instr, false);
#endif
}
/* Checks whether bb->cur_pc is somewhere interpretation is not supposed
 * to be. We assume we only have to check after control transfer instructions,
 * i.e., we assume that all of these conditions are procedures that are only
 * entered by calling or jumping, never falling through.
 */
/* Returns true if bb building must stop at bb->cur_pc.
 * With DR_APP_EXPORTS, reaching one of the must_escape_from() routines makes
 * the bb end in "load 0 into the return register; return" with a return-IBL
 * exit.  Otherwise the answer is is_stopping_point(dcontext, bb->cur_pc).
 */
static inline bool
check_for_stopping_point(dcontext_t *dcontext, build_bb_t *bb)
{
#ifdef DR_APP_EXPORTS
    if (must_escape_from(bb->cur_pc)) {
        reg_id_t reg = IF_X86_ELSE(REG_EAX, IF_RISCV64_ELSE(DR_REG_A0, DR_REG_R0));
        BBPRINT(bb, 3, "interp: emergency exit from " PFX "\n", bb->cur_pc);
        /* If we ever find ourselves at the top of one of these, we immediately issue
         * a ret instruction...haven't set up frame yet so stack fine, only
         * problem is return value, go ahead and overwrite xax, it's
         * caller-saved.
         * FIXME: is this ok?
         */
        instrlist_append(
            bb->ilist,
            XINST_CREATE_load_int(dcontext, opnd_create_reg(reg), OPND_CREATE_INT32(0)));
        instrlist_append(bb->ilist, XINST_CREATE_return(dcontext));
        bb->exit_type |= LINK_INDIRECT | LINK_RETURN;
        bb->exit_target =
            get_ibl_routine(dcontext, IBL_LINKED, DEFAULT_IBL_BB(), IBL_RETURN);
        return true;
    }
#endif
#ifdef CHECK_RETURNS_SSE2
    /* Only warns; building continues. */
    if (bb->cur_pc == (app_pc)longjmp) {
        SYSLOG_INTERNAL_WARNING("encountered longjmp, which will cause ret mismatch!");
    }
#endif
    return is_stopping_point(dcontext, bb->cur_pc);
}
/* Arithmetic eflags analysis to see whether a sequence of instrs reads an
 * arithmetic flag prior to writing it.
 * Usage: first initialize status to 0 and eflags_6 to 0.
 * Then call this routine for each instr in sequence, assigning result to status.
 * eflags_6 holds flags written and read so far.
 * Uses these flags, defined in instr.h, as status values:
 * EFLAGS_WRITE_ARITH = writes all arith flags before reading any
 * EFLAGS_WRITE_OF = writes OF before reading it (x86-only)
 * EFLAGS_READ_ARITH = reads some of arith flags before writing
 * EFLAGS_READ_OF = reads OF before writing OF (x86-only)
 * 0 = no information yet
 * On ARM, Q and GE flags are ignored.
 */
/* One step of the arithmetic-flags analysis over an instruction sequence.
 * status is the result so far (0 initially) and *eflags_6 accumulates the
 * flags read and written so far; the new status is returned and *eflags_6 is
 * updated in place.  Once status is EFLAGS_WRITE_ARITH (or, on x86,
 * EFLAGS_READ_OF) it is final and returned unchanged.
 */
static inline int
eflags_analysis(instr_t *instr, int status, uint *eflags_6)
{
    uint e6 = *eflags_6;
    /* Flags written so far, shifted into the read positions. */
    uint e6_w2r = EFLAGS_WRITE_TO_READ(e6);
    uint instr_eflags = instr_get_arith_flags(instr, DR_QUERY_DEFAULT);
    /* Nothing to do if the instr touches no flags or the status is final.
     * We do keep going if the
     * result is writes to OF to see if later writes to rest of flags
     * before reading any, and keep going if reads one of the 6 to see
     * if later writes to OF before reading it.
     */
    if (instr_eflags == 0 ||
        status == EFLAGS_WRITE_ARITH IF_X86(|| status == EFLAGS_READ_OF))
        return status;
    /* Reads by interrupt instructions are not counted as reads. */
    if ((instr_eflags & EFLAGS_READ_ARITH) != 0 &&
        (!instr_opcode_valid(instr) || !instr_is_interrupt(instr))) {
        e6 |= (instr_eflags & EFLAGS_READ_ARITH);
        *eflags_6 = e6;
        /* Does this instr read a flag that was not written earlier? */
        if ((e6_w2r | (instr_eflags & EFLAGS_READ_ARITH)) != e6_w2r) {
            status = EFLAGS_READ_ARITH;
            LOG(THREAD_GET, LOG_INTERP, 4, "\treads flag before writing it!\n");
#ifdef X86
            if ((instr_eflags & EFLAGS_READ_OF) != 0 && (e6 & EFLAGS_WRITE_OF) == 0) {
                status = EFLAGS_READ_OF;
                LOG(THREAD_GET, LOG_INTERP, 4, "\t reads OF prior to writing it!\n");
            }
#endif
        }
    } else if ((instr_eflags & EFLAGS_WRITE_ARITH) != 0) {
        e6 |= (instr_eflags & EFLAGS_WRITE_ARITH);
        *eflags_6 = e6;
        if ((e6 & EFLAGS_WRITE_ARITH) == EFLAGS_WRITE_ARITH &&
            (e6 & EFLAGS_READ_ARITH) == 0) {
            status = EFLAGS_WRITE_ARITH;
            LOG(THREAD_GET, LOG_INTERP, 4, "\twrote all 6 flags now!\n");
        }
#ifdef X86
        else if ((e6 & EFLAGS_WRITE_OF) != 0 && (e6 & EFLAGS_READ_OF) == 0) {
            status = EFLAGS_WRITE_OF;
            LOG(THREAD_GET, LOG_INTERP, 4, "\twrote overflow flag before reading it!\n");
        }
#endif
    }
    return status;
}
/* We check the vmareas that code comes from, for several purposes:
 * 1) we need list of areas where this thread's fragments come
 * from, for faster flushing on munmaps
 * 2) also for faster flushing, each vmarea has a list of fragments
 * 3) we need to mark as read-only any writable region that
 * has a fragment come from it, to handle self-modifying code
 * 4) for PROGRAM_SHEPHERDING restricted code origins for security
 * 5) for restricted execution environments: not letting bb cross regions
 */
/*
FIXME CASE 7380:
Since we report a security violation before executing off a bad page, it can be
a false positive due to:
- a faulting instruction in middle of bb would have prevented
getting there
- ignorable syscall in middle
- self-mod code would have ended bb sooner than bad page
One solution is to have check_thread_vm_area() return false and have
bb building stop at checked_end if a violation will occur when we
get there. Then we only raise the violation once building a bb
starting there.
*/
/* Runs the vmarea check for the very first pc of the bb (bb->start_pc) and
 * seeds last_page and, if present, the overlap info.  Does nothing when
 * bb->check_vm_area is off.  The check is asserted to succeed in DEBUG builds;
 * its result is not examined otherwise.
 */
static inline void
check_new_page_start(dcontext_t *dcontext, build_bb_t *bb)
{
    DEBUG_DECLARE(bool ok;)
    if (bb->check_vm_area) {
        DEBUG_DECLARE(ok =)
        check_thread_vm_area(dcontext, bb->start_pc, bb->start_pc,
                             (bb->record_vmlist ? &bb->vmlist : NULL), &bb->flags,
                             &bb->checked_end, false);
        ASSERT(ok);
        bb->last_page = bb->start_pc;
        if (bb->overlap_info != NULL)
            reset_overlap_info(dcontext, bb);
    }
}
/* Checks a straight-line (fall-through) advance to new_pc.
 * FIXME: with checked_end we don't need to call this on every contig end
 * while bb building like we used to. Should revisit the overlap info and
 * walk_app_bb reasons for keeping those contig() calls and see if we can
 * optimize them away for bb building at least.
 * i#993: new_pc points to the last byte of the current instruction and is not
 * an open-ended endpoint.
 */
/* Checks whether the bb may extend contiguously through new_pc.
 * Returns false if check_thread_vm_area() rejects the extension; returns true
 * otherwise (including when bb->check_vm_area is off), after updating the
 * overlap info (if any) and bb->last_page.
 */
static inline bool
check_new_page_contig(dcontext_t *dcontext, build_bb_t *bb, app_pc new_pc)
{
    bool is_first_instr = (bb->instr_start == bb->start_pc);
    if (!bb->check_vm_area)
        return true;
    if (bb->checked_end == NULL) {
        /* No vmarea check has set checked_end yet. */
        ASSERT(new_pc == bb->start_pc);
    } else if (new_pc >= bb->checked_end) {
        if (!check_thread_vm_area(dcontext, new_pc, bb->start_pc,
                                  (bb->record_vmlist ? &bb->vmlist : NULL), &bb->flags,
                                  &bb->checked_end,
                                  /* We don't want to fall through into an
                                   * incompatible vmarea, so we treat fall
                                   * through like a transfer. We can't end the
                                   * bb before the first instruction, so we pass
                                   * false to forcibly merge in the vmarea
                                   * flags.
                                   */
                                  !is_first_instr)) {
            return false;
        }
    }
    if (bb->overlap_info != NULL)
        update_overlap_info(dcontext, bb, new_pc, false);
    DOLOG(4, LOG_INTERP, {
        if (PAGE_START(bb->last_page) != PAGE_START(new_pc))
            LOG(THREAD, LOG_INTERP, 4, "page boundary crossed\n");
    });
    bb->last_page = new_pc;
    return true;
}
/* Decides whether the bb may continue at new_pc after a direct transfer
 * (an elided jmp or call).  Returns false if any of the checks below refuses;
 * on success marks the bb FRAG_HAS_DIRECT_CTI and moves last_page to new_pc.
 * Note that when bb->check_vm_area is off, true is returned before the flag
 * and last_page updates.
 */
static bool
check_new_page_jmp(dcontext_t *dcontext, build_bb_t *bb, app_pc new_pc)
{
    /* First make sure the instruction we are leaving was itself checked. */
    bool ok = check_new_page_contig(dcontext, bb, bb->cur_pc - 1);
    ASSERT(ok && "should have checked cur_pc-1 in decode loop");
    if (!ok)
        return false;
    /* Do not follow a direct cti out of a bb that is selfmod-sandboxed.
     * not good enough to only check this at top of interp -- could walk contig
     * from non-selfmod to selfmod page, and then do a direct cti, which
     * check_thread_vm_area would allow (no flag changes on direct cti)!
     * also not good enough to put this check in check_thread_vm_area, as that
     * only checks across pages.
     */
    if ((bb->flags & FRAG_SELFMOD_SANDBOXED) != 0)
        return false;
    if (PAGE_START(bb->last_page) != PAGE_START(new_pc))
        LOG(THREAD, LOG_INTERP, 4, "page boundary crossed\n");
    /* Do not walk into a native-exec area (we assume we are not currently in one,
     * though could happen if bypass a gateway -- even then this is a feature
     * to allow getting back to native ASAP)
     * FIXME: we could assume that such direct calls only
     * occur from DGC, and rely on check_thread_vm_area to disallow,
     * as an (unsafe) optimization
     */
    if (DYNAMO_OPTION(native_exec) && DYNAMO_OPTION(native_exec_dircalls) &&
        !vmvector_empty(native_exec_areas) && is_native_pc(new_pc))
        return false;
    /* If we're crossing a boundary between two modules that are
     * and aren't on null_instrument_list, don't elide the jmp.
     * XXX i#884: if we haven't yet executed from the 2nd module, the client
     * won't receive the module load event yet and we might include code
     * from it here. It would be tricky to solve that, and it should only happen
     * if the client turns on elision, so we leave it.
     */
    if ((!!os_module_get_flag(bb->cur_pc, MODULE_NULL_INSTRUMENT)) !=
        (!!os_module_get_flag(new_pc, MODULE_NULL_INSTRUMENT)))
        return false;
    if (!bb->check_vm_area)
        return true;
    if (!check_thread_vm_area(dcontext, new_pc, bb->start_pc,
                              (bb->record_vmlist ? &bb->vmlist : NULL), &bb->flags,
                              &bb->checked_end, true))
        return false;
    if (bb->overlap_info != NULL)
        update_overlap_info(dcontext, bb, new_pc, true);
    bb->flags |= FRAG_HAS_DIRECT_CTI;
    bb->last_page = new_pc;
    return true;
}
/* Ends the bb with bb->instr as a special-exit branch for single stepping.
 * The instr is appended with its translation set to bb->instr_start, and the
 * fragment is made unshared and barred from traces.
 */
static inline void
bb_process_single_step(dcontext_t *dcontext, build_bb_t *bb)
{
    LOG(THREAD, LOG_INTERP, 2, "interp: single step exception bb at " PFX "\n",
        bb->instr_start);
    /* (The opening of this comment is missing from the file; the surviving text is:)
     * In this case, we should test if only one iteration is done
     * before the single step exception.
     */
    instrlist_append(bb->ilist, bb->instr);
    instr_set_translation(bb->instr, bb->instr_start);
    /* Mark the instruction as a special exit. */
    instr_branch_set_special_exit(bb->instr, true);
    bb->exit_type |= LINK_SPECIAL_EXIT;
    bb->flags &= ~FRAG_SHARED;
    bb->flags |= FRAG_CANNOT_BE_TRACE;
}
/* Handles a failure to decode the instruction at bb->instr_start.
 * If this is a real app bb (bb->app_interp) and the invalid instruction is the
 * first one, the bb build is aborted and an illegal-instruction exception is
 * forged (this path does not return).  Otherwise bb->instr is destroyed and
 * set to NULL, which ends the bb before the invalid instruction.
 */
static inline void
bb_process_invalid_instr(dcontext_t *dcontext, build_bb_t *bb)
{
    /* Invalid instr: end the bb BEFORE the instr; we only act on it if we
     * reach the instr itself
     */
    LOG(THREAD, LOG_INTERP, 2, "interp: invalid instr at " PFX "\n", bb->instr_start);
    /* This routine is not only used for building bbs to execute but also
     * for recreating state, so check bb->app_interp parameter to find out
     * if building a real app bb to be executed
     */
    if (bb->app_interp && bb->instr_start == bb->start_pc) {
        /* This is the first instr in the bb so it will be executed for sure and
         * we need to generate an invalid instruction exception.
         * A benefit of being first instr is that the state is easy
         * to translate.
         */
        /* Copying the invalid bytes and having the processor raise the fault
         * would help on Windows where the kernel splits invalid instructions into
         * different cases (an invalid lock prefix and other distinctions, when the
         * underlying processor has a single interrupt 6), and it is hard to
         * duplicate Windows' behavior in our forged exception. However, we are not
         * certain that this instruction will raise a fault on the processor. It
         * might not if our decoder has a bug, or a new processor has added new
         * opcodes, or just due to processor variations in undefined gray areas.
         * Trying to copy without knowing the length of the instruction is a recipe
         * for disaster: it can lead to executing junk and even missing our exit cti
         * (i#3939).
         */
        /* XXX: a client might want to see the illegal instr
         * and to change it. That's not easy to do though when we don't know what
         * it is. But it's confusing for the client to get the illegal instr fault
         * having never seen the problematic instr in a bb event.
         */
        /* XXX: consider a way to specify new instruction formats to
         * avoid this app exception for new opcodes.
         */
        ASSERT(dcontext->bb_build_info == bb);
        bb_build_abort(dcontext, true, true);
        /* FIXME: we use the illegal instruction exception here, even though we
         * know windows uses different exception codes for different
         * types of invalid instructions (for ex. STATUS_INVALID_LOCK
         * _SEQUENCE for lock prefix on a jmp instruction).
         */
        if (TEST(DUMPCORE_FORGE_ILLEGAL_INST, DYNAMO_OPTION(dumpcore_mask)))
            os_dump_core("Warning: Encountered Illegal Instruction");
        os_forge_exception(bb->instr_start, ILLEGAL_INSTRUCTION_EXCEPTION);
        ASSERT_NOT_REACHED();
    } else {
        instr_destroy(dcontext, bb->instr);
        bb->instr = NULL;
    }
}
#ifdef X86
/* Returns true to indicate "elide the jmp and continue at target" and false otherwise.
 * should be used both for converted indirect jumps and
 * FIXME: for direct jumps by bb_process_ubr
 */
/* Tries to elide a direct jump to target.
 * The attempt is made only if elision is enabled for this bb, the target may
 * be entered, the per-bb jmp elision limit has not been hit and the jump is
 * forward (or backward elision is enabled).  On success bb->cur_pc is moved
 * to target and true is returned; otherwise nothing changes and false is
 * returned.
 */
static inline bool
follow_direct_jump(dcontext_t *dcontext, build_bb_t *bb, app_pc target)
{
    if (!bb->follow_direct || must_not_be_entered(target) ||
        bb->num_elide_jmp >= DYNAMO_OPTION(max_elide_jmp) ||
        (!DYNAMO_OPTION(elide_back_jmps) && bb->cur_pc > target)) {
        BBPRINT(bb, 3, " NOT attempting to follow jump from " PFX " to " PFX "\n",
                bb->instr_start, target);
        return false;
    }
    if (!check_new_page_jmp(dcontext, bb, target)) {
        BBPRINT(bb, 3, " NOT following jmp from " PFX " to " PFX "\n",
                bb->instr_start, target);
        return false;
    }
    bb->num_elide_jmp++;
    STATS_INC(total_elided_jmps);
    STATS_TRACK_MAX(max_elided_jmps, bb->num_elide_jmp);
    bb->cur_pc = target;
    BBPRINT(bb, 4, " continuing at target " PFX "\n", bb->cur_pc);
    return true;
}
#endif
/* Processes the direct unconditional jump in bb->instr.
 * Returns true in these cases:
 *   - the jump was elided and bb->cur_pc now points at its target (bb->instr
 *     is destroyed);
 *   - (Windows) the jump led to a syscall trampoline and was replaced by the
 *     original app instruction.  If that instruction is an lea, an int 0x2e is
 *     appended as well and the result of bb_process_syscall() is returned
 *     instead.
 * Returns false when the jump is kept, either as a native transfer (target
 * must not be elided) or as the bb's exit with exit_target set.
 */
static inline bool
bb_process_ubr(dcontext_t *dcontext, build_bb_t *bb)
{
    app_pc tgt = (byte *)opnd_get_pc(instr_get_target(bb->instr));
    BBPRINT(bb, 4, "interp: direct jump at " PFX "\n", bb->instr_start);
    if (must_not_be_elided(tgt)) {
#ifdef WINDOWS
        byte *wrapper_start;
        if (is_syscall_trampoline(tgt, &wrapper_start)) {
            /* Avoid entering the syscall trampoline, which is meant
             * only for native syscalls -- we replace the jmp with the
             * original app mov immed that it replaced
             */
            BBPRINT(bb, 3,
                    "interp: replacing syscall trampoline @" PFX " w/ orig mov @" PFX
                    "\n",
                    bb->instr_start, wrapper_start);
            instr_reset(dcontext, bb->instr);
            decode(dcontext, wrapper_start, bb->instr);
            /* Make sure that we decoded the instr we expect
             * (usually mov_imm but can be lea if hooked_deeper) here */
            ASSERT(instr_get_opcode(bb->instr) == OP_mov_imm ||
                   (instr_get_opcode(bb->instr) == OP_lea &&
                    DYNAMO_OPTION(native_exec_hook_conflict) ==
                        HOOKED_TRAMPOLINE_HOOK_DEEPER));
            instrlist_append(bb->ilist, bb->instr);
            /* The translation should point at the
             * original application address
             */
            if (bb->record_translation)
                instr_set_translation(bb->instr, bb->instr_start);
            if (instr_get_opcode(bb->instr) == OP_lea) {
                app_pc translation = bb->instr_start + instr_length(dcontext, bb->instr);
                ASSERT_CURIOSITY(instr_length(dcontext, bb->instr) == 4);
                /* The lea is followed by a synthesized "int 0x2e" syscall. */
                ASSERT(get_syscall_method() == SYSCALL_METHOD_INT);
                bb->instr = INSTR_CREATE_int(dcontext,
                                             opnd_create_immed_int((sbyte)0x2e, OPSZ_1));
                if (bb->record_translation)
                    instr_set_translation(bb->instr, translation);
                ASSERT(instr_is_syscall(bb->instr) &&
                       instr_get_opcode(bb->instr) == OP_int);
                instrlist_append(bb->ilist, bb->instr);
                return bb_process_syscall(dcontext, bb);
            }
            return true;
        }
#endif
        BBPRINT(bb, 3, "interp: NOT following jmp to " PFX "\n", tgt);
        /* Keep the jmp, to be executed natively; bb->instr is not yet on the ilist. */
        bb_add_native_direct_xfer(dcontext, bb, false);
        /* The fragment loses its coarse-grain flag in this case. */
        bb->flags &= ~FRAG_COARSE_GRAIN;
        STATS_INC(coarse_prevent_cti);
        return false;
    }
    if (bb->follow_direct && !must_not_be_entered(tgt) &&
        bb->num_elide_jmp < DYNAMO_OPTION(max_elide_jmp) &&
        (DYNAMO_OPTION(elide_back_jmps) || bb->cur_pc <= tgt)) {
        if (check_new_page_jmp(dcontext, bb, tgt)) {
            /* Elide the jmp: drop it and continue decoding at its target. */
            bb->num_elide_jmp++;
            STATS_INC(total_elided_jmps);
            STATS_TRACK_MAX(max_elided_jmps, bb->num_elide_jmp);
            bb->cur_pc = tgt;
            BBPRINT(bb, 4, " continuing at target " PFX "\n", bb->cur_pc);
            instr_destroy(dcontext, bb->instr);
            bb->instr = NULL;
            return true;
        } else {
            BBPRINT(bb, 3,
                    " NOT following direct jmp from " PFX " to " PFX "\n",
                    bb->instr_start, tgt);
        }
    }
    /* The jmp stays and becomes the exit of this bb. */
    bb->exit_target = opnd_get_pc(instr_get_target(bb->instr));
    instrlist_append(bb->ilist, bb->instr);
    return false;
}
#ifdef X86
/* Returns true if the call is elided (bb building continues in the callee),
 * and false if not following due to hitting a limit or other reason */
/* Tries to elide a direct (or converted) call to callee.
 * On success bb->cur_pc is moved to callee, the call elision count is bumped
 * and true is returned; otherwise false is returned and bb is unchanged apart
 * from whatever check_new_page_jmp() did before refusing.
 */
static bool
follow_direct_call(dcontext_t *dcontext, build_bb_t *bb, app_pc callee)
{
    /* FIXME: this logic is duplicated rather than shared with other call handling
     * and in bb_process_call_direct()
     */
    if (bb->follow_direct && !must_not_be_entered(callee) &&
        bb->num_elide_call < DYNAMO_OPTION(max_elide_call) &&
        (DYNAMO_OPTION(elide_back_calls) || bb->cur_pc <= callee)) {
        if (check_new_page_jmp(dcontext, bb, callee)) {
            bb->num_elide_call++;
            STATS_INC(total_elided_calls);
            STATS_TRACK_MAX(max_elided_calls, bb->num_elide_call);
            bb->cur_pc = callee;
            BBPRINT(bb, 4, " continuing in callee at " PFX "\n", bb->cur_pc);
            return true;
        } else {
            BBPRINT(bb, 3,
                    " NOT following direct (or converted) call from " PFX " to " PFX
                    "\n",
                    bb->instr_start, callee);
        }
    } else {
        BBPRINT(bb, 3, " NOT attempting to follow call from " PFX " to " PFX "\n",
                bb->instr_start, callee);
    }
    return false;
}
#endif
/* Backs the bb up so that it ends just before the current instruction:
 * bb->instr is unlinked from the ilist (only if "appended" says it is on it),
 * destroyed and cleared, and bb->cur_pc is rewound to the instruction's start.
 */
static inline void
bb_stop_prior_to_instr(dcontext_t *dcontext, build_bb_t *bb, bool appended)
{
    if (appended) {
        instrlist_remove(bb->ilist, bb->instr);
    }
    instr_destroy(dcontext, bb->instr);
    bb->instr = NULL;
    /* The next bb will begin at the instruction we just dropped. */
    bb->cur_pc = bb->instr_start;
}
/* Processes the direct call in bb->instr, which the code assumes is already
 * on bb->ilist (it is removed with appended == true below).
 * Returns true when:
 *   - the callee must be called natively (leave_call_native()); the call is
 *     converted into a native transfer, or
 *   - the call was elided and bb->cur_pc now points at the callee.
 * Returns false when:
 *   - coarse_split_calls forces the bb to end before this call (the call is
 *     removed and cur_pc rewound), or
 *   - the call is not followed; the exit target/type is then recorded.
 */
static inline bool
bb_process_call_direct(dcontext_t *dcontext, build_bb_t *bb)
{
    byte *callee = (byte *)opnd_get_pc(instr_get_target(bb->instr));
#ifdef CUSTOM_TRACES_RET_REMOVAL
    /* A call whose target is the instruction right after it is not counted. */
    if (callee == bb->instr_start + DIRECT_XFER_LENGTH) {
        LOG(THREAD, LOG_INTERP, 4, "found call to next instruction\n");
    } else
        dcontext->num_calls++;
#endif
    STATS_INC(num_all_calls);
    BBPRINT(bb, 4, "interp: direct call at " PFX "\n", bb->instr_start);
    if (leave_call_native(callee)) {
        BBPRINT(bb, 3, "interp: NOT inlining or mangling call to " PFX "\n", callee);
        /* The native call is a non-exit cti, so the bb gives up coarse-grain.
         * If we allow this fragment to be coarse we must kill the freeze
         * nudge thread!
         */
        bb->flags &= ~FRAG_COARSE_GRAIN;
        STATS_INC(coarse_prevent_cti);
        bb_add_native_direct_xfer(dcontext, bb, true);
        return true;
    }
    if (DYNAMO_OPTION(coarse_split_calls) && DYNAMO_OPTION(coarse_units) &&
        TEST(FRAG_COARSE_GRAIN, bb->flags)) {
        if (instrlist_first(bb->ilist) != bb->instr) {
            /* Make the call start its own bb. */
            bb_stop_prior_to_instr(dcontext, bb, true);
            return false;
        } else {
            /* The call is already first: fall back to fine-grain instead. */
            bb->flags &= ~FRAG_COARSE_GRAIN;
            STATS_INC(coarse_prevent_cti);
        }
    }
    if (bb->follow_direct && !must_not_be_entered(callee) &&
        bb->num_elide_call < DYNAMO_OPTION(max_elide_call) &&
        (DYNAMO_OPTION(elide_back_calls) || bb->cur_pc <= callee)) {
        if (check_new_page_jmp(dcontext, bb, callee)) {
            bb->num_elide_call++;
            STATS_INC(total_elided_calls);
            STATS_TRACK_MAX(max_elided_calls, bb->num_elide_call);
            bb->cur_pc = callee;
            BBPRINT(bb, 4, " continuing in callee at " PFX "\n", bb->cur_pc);
            return true;
        }
    }
    BBPRINT(bb, 3, " NOT following direct call from " PFX " to " PFX "\n",
            bb->instr_start, callee);
    /* NOTE(review): a direct call that is also a cbr is presumably a
     * conditionally-executed call on some architectures -- confirm.
     */
    if (instr_is_cbr(bb->instr)) {
        instr_exit_branch_set_type(bb->instr, instr_branch_type(bb->instr));
    } else {
        bb->exit_target = callee;
    }
    return false;
}
#ifdef WINDOWS
/* Returns whether the three instructions form the sequence
 * "call (%xdx); mov %xsp -> %xdx" or "call %xdx; mov %xsp -> %xdx"
 * and "sysenter".
 */
/* Returns true iff call, mov and sysenter are three consecutive non-meta
 * instructions of the form
 *     call (%xdx)  or  call %xdx     (flagged INSTR_IND_CALL_DIRECT)
 *     mov %xsp -> %xdx
 *     sysenter
 * Any NULL argument yields false.
 */
bool
instr_is_call_sysenter_pattern(instr_t *call, instr_t *mov, instr_t *sysenter)
{
    if (call == NULL || mov == NULL || sysenter == NULL)
        return false;
    if (instr_is_meta(call) || instr_is_meta(mov) || instr_is_meta(sysenter))
        return false;
    if (instr_get_next(call) != mov || instr_get_next(mov) != sysenter)
        return false;
    if (instr_get_opcode(sysenter) != OP_sysenter)
        return false;
    /* XXX: the checks below are exact matches; it is an open question whether
     * changes in the register dataflow and call construct are tolerated. */
    /* mov must be exactly "mov %xsp -> %xdx". */
    if (!(instr_get_opcode(mov) == OP_mov_ld && instr_num_srcs(mov) == 1 &&
          instr_num_dsts(mov) == 1 && opnd_is_reg(instr_get_dst(mov, 0)) &&
          opnd_get_reg(instr_get_dst(mov, 0)) == REG_XDX &&
          opnd_is_reg(instr_get_src(mov, 0)) &&
          opnd_get_reg(instr_get_src(mov, 0)) == REG_XSP)) {
        return false;
    }
    /* XXX: should we match on the call construct as done here, or rely on the flag set
     * for ind->direct call conversion? */
    /* call must be an indirect call through %xdx or (%xdx), with %xsp as its
     * second source and the ind->direct conversion flag set. */
    if (!(TEST(INSTR_IND_CALL_DIRECT, call->flags) && instr_is_call_indirect(call) &&
          opnd_is_reg(instr_get_src(call, 1)) &&
          opnd_get_reg(instr_get_src(call, 1)) == REG_XSP &&
          ((opnd_is_near_base_disp(instr_get_src(call, 0)) &&
            opnd_get_base(instr_get_src(call, 0)) == REG_XDX &&
            opnd_get_disp(instr_get_src(call, 0)) == 0) ||
           (opnd_is_reg(instr_get_src(call, 0)) &&
            opnd_get_reg(instr_get_src(call, 0)) == REG_XDX)))) {
        return false;
    }
    return true;
}
/* Walks back from bb->instr to verify that the preceding instructions
 * match the pattern that we expect to precede a sysenter. */
static instr_t *
bb_verify_sysenter_pattern(dcontext_t *dcontext, build_bb_t *bb)
{
* "call (%xdx); mov %xsp -> %xdx" or "call %xdx; mov %xsp -> %xdx"
* just prior to the sysenter.
* We use "xsp" and "xdx" to be ready for x64 sysenter though we don't
* expect to see it.
*/
instr_t *mov, *call;
mov = instr_get_prev_expanded(dcontext, bb->ilist, bb->instr);
if (mov == NULL)
return NULL;
call = instr_get_prev_expanded(dcontext, bb->ilist, mov);
if (call == NULL)
return NULL;
if (!instr_is_call_sysenter_pattern(call, mov, bb->instr)) {
BBPRINT(bb, 3, "bb_verify_sysenter_pattern -- pattern didn't match\n");
return NULL;
}
return call;
}
* mov to fs:[0], but double processing doesn't hurt. */
* per mov -> fs:[0] instruction in the app. So we don't see modified in place
* handler addresses (see at_Borland_SEH_rct_exemption()) or handler addresses
* that are passed into a shared routine that sets up the frame (not yet seen,
* note that MS dlls that have a _SEH_prolog hardcode the handler address in
* the _SEH_prolog routine, only the data is passed in).
*/
static void
bb_process_SEH_push(dcontext_t *dcontext, build_bb_t *bb, void *value)
{
if (value == NULL || value == (void *)PTR_UINT_MINUS_1) {
STATS_INC(num_endlist_SEH_write);
ASSERT_CURIOSITY(value != NULL);
return;
}
LOG(THREAD, LOG_INTERP, 3, "App moving " PFX " to fs:[0]\n", value);
# ifdef RETURN_AFTER_CALL
if (DYNAMO_OPTION(borland_SEH_rct)) {
* imm ret motif for fall through to the finally of a try finally block
* (very similar to what the Microsoft NT at_SEH_rct_exception() is
* doing). The layout will always look like this :
* push e: (imm32) (e should be in the .E/.F table)
* a:
* ...
* b: ret
* c: jmp rel32 (c should be in the .E/.F table)
* d: jmp a: (rel8/32)
* ... (usually nothing)
* e:
* (where ret at b is targeting e, or a valid after call). The
* exception dispatcher calls c (the SEH frame has c as the handler)
* which jmps to the exception handler which, in turn, calls d to
* execute the finally block. Fall through is as shown above. So,
* we see a .E violation for the handlers call to d and a .C violation
* for the fall trough case of the ret @ b targeting e. We may also
* see a .E violation for a call to a as sometimes the handler computes
* the target of the jmp @ d an passes that to a different exception
* handler.
*
* For try-except we see the following layout :
* I've only seen jmp ind in the case that led to needing
* at_Borland_SEH_rct_exemption() to be added, not that
* it makes any difference.
* [ jmp z: (rel8/32) || (rarely) ret || (very rarely) jmp ind]
* x: jmp rel32 (x should be in the .E/.F table)
* y:
* ...
* call rel32
* [z: ... || ret ]
* Though there may be other optimized layouts (the ret instead of the
* jmp z: is one such) so we may not want to rely on anything other
* then x y. The exception dispatcher calls x (the SEH frame has x as
* the handler) which jmps to the exception handler which, in turn,
* jmps to y to execute the except block. We see a .F violation from
* the handler's jmp to y. at_Borland_SEH_rct_exemption() covers a
* case where the address of x (and thus y) in an existing SEH frame
* is changed in place instead of popping and pushing a new frame.
*
* All addresses (rel and otherwise) should be in the same module. So
* we need to recognize the patter and add d:/y: to the .E/.F table
* as well as a: (sometimes the handler calculates the target of d and
* passes that up to a higher level routine, though I don't see the
* point) and add e: to the .C table.
*
* It would be preferable to handle these exemptions reactively at
* the violation point, but unfortunately, by the time we get to the
* violation the SEH frame information has been popped off the stack
* and is lost, so we have to do it pre-emptively here (pattern
* matching at violation time has proven to difficult in the face of
* certain compiler optimizations). See at_Borland_SEH_rct_exemption()
* in callback.c, that could handle all ind branches to y and ind calls
* to d (see below) at an acceptable level of security if we desired.
* Handling the ret @ b to e reactively would require the ability to
* recreate the exact src cti (so we can use the addr of the ret to
* pattern match) at the violation point (something that can't always
* currently be done, reset flushing etc.). Handling the ind call to
* a (which I've never acutally seen, though I've seen the address
* computed and it looks like it could likely be hit) reactively is
* more tricky. Prob. the only way to handle that is to allow .E/.F
* transistions to any address after a push imm32 of an address in the
* same module, but that might be too permissive. FIXME - should still
* revisit doing the exemptions reactively at some point, esp. once we
* can reliably get the src cti.
*/
extern bool seen_Borland_SEH;
* the first two fields (which are all that we use) are constrained by
* ntdll exception dispatcher (see EXCEPTION_REGISTRATION decleration
* in ntdll.h). */
* need is the handler address and it would allow simpler curiosity
* [see 8181] below. If, as is expected, other options make use of
* this routine we'll probably have one shared get of the SEH frame
* anyways. */
typedef struct _borland_seh_frame_t {
EXCEPTION_REGISTRATION reg;
reg_t xbp;
} borland_seh_frame_t;
borland_seh_frame_t frame;
byte target_buf[RET_0_LENGTH + 2 * JMP_LONG_LENGTH];
app_pc handler_jmp_target = NULL;
if (!d_r_safe_read(value, sizeof(frame), &frame)) {
* a valid SEH frame. Xref 8181, borland_seh_frame_t struct is
* bigger then EXCEPTION_REGISTRATION (which is all that is
* required) so verify smaller size is readable. */
ASSERT_CURIOSITY(
sizeof(EXCEPTION_REGISTRATION) < sizeof(frame) &&
d_r_safe_read(value, sizeof(EXCEPTION_REGISTRATION), &frame));
goto post_borland;
}
if (!d_r_safe_read((app_pc)frame.reg.handler - RET_0_LENGTH, sizeof(target_buf),
target_buf)) {
goto post_borland;
}
if (is_jmp_rel32(&target_buf[RET_0_LENGTH], (app_pc)frame.reg.handler,
&handler_jmp_target)) {
app_pc base;
LOG(THREAD, LOG_INTERP, 3,
"Read possible borland SEH frame @" PFX "\n\t"
"next=" PFX " handler=" PFX " xbp=" PFX "\n\t",
value, frame.reg.prev, frame.reg.handler, frame.xbp);
DOLOG(3, LOG_INTERP,
{ dump_buffer_as_bytes(THREAD, target_buf, sizeof(target_buf), 0); });
if ((DYNAMO_OPTION(rct_ind_jump) != OPTION_DISABLED ||
DYNAMO_OPTION(rct_ind_call) != OPTION_DISABLED) &&
rct_ind_branch_target_lookup(
dcontext, (app_pc)frame.reg.handler + JMP_LONG_LENGTH)) {
* frame pop, no need to continue */
STATS_INC(num_borland_SEH_dup_frame);
LOG(THREAD, LOG_INTERP, 3, "Processing duplicate Borland SEH frame\n");
goto post_borland;
}
base = get_module_base((app_pc)frame.reg.handler);
STATS_INC(num_borland_SEH_initial_match);
* of get_module_base(). We are checking the result against a
* known module base (base) so no need to duplicate the is module
* check. FIXME - the checks prob. aren't even necessary given the
* later is_in_code_section checks. Xref case 8171. */
* is_in_code_section() call and check against that before falling
* back on is_in_code_section in case of multiple code sections. */
if (base != NULL && get_allocation_base(handler_jmp_target) == base &&
get_allocation_base(bb->instr_start) == base &&
* verify that frame->handler (x: c:) is on the .E/.F
* table already. We could also try to match known pre x:
* post y: patterns. */
is_in_code_section(base, bb->instr_start, NULL, NULL) &&
is_in_code_section(base, handler_jmp_target, NULL, NULL) &&
is_range_in_code_section(base, (app_pc)frame.reg.handler,
(app_pc)frame.reg.handler + JMP_LONG_LENGTH + 1,
NULL, NULL)) {
app_pc finally_target;
byte push_imm_buf[PUSH_IMM32_LENGTH];
DEBUG_DECLARE(bool ok;)
* to .E/.F table */
STATS_INC(num_borland_SEH_try_match);
LOG(THREAD, LOG_INTERP, 2,
"Found Borland SEH frame adding " PFX " to .E/.F table\n",
(app_pc)frame.reg.handler + JMP_LONG_LENGTH);
if ((DYNAMO_OPTION(rct_ind_jump) != OPTION_DISABLED ||
DYNAMO_OPTION(rct_ind_call) != OPTION_DISABLED)) {
d_r_mutex_lock(&rct_module_lock);
rct_add_valid_ind_branch_target(
dcontext, (app_pc)frame.reg.handler + JMP_LONG_LENGTH);
d_r_mutex_unlock(&rct_module_lock);
}
* callback .C, see notes there */
if (!seen_Borland_SEH) {
SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT);
seen_Borland_SEH = true;
SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT);
}
DEBUG_DECLARE(ok =) os_module_set_flag(base, MODULE_HAS_BORLAND_SEH);
ASSERT(ok);
if (target_buf[0] == RAW_OPCODE_ret &&
(is_jmp_rel32(&target_buf[RET_0_LENGTH + JMP_LONG_LENGTH],
(app_pc)frame.reg.handler + JMP_LONG_LENGTH,
&finally_target) ||
is_jmp_rel8(&target_buf[RET_0_LENGTH + JMP_LONG_LENGTH],
(app_pc)frame.reg.handler + JMP_LONG_LENGTH,
&finally_target)) &&
d_r_safe_read(finally_target - sizeof(push_imm_buf),
sizeof(push_imm_buf), push_imm_buf) &&
push_imm_buf[0] == RAW_OPCODE_push_imm32) {
app_pc push_val = *(app_pc *)&push_imm_buf[1];
* and is_in_code_section() usage. */
if (get_allocation_base(finally_target) == base &&
is_in_code_section(base, finally_target, NULL, NULL) &&
get_allocation_base(push_val) == base &&
* .E/.F table, at least for -rct_analyze_at_load */
is_in_code_section(base, push_val, NULL, NULL)) {
* and finally_target (a:) to the .E/.F table */
STATS_INC(num_borland_SEH_finally_match);
LOG(THREAD, LOG_INTERP, 2,
"Found Borland SEH finally frame adding " PFX " to"
" .C table and " PFX " to .E/.F table\n",
push_val, finally_target);
if ((DYNAMO_OPTION(rct_ind_jump) != OPTION_DISABLED ||
DYNAMO_OPTION(rct_ind_call) != OPTION_DISABLED)) {
d_r_mutex_lock(&rct_module_lock);
rct_add_valid_ind_branch_target(dcontext, finally_target);
d_r_mutex_unlock(&rct_module_lock);
}
if (DYNAMO_OPTION(ret_after_call)) {
fragment_add_after_call(dcontext, push_val);
}
} else {
ASSERT_CURIOSITY(false && "partial borland seh finally match");
}
}
}
}
}
post_borland:
# endif
return;
}
* return true if bb should be continued, false if it shouldn't */
static bool
bb_process_fs_ref_opnd(dcontext_t *dcontext, build_bb_t *bb, opnd_t dst, bool *is_to_fs0)
{
ASSERT(is_to_fs0 != NULL);
*is_to_fs0 = false;
if (opnd_is_far_base_disp(dst) &&
opnd_get_segment(dst) == SEG_FS) {
if (bb->instr_start != bb->start_pc) {
* instruction, so we can see it as the first instruction of a
* new bb where we can use the register state. */
* with default options. We do end up with slightly more bb's
* (and associated bookeeping costs), but frequently with MS dlls
* we reduce code cache dupliaction from jmp/call ellision
* (_SEH_[Pro,Epi]log otherwise ends up frequently duplicated for
* instance). */
* a bb built for the next instruction, as we have to have
* reproducible bb building for recreate app state. We should
* only get here through code duplication (typically jmp/call
* inlining, though can also be through multiple entry points into
* the same block of non cti instructions). */
bb_stop_prior_to_instr(dcontext, bb, false );
return false;
}
* this any earlier since have to preserve bb building/ending behavior
* even when not for cache (for recreation etc.). */
if (bb->app_interp) {
* instr_compute_address_ex_priv() in order to handle VSIB) but the
* current usage is just to identify the Borland pattern so that's ok.
*/
if (opnd_compute_address_priv(dst, get_mcontext(dcontext)) == NULL) {
*is_to_fs0 = true;
}
}
}
return true;
}
* also be helpful for other SEH tasks (xref case 5824). */
static bool
bb_process_fs_ref(dcontext_t *dcontext, build_bb_t *bb)
{
ASSERT(DYNAMO_OPTION(process_SEH_push) &&
instr_get_prefix_flag(bb->instr, PREFIX_SEG_FS));
* want to fully decode it, check if it's pushing an SEH frame
* and, if so, pass it to the SEH checking routines (currently
* just used for the Borland SEH rct handling). If this is not
* the first instruction of the bb then we want to stop the bb
* just before this instruction so that when we do process this
* instruction it will be the first in the bb (allowing us to
* use the register state). */
if (!bb->full_decode) {
instr_decode(dcontext, bb->instr);
* decode, FIXME is there a better way to handle this? */
if (!instr_valid(bb->instr)) {
ASSERT_NOT_TESTED();
if (bb->cur_pc == NULL)
bb->cur_pc = bb->instr_start;
bb_process_invalid_instr(dcontext, bb);
return false;
}
ASSERT(instr_get_prefix_flag(bb->instr, PREFIX_SEG_FS));
}
* FIXME - might we see other types we'd want to intercept?
* do we want to proccess pop instructions (usually just for removing
* a frame)? */
if (instr_get_opcode(bb->instr) == OP_mov_st) {
bool is_to_fs0;
opnd_t dst = instr_get_dst(bb->instr, 0);
if (!bb_process_fs_ref_opnd(dcontext, bb, dst, &is_to_fs0))
return false;
* this any earlier since have to preserve bb building/ending behavior
* even when not for cache (for recreation etc.). */
if (bb->app_interp) {
if (is_to_fs0) {
ptr_int_t value = 0;
opnd_t src = instr_get_src(bb->instr, 0);
if (opnd_is_immed_int(src)) {
value = opnd_get_immed_int(src);
} else if (opnd_is_reg(src)) {
value = reg_get_value_priv(opnd_get_reg(src), get_mcontext(dcontext));
} else {
ASSERT_NOT_REACHED();
}
STATS_INC(num_SEH_pushes_processed);
LOG(THREAD, LOG_INTERP, 3, "found mov to fs:[0] @ " PFX "\n",
bb->instr_start);
bb_process_SEH_push(dcontext, bb, (void *)value);
} else {
STATS_INC(num_fs_movs_not_SEH);
}
}
}
# if defined(DEBUG) && defined(INTERNAL)
else if (INTERNAL_OPTION(check_for_SEH_push)) {
int i;
int num_dsts = instr_num_dsts(bb->instr);
for (i = 0; i < num_dsts; i++) {
bool is_to_fs0;
opnd_t dst = instr_get_dst(bb->instr, i);
if (!bb_process_fs_ref_opnd(dcontext, bb, dst, &is_to_fs0)) {
STATS_INC(num_process_SEH_bb_early_terminate_debug);
return false;
}
* SEH frame stored on tos */
if (is_to_fs0) {
if (instr_get_opcode(bb->instr) == OP_pop) {
LOG(THREAD, LOG_INTERP, 4, "found pop to fs:[0] @ " PFX "\n",
bb->instr_start);
STATS_INC(num_process_SEH_pop_fs0);
} else {
LOG(THREAD, LOG_INTERP, 1,
"found unexpected write to fs:[0] @" PFX "\n", bb->instr_start);
DOLOG(1, LOG_INTERP, { d_r_loginst(dcontext, 1, bb->instr, ""); });
ASSERT_CURIOSITY(!is_to_fs0);
}
}
}
}
# endif
return true;
}
#endif
#if defined(UNIX) && !defined(DGC_DIAGNOSTICS) && defined(X86)
* For mov fs/gs => reg/[mem], simply mangle it to write
* the app's fs/gs selector value into dst.
* For mov reg/mem => fs/gs, we make it as the first instruction
* of bb, and mark that bb not linked and has mov_seg instr,
* and change that instruction to be a nop.
* Then whenever before entering code cache, we check if that's the bb
* has mov_seg. If yes, we will update the information we maintained
* about the app's fs/gs.
*/
static bool
bb_process_mov_seg(dcontext_t *dcontext, build_bb_t *bb)
{
reg_id_t seg;
if (!INTERNAL_OPTION(mangle_app_seg))
return true;
ASSERT(instr_num_srcs(bb->instr) == 1);
if (opnd_is_reg(instr_get_src(bb->instr, 0)) &&
reg_is_segment(opnd_get_reg(instr_get_src(bb->instr, 0))))
return true;
ASSERT(instr_num_dsts(bb->instr) == 1);
ASSERT(opnd_is_reg(instr_get_dst(bb->instr, 0)));
seg = opnd_get_reg(instr_get_dst(bb->instr, 0));
ASSERT(reg_is_segment(seg));
if (seg != SEG_GS && seg != SEG_FS)
return true;
if (seg == IF_X64_ELSE(SEG_FS, SEG_FS) && !INTERNAL_OPTION(private_loader))
return true;
if (bb->instr_start == bb->start_pc) {
bb->flags |= FRAG_CANNOT_BE_TRACE;
bb->flags |= FRAG_HAS_MOV_SEG;
return true;
}
LOG(THREAD, LOG_INTERP, 3, "ending bb before mov_seg\n");
* instruction from the bb ilist.
*/
bb->cur_pc = instr_get_raw_bits(bb->instr);
instrlist_remove(bb->ilist, bb->instr);
instr_destroy(dcontext, bb->instr);
bb->instr = NULL;
* breaking traces here shouldn't be a perf issue b/c this is so rare,
* it should happen only once per thread on setting up tls.
*/
bb->flags |= FRAG_MUST_END_TRACE;
return false;
}
#endif
* with *continue_bb indicating if the bb should be continued or not.
* When returning false, continue_bb isn't pertinent.
*/
static bool
bb_process_ignorable_syscall(dcontext_t *dcontext, build_bb_t *bb, int sysnum,
bool *continue_bb)
{
STATS_INC(ignorable_syscalls);
BBPRINT(bb, 3, "found ignorable system call 0x%04x\n", sysnum);
#ifdef WINDOWS
if (get_syscall_method() != SYSCALL_METHOD_SYSENTER) {
DOCHECK(1, {
if (get_syscall_method() == SYSCALL_METHOD_WOW64)
ASSERT_NOT_TESTED();
});
if (continue_bb != NULL)
*continue_bb = true;
return true;
} else {
* after the call to sysenter? */
instr_t *call = bb_verify_sysenter_pattern(dcontext, bb);
if (call != NULL) {
* change the cur_pc to continue at the after-call addr. This is
* safe since the preceding call is in the fragment and
* %xsp/(%xsp) hasn't changed since the call. Obviously, we assume
* that the sysenter breaks control flow in fashion such any
* instruction that follows it isn't reached by DR.
*/
if (DYNAMO_OPTION(ignore_syscalls_follow_sysenter)) {
bb->cur_pc = instr_get_raw_bits(call) + instr_length(dcontext, call);
if (continue_bb != NULL)
*continue_bb = true;
return true;
} else {
* skips the vsyscall 'ret' that's executed natively after the
* syscall and ends up at the correct place.
*/
* with a direct exit stub to the after-call address, which
* is fine. If bb->exit_target < bb->start_pc, the future
* fragment for exit_target is marked as a trace head which
* isn't intended. A potentially undesirable side effect
* is that exit_target's fragment can't be included in
* trace for start_pc.
*/
bb->exit_target = instr_get_raw_bits(call) + instr_length(dcontext, call);
if (continue_bb != NULL)
*continue_bb = false;
return true;
}
}
STATS_INC(ignorable_syscalls_failed_sysenter_pattern);
* can try shared syscall? */
* this case. */
STATS_DEC(ignorable_syscalls);
return false;
}
#elif defined(MACOS) && defined(X86)
if (instr_get_opcode(bb->instr) == OP_sysenter) {
* sysenter wrappers put the retaddr into edx as the post-kernel continuation.
*/
bb->exit_type |= LINK_INDIRECT | LINK_RETURN;
bb->ibl_branch_type = IBL_RETURN;
bb->exit_target = get_ibl_routine(dcontext, get_ibl_entry_type(bb->exit_type),
DEFAULT_IBL_BB(), bb->ibl_branch_type);
LOG(THREAD, LOG_INTERP, 4, "sysenter exit target = " PFX "\n", bb->exit_target);
if (continue_bb != NULL)
*continue_bb = false;
} else if (continue_bb != NULL)
*continue_bb = true;
return true;
#else
if (continue_bb != NULL)
*continue_bb = true;
return true;
#endif
}
#ifdef WINDOWS
/* Ends the bb at a system call that is routed through the shared syscall
 * routine (requires -shared_syscalls): flags the instruction and fragment,
 * points the bb's exit at shared_syscall_routine(), and clears bb->instr.
 */
static void
bb_process_shared_syscall(dcontext_t *dcontext, build_bb_t *bb, int sysnum)
{
    ASSERT(DYNAMO_OPTION(shared_syscalls));
    /* Debug-only bookkeeping of which kind of syscall was found. */
    DODEBUG({
        if (!ignorable_system_call(sysnum, bb->instr, NULL)) {
            STATS_INC(optimizable_syscalls);
        } else {
            STATS_INC(ignorable_syscalls);
        }
    });
    BBPRINT(bb, 3, "found %soptimizable system call 0x%04x\n",
            INTERNAL_OPTION(shared_eq_ignore) ? "ignorable-" : "", sysnum);

    LOG(THREAD, LOG_INTERP, 3,
        "ending bb at syscall & NOT removing the interrupt itself\n");

    /* The fragment must end any trace, and the syscall instruction itself is
     * tagged as going through shared syscall. */
    bb->flags |= FRAG_MUST_END_TRACE;
    bb->instr->flags |= INSTR_SHARED_SYSCALL;
    /* The bb exits to the shared syscall routine.
     * Once a fragment reaches the shared syscall code, it can be safely
     * deleted, for example, if the thread is interrupted for a callback and
     * DR needs to delete fragments for cache management.
     *
     * Note that w/shared syscall, syscalls can be executed from TWO
     * places -- shared_syscall and do_syscall.
     */
    bb->exit_target = shared_syscall_routine(dcontext);
    /* Per the original note, later processing will
     * remove this instruction, so set to NULL so translation does the
     * right thing */
    bb->instr = NULL;
}
#endif
#ifdef ARM
* and the position of instr in the current IT block, and returns whether
* instr is the last instruction in the block.
*/
static bool
instr_is_last_in_it_block(instr_t *instr, instr_t **it_out, uint *pos_out)
{
instr_t *it;
int num_instrs;
ASSERT(instr != NULL && instr_get_isa_mode(instr) == DR_ISA_ARM_THUMB &&
instr_is_predicated(instr) && instr_is_app(instr));
for (it = instr_get_prev(instr), num_instrs = 1;
it != NULL && num_instrs <= 4 ;
it = instr_get_prev(it)) {
if (instr_is_label(it))
continue;
if (instr_get_opcode(it) == OP_it)
break;
num_instrs++;
}
ASSERT(it != NULL && instr_get_opcode(it) == OP_it);
ASSERT(num_instrs <= instr_it_block_get_count(it));
if (it_out != NULL)
*it_out = it;
if (pos_out != NULL)
*pos_out = num_instrs - 1;
if (num_instrs == instr_it_block_get_count(it))
return true;
return false;
}
/* Rewrites the mask operand (src 1) of the IT instruction "it" so that its
 * block covers only the instructions at positions 0..pos, for a bb that ends
 * inside the IT block.  pos must not be the last position in the block.
 * Note that the routine currently ends in an unconditional
 * ASSERT_NOT_IMPLEMENTED.
 */
static void
adjust_it_instr_for_split(dcontext_t *dcontext, instr_t *it, uint pos)
{
    dr_pred_type_t block_pred[IT_BLOCK_MAX_INSTRS];
    uint block_count = instr_it_block_get_count(it);
    uint idx;
    /* Index 0: the shortened block kept in this bb; index 1: the remainder
     * (only computed inside a check below and not otherwise used here). */
    byte firstcond[2], mask[2];
    DEBUG_DECLARE(bool ok;)

    ASSERT(pos < block_count - 1);
    /* Snapshot the predicate of every slot in the original block. */
    idx = 0;
    while (idx < block_count) {
        block_pred[idx] = instr_it_block_get_pred(it, idx);
        idx++;
    }
    /* Sanity check: every app instruction after the IT is predicated, matches
     * the recorded predicate, and there are no more than pos + 1 of them. */
    DOCHECK(CHKLVL_ASSERTS, {
        instr_t *instr = instr_get_next_app(it);
        idx = 0;
        while (instr != NULL) {
            ASSERT(instr_is_predicated(instr) && idx <= pos);
            ASSERT(block_pred[idx++] == instr_get_predicate(instr));
            instr = instr_get_next_app(instr);
        }
    });
    /* Immediates for the leading part of the block: slots beyond pos, and
     * always the fourth slot, are passed as DR_PRED_NONE. */
    DEBUG_DECLARE(ok =)
    instr_it_block_compute_immediates(
        block_pred[0], (pos > 0) ? block_pred[1] : DR_PRED_NONE,
        (pos > 1) ? block_pred[2] : DR_PRED_NONE, DR_PRED_NONE, &firstcond[0], &mask[0]);
    ASSERT(ok);
    /* Check-only: the trailing part of the block must be encodable too. */
    DOCHECK(CHKLVL_ASSERTS, {
        DEBUG_DECLARE(ok =)
        instr_it_block_compute_immediates(
            block_pred[pos + 1],
            (block_count > pos + 2) ? block_pred[pos + 2] : DR_PRED_NONE,
            (block_count > pos + 3) ? block_pred[pos + 3] : DR_PRED_NONE, DR_PRED_NONE,
            &firstcond[1], &mask[1]);
        ASSERT(ok);
    });
    /* The first condition (src 0) is expected to be unchanged; only the mask
     * (src 1) is rewritten. */
    ASSERT(opnd_get_immed_int(instr_get_src(it, 0)) == firstcond[0]);
    instr_set_src(it, 1, OPND_CREATE_INT(mask[0]));
    LOG(THREAD, LOG_INTERP, 3,
        "ending bb in an IT block & adjusting the IT instruction\n");
    /* Splitting an IT block is flagged as not (fully) implemented. */
    ASSERT_NOT_IMPLEMENTED(false);
}
#endif
/* Ends the bb at a system call that is neither ignorable nor shared: marks
 * the exit and the syscall instruction with the non-ignorable syscall flags,
 * clears bb->instr, and forces the fragment to end any trace.
 * Always returns false (do not continue the bb).
 */
static bool
bb_process_non_ignorable_syscall(dcontext_t *dcontext, build_bb_t *bb, int sysnum)
{
    BBPRINT(bb, 3, "found non-ignorable system call 0x%04x\n", sysnum);
    STATS_INC(non_ignorable_syscalls);
    bb->exit_type |= LINK_NI_SYSCALL;
    LOG(THREAD, LOG_INTERP, 3, "ending bb at syscall & removing the interrupt itself\n");
#ifdef UNIX
    /* Interrupt-style syscalls (OP_int / OP_ecall / OP_svc depending on the
     * architecture) get the _INT flavor of the flags. */
    if (instr_get_opcode(bb->instr) ==
        IF_X86_ELSE(OP_int, IF_RISCV64_ELSE(OP_ecall, OP_svc))) {
#    if defined(MACOS) && defined(X86)
        int int_num = instr_get_interrupt_number(bb->instr);
        if (int_num == 0x81 || int_num == 0x82) {
            /* Interrupts 0x81 and 0x82 take a special exit instead. */
            bb->instr->flags |= INSTR_BRANCH_SPECIAL_EXIT;
            bb->exit_type |= LINK_SPECIAL_EXIT;
        } else {
            ASSERT(int_num == 0x80);
#    endif
            bb->instr->flags |= INSTR_NI_SYSCALL_INT;
            bb->exit_type |= LINK_NI_SYSCALL_INT;
#    if defined(MACOS) && defined(X86)
        }
#    endif
    } else
#endif
        bb->instr->flags |= INSTR_NI_SYSCALL;
#ifdef ARM
    /* A predicated (conditional) syscall: remember its predicate in the bb. */
    if (instr_is_predicated(bb->instr)) {
        instr_t *it_instr;
        uint pos_in_block;
        ASSERT(instr_is_syscall(bb->instr));
        bb->svc_pred = instr_get_predicate(bb->instr);
        if (instr_get_isa_mode(bb->instr) == DR_ISA_ARM_THUMB &&
            !instr_is_last_in_it_block(bb->instr, &it_instr, &pos_in_block)) {
            /* The bb ends in the middle of an IT block, so the IT instruction
             * is adjusted here, which means later stages see the
             * modified IT instr.  We should adjust the IT instr at mangling
             * stage after client instrumentation, but that is complex.
             */
            adjust_it_instr_for_split(dcontext, it_instr, pos_in_block);
        }
    }
#endif
    /* The bb no longer tracks the syscall as its current instruction. */
    bb->instr = NULL;
    bb->flags |= FRAG_MUST_END_TRACE;
    return false;
}
/* Processes the system call at bb->instr: classifies it as ignorable, shared
 * (WINDOWS only), or non-ignorable and dispatches to the matching handler.
 * Returns true if the bb should continue past the syscall, false if the bb
 * ends here.
 */
static inline bool
bb_process_syscall(dcontext_t *dcontext, build_bb_t *bb)
{
    int sysnum;

    /* For simplicity, syscall processing is deferred until post-client.
     * We give up on inlining but we can still use ignorable/shared syscalls
     * and trace continuation.
     */
    if (bb->pass_to_client && !bb->post_client)
        return false;
#ifdef DGC_DIAGNOSTICS
    if (TEST(FRAG_DYNGEN, bb->flags) && !is_dyngen_vsyscall(bb->instr_start)) {
        LOG(THREAD, LOG_INTERP, 1, "WARNING: syscall @ " PFX " in dyngen code!\n",
            bb->instr_start);
    }
#endif
    BBPRINT(bb, 4, "interp: syscall @ " PFX "\n", bb->instr_start);
    check_syscall_method(dcontext, bb->instr);
    bb->flags |= FRAG_HAS_SYSCALL;

    /* Determine the syscall number: if we can identify the syscall as ignorable
     * we let bb keep going, else we end bb and flag it
     */
    sysnum = find_syscall_num(dcontext, bb->ilist, bb->instr);
#ifdef VMX86_SERVER
    DOSTATS({
        if (instr_get_opcode(bb->instr) == OP_int &&
            instr_get_interrupt_number(bb->instr) == VMKUW_SYSCALL_GATEWAY) {
            STATS_INC(vmkuw_syscall_sites);
            LOG(THREAD, LOG_SYSCALLS, 2, "vmkuw system call site: #=%d\n", sysnum);
        }
    });
#endif
    BBPRINT(bb, 3, "syscall # is %d\n", sysnum);
    /* A sysnum of -1 means "unknown" from here on: it disables the ignorable
     * and shared paths below. */
    if (sysnum != -1 && instrument_filter_syscall(dcontext, sysnum)) {
        BBPRINT(bb, 3, "client asking to intercept => pretending syscall # %d is -1\n",
                sysnum);
        sysnum = -1;
    }
#ifdef ARM
    if (sysnum != -1 && instr_is_predicated(bb->instr)) {
        BBPRINT(bb, 3,
                "conditional system calls cannot be inlined => "
                "pretending syscall # %d is -1\n",
                sysnum);
        sysnum = -1;
    }
#endif
    if (sysnum != -1 && DYNAMO_OPTION(ignore_syscalls) &&
        ignorable_system_call(sysnum, bb->instr, NULL)
#ifdef X86
        /* On Linux, sysenter is never treated as ignorable here since we
         * do not have in-cache support for the post-sysenter continuation: we rely
         * for now on very simple sysenter handling where d_r_dispatch uses asynch_target
         * to know where to go next.
         */
        IF_LINUX(&&instr_get_opcode(bb->instr) != OP_sysenter)
#endif
    ) {
        bool continue_bb;
        if (bb_process_ignorable_syscall(dcontext, bb, sysnum, &continue_bb)) {
            /* Without -inline_ignored_syscalls the bb always ends here. */
            return DYNAMO_OPTION(inline_ignored_syscalls) && continue_bb;
        }
    }
#ifdef WINDOWS
    if (sysnum != -1 && DYNAMO_OPTION(shared_syscalls) &&
        optimizable_system_call(sysnum)) {
        bb_process_shared_syscall(dcontext, bb, sysnum);
        return false;
    }
#endif
    /* Fall back to the non-ignorable path, which always ends the bb. */
    return bb_process_non_ignorable_syscall(dcontext, bb, sysnum);
}
* Only sets continue_bb if it returns true.
*/
static bool
bb_process_indcall_syscall(dcontext_t *dcontext, build_bb_t *bb, bool *continue_bb)
{
ASSERT(continue_bb != NULL);
#ifdef WINDOWS
if (instr_is_wow64_syscall(bb->instr)) {
*continue_bb = bb_process_syscall(dcontext, bb);
return true;
}
#endif
return false;
}
/* Processes a (non-syscall) interrupt instruction at bb->instr.  Returns true
 * if the bb should continue past it, false if the bb ends here.
 */
static inline bool
bb_process_interrupt(dcontext_t *dcontext, build_bb_t *bb)
{
#if defined(DEBUG) || defined(INTERNAL) || defined(WINDOWS)
    int int_num = instr_get_interrupt_number(bb->instr);
#endif
    /* For simplicity, interrupt processing is deferred until post-client.
     * We give up on inlining but we can still use ignorable/shared syscalls
     * and trace continuation.
     * PR 550752: we cannot end at int 0x2d: we live w/ client consequences
     */
    if (bb->pass_to_client && !bb->post_client IF_WINDOWS(&&int_num != 0x2d))
        return false;
    BBPRINT(bb, 3, "int 0x%x @ " PFX "\n", int_num, bb->instr_start);
#ifdef WINDOWS
    if (int_num == 0x2b) {
        /* int 0x2b is a callback return: end the bb with that exit type. */
        bb->exit_type |= LINK_CALLBACK_RETURN;
        BBPRINT(bb, 3, "ending bb at cb ret & removing the interrupt itself\n");
        /* Clear the current instr, per the original note to keep translation
         * correct.  mangle will destroy the instruction */
        bb->instr = NULL;
        bb->flags |= FRAG_MUST_END_TRACE;
        STATS_INC(num_int2b);
        return false;
    }
    /* Any other interrupt is only reported (once) and the bb continues. */
    SYSLOG_INTERNAL_INFO_ONCE("non-syscall, non-int2b 0x%x @ " PFX " from " PFX, int_num,
                              bb->instr_start, bb->start_pc);
#endif
    return true;
}
* direct call, process it and return true, else, return false.
* FIXME PR 288327: put in linux call* to vsyscall page
*/
static bool
bb_process_convertible_indcall(dcontext_t *dcontext, build_bb_t *bb)
{
#ifdef X86
* and expensive, with a false return should any fail.
*/
instr_t *instr;
opnd_t src0;
instr_t *call_instr;
int call_src_reg;
app_pc callee;
bool vsyscall = false;
if (instr_get_opcode(bb->instr) != OP_call_ind)
return false;
* "mov <imm> -> %reg; call (%reg)" pair. First check for the call.
*/
* while #ifdef-ing the WINDOWS case. It's still ugly though.
*/
instr = bb->instr;
if (!(
# ifdef WINDOWS
(opnd_is_near_base_disp(instr_get_src(instr, 0)) &&
opnd_get_base(instr_get_src(instr, 0)) == REG_XDX &&
opnd_get_disp(instr_get_src(instr, 0)) == 0) ||
# endif
opnd_is_reg(instr_get_src(instr, 0))))
return false;
* bytes and if they could hold a "mov" instruction.
*/
if (!TEST(FRAG_HAS_DIRECT_CTI, bb->flags) && bb->instr_start - 5 >= bb->start_pc) {
byte opcode = *((byte *)bb->instr_start - 5);
* the 0xb8-0xbf range (Intel IA-32 ISA ref, v.2) and specify the
* destination register, i.e., 0xb8 means that %xax is the destination.
*/
if (opcode < 0xb8 || opcode > 0xbf)
return false;
}
src0 = instr_get_src(instr, 0);
call_instr = instr;
instr = instr_get_prev_expanded(dcontext, bb->ilist, bb->instr);
call_src_reg =
opnd_is_near_base_disp(src0) ? opnd_get_base(src0) : opnd_get_reg(src0);
if (instr == NULL || instr_get_opcode(instr) != OP_mov_imm ||
opnd_get_reg(instr_get_dst(instr, 0)) != call_src_reg)
return false;
* thru memory -- just check that the call uses a register.
*/
callee = NULL;
if (opnd_is_reg(src0)) {
callee = (app_pc)opnd_get_immed_int(instr_get_src(instr, 0));
# ifdef WINDOWS
# ifdef PROGRAM_SHEPHERDING
* VSYSCALL_BOOTSTRAP_ADDR? Both are hacky. */
if (is_dyngen_vsyscall((app_pc)opnd_get_immed_int(instr_get_src(instr, 0)))) {
LOG(THREAD, LOG_INTERP, 4,
"Pre-SP2 style indirect call "
"to sysenter found at " PFX "\n",
bb->instr_start);
STATS_INC(num_sysenter_indcalls);
vsyscall = true;
ASSERT(opnd_get_immed_int(instr_get_src(instr, 0)) ==
(ptr_int_t)VSYSCALL_BOOTSTRAP_ADDR);
ASSERT(!use_ki_syscall_routines());
} else
# endif
# endif
STATS_INC(num_convertible_indcalls);
}
# ifdef WINDOWS
* address VSYSCALL_BOOTSTRAP_ADDR (0x7ffe0300) holds the address of
* KiFastSystemCall or (FIXME - not handled) on older platforms KiIntSystemCall.
* FIXME It's unsavory to hard-code 0x7ffe0300, but the constant has little
* context in an SP2 os. It's a hold-over from pre-SP2.
*/
else if (get_syscall_method() == SYSCALL_METHOD_SYSENTER && call_src_reg == REG_XDX &&
opnd_get_immed_int(instr_get_src(instr, 0)) ==
(ptr_int_t)VSYSCALL_BOOTSTRAP_ADDR) {
* value in the immediate field is ok as it's the vsyscall page
* which 1) cannot be made unreadable and 2) cannot be made writable so
* the stored value will not change. Of course, it's possible that the
* os could change the page contents.
*/
callee = (app_pc) * ((ptr_uint_t *)opnd_get_immed_int(instr_get_src(instr, 0)));
if (get_app_sysenter_addr() == NULL) {
* cannot have later recreations have differing behavior, so we must
* handle that case (even though it doesn't matter performance-wise
* as the first call* is usually in runtime init code that's
* executed once). So we do a raw byte compare to:
* ntdll!KiFastSystemCall:
* 7c82ed50 8bd4 mov xdx,xsp
* 7c82ed52 0f34 sysenter
*/
uint raw;
if (!d_r_safe_read(callee, sizeof(raw), &raw) || raw != 0x340fd48b)
callee = NULL;
} else {
* sysenter -- check the sysenter's address as 2 bytes past the callee.
*/
if (callee + 2 != get_app_sysenter_addr())
callee = NULL;
}
vsyscall = (callee != NULL);
ASSERT(use_ki_syscall_routines());
DODEBUG({
if (callee == NULL)
ASSERT_CURIOSITY(false && "call* to vsyscall unexpected mismatch");
else {
LOG(THREAD, LOG_INTERP, 4,
"Post-SP2 style indirect call "
"to sysenter found at " PFX "\n",
bb->instr_start);
STATS_INC(num_sysenter_indcalls);
}
});
}
# endif
* the callee address.
*/
if (callee == NULL)
return false;
if (vsyscall) {
bb->flags &= ~FRAG_COARSE_GRAIN;
STATS_INC(coarse_prevent_indcall);
}
LOG(THREAD, LOG_INTERP, 4,
"interp: possible convertible"
" indirect call from " PFX " to " PFX "\n",
bb->instr_start, callee);
if (leave_call_native(callee) || must_not_be_entered(callee)) {
BBPRINT(bb, 3, " NOT inlining indirect call to " PFX "\n", callee);
bb->flags &= ~FRAG_COARSE_GRAIN;
STATS_INC(coarse_prevent_cti);
ASSERT_CURIOSITY_ONCE(!vsyscall && "leaving call* to vsyscall");
return true;
}
if (bb->follow_direct && !must_not_be_entered(callee) &&
bb->num_elide_call < DYNAMO_OPTION(max_elide_call) &&
(DYNAMO_OPTION(elide_back_calls) || bb->cur_pc <= callee)) {
* direct call's callee. If such code appears in another
* (3rd) place, we should outline it.
* FIXME: use follow_direct_call()
*/
if (vsyscall) {
* (for vsyscall-in-ntdll) we pre-emptively mark as has-syscall.
*/
ASSERT(!TEST(FRAG_HAS_SYSCALL, bb->flags));
bb->flags |= FRAG_HAS_SYSCALL;
}
if (check_new_page_jmp(dcontext, bb, callee)) {
if (vsyscall)
bb->flags &= ~FRAG_HAS_SYSCALL;
bb->num_elide_call++;
STATS_INC(total_elided_calls);
STATS_TRACK_MAX(max_elided_calls, bb->num_elide_call);
bb->cur_pc = callee;
call_instr->flags |= INSTR_IND_CALL_DIRECT;
BBPRINT(bb, 4, " continuing in callee at " PFX "\n", bb->cur_pc);
return true;
}
if (vsyscall) {
* this flag set as soon as we decode a few more instrs and hit the
* syscall itself -- but for pre-sp2 we currently could be elsewhere on
* the same page, so let's be safe here.
*/
bb->flags &= ~FRAG_HAS_SYSCALL;
}
}
BBPRINT(bb, 3, " NOT following indirect call from " PFX " to " PFX "\n",
bb->instr_start, callee);
DODEBUG({
if (vsyscall) {
DO_ONCE({
if (DYNAMO_OPTION(max_elide_call) <= 2)
SYSLOG_INTERNAL_WARNING("leaving call* to vsyscall");
else
ASSERT_CURIOSITY(false && "leaving call* to vsyscall");
});
}
});
;
#elif defined(ARM)
ASSERT_NOT_IMPLEMENTED(false);
#endif
return false;
}
#ifdef X86
/* Returns the pointer-sized value currently stored at iat_reference.
 * This is a plain dereference: no validation or fault handling is done here.
 */
static inline app_pc
read_from_IAT(app_pc iat_reference)
{
    /* FIXME: we should have looked up where the real IAT should be at
     * the time of checking whether is_in_IAT
     */
    return *(app_pc *)iat_reference;
}
* users still have to check the referred to value to verify targeting
* a native module.
*/
static bool
is_targeting_convertible_IAT(dcontext_t *dcontext, instr_t *instr,
app_pc *iat_reference )
{
* if too many writes to its IAT are found,
* even 1 may be too much to handle!
*/
* any registers used for effective address calculation
* can not be guaranteed to be constant dynamically.
*/
* good sign that we should go backwards and look for a possible
* mov IAT[func] -> %reg and then optimize that as well - case 1948
*/
app_pc memory_reference = NULL;
opnd_t opnd = instr_get_target(instr);
LOG(THREAD, LOG_INTERP, 4, "is_targeting_convertible_IAT: ");
* ff 15 8810807c call dword ptr [kernel32+0x1088 (7c801088)]
* where
* [7c801088] = 7c90f04c ntdll!RtlAnsiStringToUnicodeString
*
* The ModR/M byte for a displacement only with no SIB should be
* 15 for CALL, 25 for JMP, (no far versions for IAT)
*/
if (opnd_is_near_base_disp(opnd)) {
IF_X64(ASSERT_NOT_IMPLEMENTED(false));
memory_reference = (app_pc)(ptr_uint_t)opnd_get_disp(opnd);
if (opnd_get_base(opnd) != REG_NULL || opnd_get_index(opnd) != REG_NULL) {
return false;
}
ASSERT(opnd_get_scale(opnd) == 0);
} else {
return false;
}
LOG(THREAD, LOG_INTERP, 3, "is_targeting_convertible_IAT: memory_reference " PFX "\n",
memory_reference);
* be looked up in a separate hashtable based on the IAT base, or
* we'd have to extend the vmareas with custom fields
*/
ASSERT(DYNAMO_OPTION(IAT_convert));
if (vmvector_overlap(IAT_areas, memory_reference, memory_reference + 1)) {
* but even in the unlikely reference by address from another
* module there is really no problem, so not worth checking
*/
ASSERT_CURIOSITY(get_module_base(instr->bytes) ==
get_module_base(memory_reference));
* we have to READ the contents and return that
* safely to the caller so they can convert accordingly
*/
* of a region that has a converted block. Then on a write to
* IAT we can flush efficiently only blocks affected by a
* particular module, for a first hack though flushing
* everything on a hooker will do.
*/
*iat_reference = memory_reference;
return true;
} else {
* e.g. ntdll!RtlUnicodeStringToAnsiString+0x4c:
* ff15c009917c call dword ptr [ntdll!RtlAllocateStringRoutine (7c9109c0)]
*/
return false;
}
}
#endif
* can be converted into a direct call, process it and return true,
* else, return false.
*/
static bool
bb_process_IAT_convertible_indjmp(dcontext_t *dcontext, build_bb_t *bb,
bool *elide_continue)
{
#ifdef X86
app_pc iat_reference;
app_pc target;
ASSERT(DYNAMO_OPTION(IAT_convert));
if (instr_get_opcode(bb->instr) != OP_jmp_ind) {
ASSERT_CURIOSITY(false && "far ind jump");
return false;
}
if (!is_targeting_convertible_IAT(dcontext, bb->instr, &iat_reference)) {
DOSTATS({
if (EXIT_IS_IND_JMP_PLT(bb->exit_type)) {
* fact is not going through IAT
*/
STATS_INC(num_indirect_jumps_PLT_not_IAT);
LOG(THREAD, LOG_INTERP, 3,
"bb_process_IAT_convertible_indjmp: indirect jmp not PLT instr=" PFX
"\n",
bb->instr->bytes);
}
});
return false;
}
target = read_from_IAT(iat_reference);
DOLOG(4, LOG_INTERP, {
char name[MAXIMUM_SYMBOL_LENGTH];
print_symbolic_address(target, name, sizeof(name), false);
LOG(THREAD, LOG_INTERP, 4,
"bb_process_IAT_convertible_indjmp: target=" PFX " %s\n", target, name);
});
STATS_INC(num_indirect_jumps_IAT);
DOSTATS({
if (!EXIT_IS_IND_JMP_PLT(bb->exit_type)) {
* through the IAT other than PLT uses, although a block
* reaching max_elide_call would prevent the above
* match */
STATS_INC(num_indirect_jumps_IAT_not_PLT);
LOG(THREAD, LOG_INTERP, 4,
"bb_process_IAT_convertible_indjmp: indirect jmp not PLT target=" PFX
"\n",
target);
}
});
if (must_not_be_elided(target)) {
ASSERT_NOT_TESTED();
BBPRINT(bb, 3, " NOT inlining indirect jmp to must_not_be_elided " PFX "\n",
target);
return false;
}
* any module may have imported a native DLL. Note it may be
* possible to optimize with a range check on IAT subregions, but
* this check isn't much slower.
*/
*
* FIXME: we also prevent IAT_convert from optimizing imports in
* native_exec_list DLLs, although we could let that convert to a
* direct jump and require native_exec_dircalls to be always on to
* intercept those jmps.
*/
if (DYNAMO_OPTION(native_exec) && is_native_pc(target)) {
BBPRINT(bb, 3, " NOT inlining indirect jump to native exec module " PFX "\n",
target);
STATS_INC(num_indirect_jumps_IAT_native);
return false;
}
* bb_process_ubr(dcontext, bb) but note bb->instr has already
* been appended so has to reverse some of its actions
*/
direct JMP or we'll just continue in target */
instrlist_remove(bb->ilist, bb->instr);
instr_destroy(dcontext, bb->instr);
bb->instr = NULL;
if (DYNAMO_OPTION(IAT_elide)) {
* commonly this really is a disguised CALL*. Yet for PLT use
* of the form of CALL PLT[foo]; JMP* IAT[foo] we would have
* already counted the CALL. If we have tail call elimination
* that converts a CALL* into a JMP* it is also OK to treat as
* a JMP instead of a CALL just as if sharing tails.
*/
if (follow_direct_jump(dcontext, bb, target)) {
LOG(THREAD, LOG_INTERP, 4,
"bb_process_IAT_convertible_indjmp: eliding jmp* target=" PFX "\n",
target);
STATS_INC(num_indirect_jumps_IAT_elided);
*elide_continue = true;
return true;
}
}
* we get the final exit_target added by build_bb_ilist
* FIXME: case 85: which will work only when we're using bb->mangle_ilist
* FIXME: what are callers supposed to see when we do NOT mangle?
*/
LOG(THREAD, LOG_INTERP, 4,
"bb_process_IAT_convertible_indjmp: converting jmp* target=" PFX "\n", target);
STATS_INC(num_indirect_jumps_IAT_converted);
bb->exit_target = target;
*elide_continue = false;
return true;
#elif defined(AARCHXX)
ASSERT_NOT_IMPLEMENTED(false);
return false;
#elif defined(RISCV64)
ASSERT_NOT_IMPLEMENTED(false);
return false;
#endif
}
* through IAT that can be converted into a direct call, process it
* and sets elide_continue. Otherwise function return false.
* OUT elide_continue is set when bb building should continue in target,
* and not set when bb building should be stopped.
*/
static bool
bb_process_IAT_convertible_indcall(dcontext_t *dcontext, build_bb_t *bb,
bool *elide_continue)
{
#ifdef X86
app_pc iat_reference;
app_pc target;
ASSERT(DYNAMO_OPTION(IAT_convert));
* bb_process_IAT_convertible_indjmp, could fuse the two
*/
* and expensive, with a false return should any fail.
*/
if (instr_get_opcode(bb->instr) != OP_call_ind) {
ASSERT_CURIOSITY(false && "far call");
return false;
}
if (!is_targeting_convertible_IAT(dcontext, bb->instr, &iat_reference)) {
return false;
}
target = read_from_IAT(iat_reference);
DOLOG(4, LOG_INTERP, {
char name[MAXIMUM_SYMBOL_LENGTH];
print_symbolic_address(target, name, sizeof(name), false);
LOG(THREAD, LOG_INTERP, 4,
"bb_process_IAT_convertible_indcall: target=" PFX " %s\n", target, name);
});
STATS_INC(num_indirect_calls_IAT);
* bb_process_call_direct(dcontext, bb)
*/
if (leave_call_native(target) || must_not_be_entered(target)) {
ASSERT_NOT_TESTED();
BBPRINT(bb, 3, " NOT inlining indirect call to leave_call_native " PFX "\n",
target);
return false;
}
* any module may have imported a native DLL. Note it may be
* possible to optimize with a range check on IAT subregions, but
* this check isn't much slower.
*/
if (DYNAMO_OPTION(native_exec) && is_native_pc(target)) {
BBPRINT(bb, 3, " NOT inlining indirect call to native exec module " PFX "\n",
target);
STATS_INC(num_indirect_calls_IAT_native);
return false;
}
* bb->instr and will remove bb->instr
* FIXME: it would have been
* better to replace in instrlist with a direct call and have
* mangle_{in,}direct_call use other than the raw bytes, but this for now does the
* job.
*/
bb->instr->flags |= INSTR_IND_CALL_DIRECT;
if (DYNAMO_OPTION(IAT_elide)) {
if (follow_direct_call(dcontext, bb, target)) {
LOG(THREAD, LOG_INTERP, 4,
"bb_process_IAT_convertible_indcall: eliding call* flags=0x%08x "
"target=" PFX "\n",
bb->instr->flags, target);
STATS_INC(num_indirect_calls_IAT_elided);
*elide_continue = true;
return true;
}
}
LOG(THREAD, LOG_INTERP, 4,
"bb_process_IAT_convertible_indcall: converting call* flags=0x%08x target=" PFX
"\n",
bb->instr->flags, target);
STATS_INC(num_indirect_calls_IAT_converted);
* mangle_indirect_call. We don't need to set to NULL, since this
* instr is a CTI and the final jump's translation target should
* still be the original indirect call.
*/
bb->exit_target = target;
* options it should get mangled to a PUSH; JMP
*/
*elide_continue = false;
return true;
#elif defined(AARCHXX)
ASSERT_NOT_IMPLEMENTED(false);
return false;
#elif defined(RISCV64)
ASSERT_NOT_IMPLEMENTED(false);
return false;
#endif
}
/* Adjusts the exit type and fragment flags of bb for an instruction that
 * saves the floating-point pc.  The block is always made fine-grained;
 * with -translate_fpu_pc it additionally gets a special exit and is barred
 * from traces.
 */
static void
bb_process_float_pc(dcontext_t *dcontext, build_bb_t *bb)
{
    /* For instructions that save the floating-point state
     * (e.g., fxsave), we go back to d_r_dispatch to translate the fp pc.
     * We rule out being in a trace (and thus a potential alternative
     * would be to use a FRAG_ flag).  These are rare instructions so that
     * shouldn't have a significant perf impact: except we've been hitting
     * libm code that uses fnstenv and is not rare, so we have non-inlined
     * translation under an option for now.
     */
    if (DYNAMO_OPTION(translate_fpu_pc)) {
        bb->exit_type |= LINK_SPECIAL_EXIT;
        bb->flags |= FRAG_CANNOT_BE_TRACE;
    }
    /* Regardless of the option, keep this block fine-grained. */
    bb->flags &= ~FRAG_COARSE_GRAIN;
}
/* Returns whether inst, as found in a not-yet-mangled ilist, is an app
 * control transfer that is not ruled out by the checks below (a direct call
 * we leave native, or on Windows a wow64 syscall).
 */
static bool
instr_will_be_exit_cti(instr_t *inst)
{
    /* Only application control transfers are candidates. */
    if (!instr_is_app(inst) || !instr_is_cti(inst))
        return false;
    /* A near direct call whose target is left native does not count. */
    if (instr_is_near_call_direct(inst) &&
        leave_call_native(instr_get_branch_target_pc(inst)))
        return false;
    /* On Windows a wow64 syscall is excluded as well. */
    return true IF_WINDOWS(&&!instr_is_wow64_syscall(inst));
}
/* Examines inst for being a syscall or an interrupt-opcode instruction.
 * - *found_syscall is set to true if inst is a syscall (skipped if NULL).
 * - *found_int is set to true if inst has the platform's interrupt opcode
 *   (skipped if NULL).
 * Neither out-param is ever cleared here.
 * Returns false only when inst is a syscall/interrupt that must terminate the
 * block yet is not the last instruction of ilist; returns true otherwise.
 */
static bool
client_check_syscall(instrlist_t *ilist, instr_t *inst, bool *found_syscall,
                     bool *found_int)
{
    int op_int = IF_X86_ELSE(OP_int, IF_RISCV64_ELSE(OP_ecall, OP_svc));
    /* We do consider the wow64 call* a syscall here (it is both
     * a syscall and a call*: PR 240258).
     */
    if (instr_is_syscall(inst) || instr_get_opcode(inst) == op_int) {
        if (instr_is_syscall(inst) && found_syscall != NULL)
            *found_syscall = true;
        /* Note that on platforms where the syscall instruction shares op_int
         * both flags may end up set for the same instr.
         */
        if (instr_get_opcode(inst) == op_int && found_int != NULL)
            *found_int = true;
        /* On UNIX only non-ignorable syscalls are checked below: the
         * pre-syscall-exit jmp is added post client mangling so should
         * be robust.
         * FIXME: now that we have -no_inline_ignored_syscalls should
         * we assert on ignorable also?  Probably we'd have to have
         * an exception for the middle of a trace?
         */
        if (IF_UNIX(TEST(INSTR_NI_SYSCALL, inst->flags))
                IF_WINDOWS(instr_is_syscall(inst) ||
                           ((instr_get_opcode(inst) == OP_int &&
                             instr_get_interrupt_number(inst) == 0x2b)))) {
            /* This check means we shouldn't trip the exit_type flags
             * check below but we leave it in place in case we add
             * other flags in future
             */
            if (inst != instrlist_last(ilist)) {
                CLIENT_ASSERT(false, "a syscall or interrupt must terminate the block");
                return false;
            }
            /* FIXME: should we remove any instrs following the syscall ourselves,
             * or the client has to deal w/ bad behavior in release build?
             */
        }
    }
    return true;
}
* eflags and other flags that might have changed.
* Returns true normally; returns false to indicate "go native".
*/
static bool
client_process_bb(dcontext_t *dcontext, build_bb_t *bb)
{
dr_emit_flags_t emitflags = DR_EMIT_DEFAULT;
instr_t *inst;
bool found_exit_cti = false;
bool found_syscall = false;
bool found_int = false;
#ifdef ANNOTATIONS
app_pc trailing_annotation_pc = NULL, instrumentation_pc = NULL;
bool found_instrumentation_pc = false;
instr_t *annotation_label = NULL;
#endif
instr_t *last_app_instr = NULL;
* for recreating state, so only call if caller requested it
* (usually that coincides w/ bb->app_interp being set, but not
* when recreating state on a fault (PR 214962)).
* FIXME: hot patches shouldn't be injected during state recreations;
* does predicating on bb->app_interp take care of this issue?
*/
if (!bb->pass_to_client)
return true;
* passed to cliennt.
* FIXME: i#1000, we should present the bb to the client.
* i#1000-c#1: the bb->ilist could be empty.
*/
if (instrlist_first(bb->ilist) == NULL)
return true;
if (!instr_opcode_valid(instrlist_first(bb->ilist)) &&
* to ensure this is a single-instr bb that was built just to
* raise the fault for us.
* XXX i#1000: shouldn't we pass this to the client? It might not handle an
* invalid instr properly though.
*/
instrlist_first(bb->ilist) == instrlist_last(bb->ilist)) {
return true;
}
app_pc tag = bb->pretend_pc == NULL ? bb->start_pc : bb->pretend_pc;
#ifdef LINUX
if (TEST(FRAG_STARTS_RSEQ_REGION, bb->flags)) {
rseq_insert_start_label(dcontext, tag, bb->ilist);
bb->flags &= ~FRAG_STARTS_RSEQ_REGION;
}
#endif
if (!instrument_basic_block(dcontext, tag, bb->ilist, bb->for_trace, !bb->app_interp,
&emitflags)) {
}
if (bb->for_cache && TEST(DR_EMIT_GO_NATIVE, emitflags)) {
LOG(THREAD, LOG_INTERP, 2, "client requested that we go native\n");
SYSLOG_INTERNAL_INFO("thread " TIDFMT " is going native at client request",
d_r_get_thread_id());
dcontext->native_exec_postsyscall = bb->start_pc;
dcontext->next_tag = BACK_TO_NATIVE_AFTER_SYSCALL;
return false;
}
bb->post_client = true;
* or syscall info might be wrong. xref PR 215217
*/
* We need to check for client changes that require a new exit
* target. We can't practically analyze the instrlist to decipher
* the exit, so we'll search backwards and require that the last
* cti is the exit cti. Typically, the last instruction in the
* block should be the exit. Post-mbr and post-syscall positions
* are particularly fragile, as our mangling code sets state up for
* the exit that could be messed up by instrs inserted after the
* mbr/syscall. We thus disallow such instrs (except for
* dr_insert_mbr_instrumentation()). xref cases 10503, 10782, 10784
*
* Here's what we support:
* - more than one exit cti; all but the last must be a ubr
* - an exit cbr or call must be the final instr in the block
* - only one mbr; must be the final instr in the block and the exit target
* - clients can't change the exit of blocks ending in a syscall
* (or int), and the syscall must be the final instr in the block;
* client can, however, remove the syscall and then add a different exit
* - client can't add a translation target that's outside of the original
* source code bounds, or else our cache consistency breaks down
* (the one exception to this is that a jump can translate to its target)
*/
bb->exit_target = NULL;
bb->exit_type = 0;
for (inst = instrlist_last(bb->ilist); inst != NULL; inst = instr_get_prev(inst)) {
if (!instr_opcode_valid(inst))
continue;
if (instr_is_cti(inst) && inst != instrlist_last(bb->ilist)) {
* since decode_fragment(), used for state recreation, can't
* distinguish from exit cti.
* i#665: we now support intra-fragment meta ctis
* to make persistence usable for clients
*/
if (!opnd_is_instr(instr_get_target(inst)) || instr_is_app(inst)) {
bb->flags &= ~FRAG_COARSE_GRAIN;
STATS_INC(coarse_prevent_client);
}
}
if (instr_is_meta(inst)) {
#ifdef ANNOTATIONS
if (is_annotation_label(inst) && last_app_instr == NULL) {
dr_instr_label_data_t *label_data = instr_get_label_data_area(inst);
trailing_annotation_pc = GET_ANNOTATION_APP_PC(label_data);
instrumentation_pc = GET_ANNOTATION_INSTRUMENTATION_PC(label_data);
annotation_label = inst;
}
#endif
continue;
}
#ifdef X86
if (!d_r_is_avx512_code_in_use()) {
if (ZMM_ENABLED()) {
if (instr_may_write_zmm_or_opmask_register(inst)) {
LOG(THREAD, LOG_INTERP, 2, "Detected AVX-512 code in use\n");
d_r_set_avx512_code_in_use(true, NULL);
proc_set_num_simd_saved(MCXT_NUM_SIMD_SLOTS);
}
}
}
#endif
#ifdef ANNOTATIONS
if (instrumentation_pc != NULL && !found_instrumentation_pc &&
instr_get_translation(inst) == instrumentation_pc)
found_instrumentation_pc = true;
#endif
if (last_app_instr == NULL)
last_app_instr = inst;
* cache consistency (both page prot and selfmod) will fail
*/
ASSERT(!bb->for_cache || bb->vmlist != NULL);
* We live w/o the checks there.
*/
CLIENT_ASSERT(
!bb->for_cache ||
vm_list_overlaps(dcontext, bb->vmlist, instr_get_translation(inst),
instr_get_translation(inst) + 1) ||
(instr_is_ubr(inst) && opnd_is_pc(instr_get_target(inst)) &&
instr_get_translation(inst) == opnd_get_pc(instr_get_target(inst)))
* has translation fields set to hooked app routine */
IF_WINDOWS(|| dr_fragment_app_pc(bb->start_pc) != bb->start_pc),
"block's app sources (instr_set_translation() targets) "
"must remain within original bounds");
#ifdef AARCH64
if (instr_get_opcode(inst) == OP_isb) {
CLIENT_ASSERT(inst == instrlist_last(bb->ilist),
"OP_isb must be last instruction in block");
}
#endif
* so do so now to get bb->flags and bb->exit_type set
*/
if (instr_is_syscall(inst) ||
instr_get_opcode(inst) ==
IF_X86_ELSE(OP_int, IF_RISCV64_ELSE(OP_ecall, OP_svc))) {
instr_t *tmp = bb->instr;
bb->instr = inst;
if (instr_is_syscall(bb->instr))
bb_process_syscall(dcontext, bb);
else if (instr_get_opcode(bb->instr) ==
IF_X86_ELSE(OP_int, IF_RISCV64_ELSE(OP_ecall, OP_svc))) {
bb_process_interrupt(dcontext, bb);
}
if (inst != instrlist_last(bb->ilist))
bb->instr = tmp;
}
client_check_syscall(bb->ilist, inst, &found_syscall, &found_int);
if (instr_will_be_exit_cti(inst)) {
if (!found_exit_cti) {
* special flags set above, even if the client doesn't change
* the exit target. We undo such flags after this ilist walk
* to support client removal of syscalls/ints.
* EXIT_IS_IND_JMP_PLT() is used for -IAT_{convert,elide}, which
* is off by default for CI; it's also used for native_exec,
* but we're not sure if we want to support that with CI.
* xref case 10846 and i#198
*/
CLIENT_ASSERT(
!TEST(~(LINK_DIRECT | LINK_INDIRECT | LINK_CALL | LINK_RETURN |
LINK_JMP | LINK_NI_SYSCALL_ALL |
LINK_SPECIAL_EXIT IF_WINDOWS(| LINK_CALLBACK_RETURN)),
bb->exit_type) &&
!EXIT_IS_IND_JMP_PLT(bb->exit_type),
"client unsupported block exit type internal error");
found_exit_cti = true;
bb->instr = inst;
if ((instr_is_near_ubr(inst) || instr_is_near_call_direct(inst))
IF_ARM(&&!instr_is_cbr(inst))) {
CLIENT_ASSERT(instr_is_near_ubr(inst) ||
inst == instrlist_last(bb->ilist) ||
* by their callee target code
*/
DYNAMO_OPTION(max_elide_call) > 0,
"an exit call must terminate the block");
if (inst == last_app_instr) {
bb->exit_target = instr_get_branch_target_pc(inst);
bb->exit_type = instr_branch_type(inst);
}
} else if (instr_is_mbr(inst) ||
instr_is_far_cti(inst)
IF_ARM(
|| instr_get_opcode(inst) == OP_blx)) {
CLIENT_ASSERT(inst == instrlist_last(bb->ilist),
"an exit mbr or far cti must terminate the block");
bb->exit_type = instr_branch_type(inst);
#ifdef ARM
if (instr_get_opcode(inst) == OP_blx)
bb->ibl_branch_type = IBL_INDCALL;
else
#endif
bb->ibl_branch_type = get_ibl_branch_type(inst);
bb->exit_target =
get_ibl_routine(dcontext, get_ibl_entry_type(bb->exit_type),
DEFAULT_IBL_BB(), bb->ibl_branch_type);
} else {
ASSERT(instr_is_cbr(inst));
CLIENT_ASSERT(inst == instrlist_last(bb->ilist),
"an exit cbr must terminate the block");
bb->exit_target = NULL;
bb->exit_type = 0;
instr_exit_branch_set_type(bb->instr, instr_branch_type(inst));
}
* we can check for post-cti code
*/
if (inst != instrlist_last(bb->ilist)) {
if (TEST(FRAG_COARSE_GRAIN, bb->flags)) {
bb->flags &= ~FRAG_COARSE_GRAIN;
STATS_INC(coarse_prevent_client);
}
if (!instr_is_near_call_direct(inst) ||
DYNAMO_OPTION(max_elide_call) == 0)
bb->flags |= FRAG_CANNOT_BE_TRACE;
}
}
* introduce more than one exit cti; we'll just disable traces
* for these fragments.
*/
else {
CLIENT_ASSERT(instr_is_near_ubr(inst) ||
(instr_is_near_call_direct(inst) &&
* by their callee target code
*/
DYNAMO_OPTION(max_elide_call) > 0),
"a second exit cti must be a ubr");
if (!instr_is_near_call_direct(inst) ||
DYNAMO_OPTION(max_elide_call) == 0)
bb->flags |= FRAG_CANNOT_BE_TRACE;
ASSERT(!TEST(FRAG_COARSE_GRAIN, bb->flags));
}
}
}
* syscalls in the middle of a bb.
*/
ASSERT(!DYNAMO_OPTION(inline_ignored_syscalls));
ASSERT((TEST(FRAG_HAS_SYSCALL, bb->flags) && found_syscall) ||
(!TEST(FRAG_HAS_SYSCALL, bb->flags) && !found_syscall));
IF_WINDOWS(ASSERT(!TEST(LINK_CALLBACK_RETURN, bb->exit_type) || found_int));
* client modifications: setting it for a selfmod fragment could
* result in an infinite loop, and it is mainly used for elision, which we
* are not doing for client ctis. Clients are not supposed add new
* app source regions (PR 215217).
*/
if (last_app_instr != NULL) {
bool adjusted_cur_pc = false;
app_pc xl8 = instr_get_translation(last_app_instr);
#ifdef ANNOTATIONS
if (annotation_label != NULL) {
if (found_instrumentation_pc) {
* translation footprint of `bb` to include the annotation (such that
* the next bb starts after the annotation, avoiding duplication).
*/
bb->cur_pc = trailing_annotation_pc;
adjusted_cur_pc = true;
LOG(THREAD, LOG_INTERP, 3,
"BB ends immediately prior to an annotation. "
"Setting `bb->cur_pc` (for fall-through) to " PFX " so that the "
"annotation will be included.\n",
bb->cur_pc);
} else {
* We infer that the client wants to skip the annotation. Remove it now.
*/
instr_t *annotation_next = instr_get_next(annotation_label);
instrlist_remove(bb->ilist, annotation_label);
instr_destroy(dcontext, annotation_label);
if (is_annotation_return_placeholder(annotation_next)) {
instrlist_remove(bb->ilist, annotation_next);
instr_destroy(dcontext, annotation_next);
}
}
}
#endif
#if defined(WINDOWS) && !defined(STANDALONE_DECODER)
* occluded by the corresponding hook, `bb->cur_pc` should point to the original
* app pc (where that instruction was copied from). Cannot use `decode_next_pc()`
* on the original app pc because it is now in the middle of the hook.
*/
if (!adjusted_cur_pc && could_be_hook_occluded_pc(xl8)) {
app_pc intercept_pc = get_intercept_pc_from_app_pc(
xl8, true , false );
if (intercept_pc != NULL) {
app_pc next_intercept_pc = decode_next_pc(dcontext, intercept_pc);
bb->cur_pc = xl8 + (next_intercept_pc - intercept_pc);
adjusted_cur_pc = true;
LOG(THREAD, LOG_INTERP, 3,
"BB ends in the middle of an intercept. "
"Offsetting `bb->cur_pc` (for fall-through) to " PFX " in parallel "
"to intercept instr at " PFX "\n",
intercept_pc, bb->cur_pc);
}
}
#endif
* the length of the translation target
*/
if (!adjusted_cur_pc) {
bb->cur_pc = decode_next_pc(dcontext, xl8);
LOG(THREAD, LOG_INTERP, 3, "setting cur_pc (for fall-through) to " PFX "\n",
bb->cur_pc);
}
* FIXME: I'm not 100% convinced the logic here covers everything
* build_bb_ilist does.
* FIXME: what about if last instr was invalid, or if client adds
* some invalid instrs: xref bb_process_invalid_instr()
*/
if (bb->instr != NULL || (!found_int && !found_syscall))
bb->instr = last_app_instr;
} else
bb->instr = NULL;
* FIXME: should we not do eflags tracking while decoding, then, and always
* do it afterward?
*/
if (!INTERNAL_OPTION(fast_client_decode)) {
bb->eflags =
forward_eflags_analysis(dcontext, bb->ilist, instrlist_first(bb->ilist));
}
if (TEST(DR_EMIT_STORE_TRANSLATIONS, emitflags)) {
bb->flags |= FRAG_HAS_TRANSLATION_INFO;
CLIENT_ASSERT(!INTERNAL_OPTION(fast_client_decode),
"-fast_client_decode not compatible with "
"DR_EMIT_STORE_TRANSLATIONS");
ASSERT(bb->record_translation && bb->full_decode);
}
if (DYNAMO_OPTION(coarse_enable_freeze)) {
* so we avoid undoing savings from -opt_memory with a tool that
* doesn't support persistence.
*/
if (!TEST(DR_EMIT_PERSISTABLE, emitflags)) {
bb->flags &= ~FRAG_COARSE_GRAIN;
STATS_INC(coarse_prevent_client);
}
}
if (TEST(DR_EMIT_MUST_END_TRACE, emitflags)) {
bb->flags |= FRAG_MUST_END_TRACE;
}
return true;
}
#ifdef DR_APP_EXPORTS
/* Pre-client fixup: when bb is the block starting at
 * dr_app_running_under_dynamorio, rewrites source 0 of the instruction
 * preceding the final return to the immediate 1.  All other blocks are left
 * untouched.
 */
static void
mangle_pre_client(dcontext_t *dcontext, build_bb_t *bb)
{
    instr_t *ret;
    instr_t *mov;

    /* Nothing to do for any block other than the one we special-case. */
    if (bb->start_pc != (app_pc)dr_app_running_under_dynamorio)
        return;

    ret = instrlist_last(bb->ilist);
    mov = instr_get_prev(ret);
    LOG(THREAD, LOG_INTERP, 3, "Found dr_app_running_under_dynamorio\n");
    /* Expected shape: <arch-specific immediate move located at start_pc>; <return>. */
    ASSERT(ret != NULL && instr_is_return(ret) && mov != NULL &&
           IF_X86(instr_get_opcode(mov) == OP_mov_imm &&)
               IF_ARM(instr_get_opcode(mov) == OP_mov &&
                      OPND_IS_IMMED_INT(instr_get_src(mov, 0)) &&)
                   IF_AARCH64(instr_get_opcode(mov) == OP_movz &&)
                       IF_RISCV64(instr_get_opcode(mov) == OP_addi &&)(
                           bb->start_pc == instr_get_raw_bits(mov) ||
                           bb->start_pc == instr_get_translation(mov)));
    /* Decode the instr before replacing its source operand. */
    instr_decode(dcontext, mov);
    instr_set_src(mov, 0, OPND_CREATE_INT32(1));
}
#endif
/* Intended for use when the number of instructions in a bb being built reaches or
 * exceeds max_bb_instr.  It checks if bb is safe to stop after instruction stop_after.
 * On ARM, we do not stop bb building in the middle of an IT block unless there is a
 * conditional syscall.
 */
static bool
bb_safe_to_stop(dcontext_t *dcontext, instrlist_t *ilist, instr_t *stop_after)
{
#ifdef ARM
    ASSERT(ilist != NULL && instrlist_last(ilist) != NULL);
    /* Only Thumb mode needs the checks below; other modes are always safe. */
    if (dr_get_isa_mode(dcontext) == DR_ISA_ARM_THUMB) {
        /* With no explicit stop point, examine the last app instr of ilist. */
        instr_t *last = (stop_after != NULL) ? stop_after : instrlist_last_app(ilist);
        /* Never stop right after the IT instruction itself. */
        if (instr_get_opcode(last) == OP_it)
            return false;
        /* A predicated instr that is not a cti, syscall, or interrupt is only
         * a safe stopping point when it is the last one in its IT block.
         */
        if (instr_is_predicated(last) && !instr_is_cti(last) &&
            !instr_is_syscall(last) && !instr_is_interrupt(last))
            return instr_is_last_in_it_block(last, NULL, NULL);
    }
#endif
    return true;
}
/* Decodes the application's instructions until the end of a basic
 * block is found, and prepares the resulting instrlist for creation of
 * a fragment, but does not create the fragment, just returns the instrlist.
 * Caller is responsible for freeing the list and its instrs!
 *
 * Input parameters in bb control aspects of creation:
 *   If app_interp is true, this is considered real app code.
 *   If pass_to_client is true,
 *     calls instrument routine on bb->ilist before mangling
 *   If mangle_ilist is true, mangles the ilist, else leaves it in app form
 *   If record_vmlist is true, updates the vmareas data structures
 *   If for_cache is true, bb building lock is assumed to be held.
 *     record_vmlist should also be true.
 *     Caller must set and later clear dcontext->bb_build_info.
 *     For !for_cache, build_bb_ilist() sets and clears it, making the
 *     assumption that the caller is doing no other reading from the region.
 *   If record_translation is true, records translation for inserted instrs
 *   If outf != NULL, does full disassembly with comments to outf
 *   If overlap_info != NULL, records overlap information for the block in
 *     the overlap_info (caller must fill in region_start and region_end).
 *
 * FIXME: now that we have better control over following direct ctis,
 * should we have adaptive mechanism to decide whether to follow direct
 * ctis, since some bmarks are better doing so (gap, vortex, wupwise)
 * and others are worse (apsi, perlbmk)?
 */
DISABLE_NULL_SANITIZER
static void
build_bb_ilist(dcontext_t *dcontext, build_bb_t *bb)
{
* instructions in this basic block, when we take those branches we will
* just make a new basic block and duplicate part of this one
*/
int total_branches = 0;
uint total_instrs = 0;
uint cur_max_bb_instrs = DYNAMO_OPTION(max_bb_instrs);
uint total_writes = 0;
instr_t *non_cti;
byte *non_cti_start_pc;
uint eflags_6 = 0;
#ifdef HOT_PATCHING_INTERFACE
bool hotp_should_inject = false, hotp_injected = false;
#endif
app_pc page_start_pc = (app_pc)NULL;
bool bb_build_nested = false;
* dcontext for allocation; we need separate var for non-global dcontext.
*/
dcontext_t *my_dcontext = get_thread_private_dcontext();
DEBUG_DECLARE(bool regenerated = false;)
bool stop_bb_on_fallthrough = false;
ASSERT(bb->initialized);
* will catch it
*/
ASSERT(bb->vmlist == NULL || !bb->record_vmlist || bb->checked_start_vmarea);
ASSERT(!bb->for_cache || bb->record_vmlist);
#ifdef CUSTOM_TRACES_RET_REMOVAL
my_dcontext->num_calls = 0;
my_dcontext->num_rets = 0;
#endif
if (my_dcontext != NULL) {
if (bb->for_cache) {
ASSERT(bb == (build_bb_t *)my_dcontext->bb_build_info);
} else if (my_dcontext->bb_build_info == NULL) {
my_dcontext->bb_build_info = (void *)bb;
} else {
* and we give up on freeing dangling instr_t and instrlist_t from this
* decode.
* We need the original's for_cache so we know to free the bb_building_lock.
* FIXME: use TRY to handle decode exceptions locally? Shouldn't have
* violation remediations on a !for_cache build.
*/
ASSERT(bb->vmlist == NULL && !bb->for_cache &&
((build_bb_t *)my_dcontext->bb_build_info)->for_cache);
bb_build_nested = true;
}
} else
ASSERT(dynamo_exited);
if ((bb->record_translation && !INTERNAL_OPTION(fast_client_decode)) ||
!bb->for_cache
IF_X64(|| DYNAMO_OPTION(coarse_split_riprel) || DYNAMO_OPTION(x86_to_x64)) ||
INTERNAL_OPTION(full_decode)
IF_LINUX(||
(!vmvector_empty(d_r_rseq_areas) &&
vmvector_overlap(d_r_rseq_areas, bb->start_pc, bb->start_pc + 1))))
bb->full_decode = true;
else {
#ifdef CHECK_RETURNS_SSE2
bb->full_decode = true;
#endif
}
LOG(THREAD, LOG_INTERP, 3,
"\ninterp%s: ", IF_X86_64_ELSE(X64_MODE_DC(dcontext) ? "" : " (x86 mode)", ""));
BBPRINT(bb, 3, "start_pc = " PFX "\n", bb->start_pc);
DOSTATS({
if (bb->app_interp) {
if (fragment_lookup_deleted(dcontext, bb->start_pc)) {
* dup stats if multiple threads have regnerated the
* same private tag, or if a shared tag is deleted and
* multiple privates created
*/
regenerated = true;
STATS_INC(num_fragments_deja_vu);
}
}
});
if (!bb->checked_start_vmarea)
check_new_page_start(dcontext, bb);
#if defined(WINDOWS) && !defined(STANDALONE_DECODER)
* it so instructions are taken from the intercept instead (note that
* `instr_set_translation` will hide this adjustment from the client). N.B.: this
* must follow `check_new_page_start()` (above) or `bb.vmlist` will be wrong.
*/
if (could_be_hook_occluded_pc(bb->start_pc)) {
app_pc intercept_pc = get_intercept_pc_from_app_pc(
bb->start_pc, true , true );
if (intercept_pc != NULL) {
LOG(THREAD, LOG_INTERP, 3,
"Changing start_pc from hook-occluded app pc " PFX " to intercept pc " PFX
"\n",
bb->start_pc, intercept_pc);
bb->start_pc = intercept_pc;
}
}
#endif
bb->cur_pc = bb->start_pc;
* instructions, (i.e. check_for_stopping_point()) */
bb->instr_start = bb->cur_pc;
* on unreadable memory -- though we now properly clean up and won't leak
* on unreadable on any check_thread_vm_area call
*/
bb->ilist = instrlist_create(dcontext);
bb->instr = NULL;
* and the full decode of sandboxing by doing full decode up front
*/
if (TEST(FRAG_SELFMOD_SANDBOXED, bb->flags)) {
bb->full_decode = true;
bb->follow_direct = false;
}
if (TEST(FRAG_HAS_TRANSLATION_INFO, bb->flags)) {
bb->full_decode = true;
bb->record_translation = true;
}
if (my_dcontext != NULL && my_dcontext->single_step_addr == bb->start_pc) {
cur_max_bb_instrs = 1;
}
KSTART(bb_decoding);
while (true) {
if (check_for_stopping_point(dcontext, bb)) {
BBPRINT(bb, 3, "interp: found DynamoRIO stopping point at " PFX "\n",
bb->cur_pc);
break;
}
bb->instr = instr_create(dcontext);
* All we need to decode are control-transfer instructions
* For efficiency, put all non-cti into a single instr_t structure
*/
non_cti_start_pc = bb->cur_pc;
do {
* page that's being decoded. */
if (!bb->record_vmlist && page_start_pc != (app_pc)PAGE_START(bb->cur_pc)) {
page_start_pc = (app_pc)PAGE_START(bb->cur_pc);
set_thread_decode_page_start(my_dcontext == NULL ? dcontext : my_dcontext,
page_start_pc);
}
bb->instr_start = bb->cur_pc;
if (bb->full_decode) {
bb->cur_pc = IF_AARCH64_ELSE(decode_with_ldstex,
decode)(dcontext, bb->cur_pc, bb->instr);
if (bb->record_translation)
instr_set_translation(bb->instr, bb->instr_start);
} else {
instr_reset(dcontext, bb->instr);
bb->cur_pc = IF_AARCH64_ELSE(decode_cti_with_ldstex,
decode_cti)(dcontext, bb->cur_pc, bb->instr);
#if defined(ANNOTATIONS) && !(defined(X64) && defined(WINDOWS))
if (is_encoded_valgrind_annotation_tail(bb->instr_start)) {
if (is_encoded_valgrind_annotation(bb->instr_start, bb->start_pc,
(app_pc)PAGE_START(bb->cur_pc))) {
KSTOP(bb_decoding);
instr_destroy(dcontext, bb->instr);
instrlist_clear_and_destroy(dcontext, bb->ilist);
if (bb->vmlist != NULL) {
vm_area_destroy_list(dcontext, bb->vmlist);
bb->vmlist = NULL;
}
bb->full_decode = true;
build_bb_ilist(dcontext, bb);
return;
}
}
#endif
}
ASSERT(!bb->check_vm_area || bb->checked_end != NULL);
if (bb->check_vm_area && bb->cur_pc != NULL &&
bb->cur_pc - 1 >= bb->checked_end) {
* Ideally we'd want to check BEFORE we decode from the
* subsequent page, as it could be inaccessible, but not worth
* the time estimating the size from a variable number of bytes
* before the page boundary. Instead we rely on other
* mechanisms to handle faults while decoding, which we need
* anyway to handle racy unmaps by the app.
*/
uint old_flags = bb->flags;
DEBUG_DECLARE(bool is_first_instr = (bb->instr_start == bb->start_pc));
if (!check_new_page_contig(dcontext, bb, bb->cur_pc - 1)) {
* incompatible vmarea.
*/
ASSERT(!is_first_instr);
bb->cur_pc = NULL;
stop_bb_on_fallthrough = true;
break;
}
if (!TEST(FRAG_SELFMOD_SANDBOXED, old_flags) &&
TEST(FRAG_SELFMOD_SANDBOXED, bb->flags)) {
* !follow_direct, which are needed for sandboxing. This
* can't happen more than once because sandboxing is now on.
*/
ASSERT(is_first_instr);
bb->full_decode = true;
bb->follow_direct = false;
bb->cur_pc = bb->instr_start;
instr_reset(dcontext, bb->instr);
continue;
}
}
total_instrs++;
DOELOG(3, LOG_INTERP,
{ disassemble_with_bytes(dcontext, bb->instr_start, THREAD); });
if (bb->outf != INVALID_FILE)
disassemble_with_bytes(dcontext, bb->instr_start, bb->outf);
if (!instr_valid(bb->instr))
break;
#ifdef X86
* then we should stop this basic block before getting to it.
*/
if (my_dcontext != NULL && debug_register_fire_on_addr(bb->instr_start)) {
stop_bb_on_fallthrough = true;
break;
}
if (!d_r_is_avx512_code_in_use()) {
if (ZMM_ENABLED()) {
if (instr_get_prefix_flag(bb->instr, PREFIX_EVEX)) {
* for the prefix flag, which for example can be set by
* decode_cti. In client_process_bb, post-client instructions
* are checked with instr_may_write_zmm_register.
*/
LOG(THREAD, LOG_INTERP, 2, "Detected AVX-512 code in use\n");
d_r_set_avx512_code_in_use(true, instr_get_app_pc(bb->instr));
proc_set_num_simd_saved(MCXT_NUM_SIMD_SLOTS);
}
}
}
#endif
* We do this even if -unsafe_ignore_eflags_prefix b/c it doesn't cost that
* much and we can use the analysis to detect any bb that reads a flag
* prior to writing it.
*/
if (bb->eflags != EFLAGS_WRITE_ARITH IF_X86(&&bb->eflags != EFLAGS_READ_OF))
bb->eflags = eflags_analysis(bb->instr, bb->eflags, &eflags_6);
*(== opcode valid) or a possible SEH frame push (if
* -process_SEH_push). */
#ifdef WINDOWS
if (DYNAMO_OPTION(process_SEH_push) &&
instr_get_prefix_flag(bb->instr, PREFIX_SEG_FS)) {
STATS_INC(num_bb_build_fs);
break;
}
#endif
#ifdef X64
if (instr_has_rel_addr_reference(bb->instr)) {
break;
}
#endif
#if defined(UNIX) && defined(X86)
if (INTERNAL_OPTION(mangle_app_seg) &&
instr_get_prefix_flag(bb->instr, PREFIX_SEG_FS | PREFIX_SEG_GS)) {
* only used as hints just after decoding.
* They are not accurate later and can be misleading.
* This can only be used right after decoding for quick check,
* and a walk of operands should be performed to look for
* actual far mem refs.
*/
* pass the instr_opcode_valid check in mangle and be mangled.
*/
instr_get_opcode(bb->instr);
break;
}
#endif
* so instr_opcode_valid(bb->instr) is true, and terminates the loop.
*/
} while (!instr_opcode_valid(bb->instr) && total_instrs <= cur_max_bb_instrs);
if (bb->cur_pc == NULL) {
* after updating stats
*/
bb->cur_pc = bb->instr_start;
}
* May as well set it for all cti's since there's
* really no extra overhead in doing so. Note that we go
* through the above loop only once for cti's, so it's safe
* to set the translation here.
*/
if (instr_opcode_valid(bb->instr) &&
(instr_is_cti(bb->instr) || bb->record_translation))
instr_set_translation(bb->instr, bb->instr_start);
#ifdef HOT_PATCHING_INTERFACE
* In hotp_inject(), address lookup will be done for each instruction
* pc in this bb and patching will be done if an exact match is found.
*
* Hot patching should be done only for app interp and recreating
* pc, not for reproducing app code. Hence we use mangle_ilist.
* See case 5981.
*
* FIXME: this lookup can further be reduced by determining whether or
* not the current bb's module needs patching via check_new_page*
*/
if (DYNAMO_OPTION(hot_patching) && bb->mangle_ilist && !hotp_should_inject) {
* be avoided - messy to hold hotp_vul_table lock like this for
* unnecessary operations. */
bool owns_hotp_lock = self_owns_write_lock(hotp_get_lock());
if (hotp_does_region_need_patch(non_cti_start_pc, bb->cur_pc,
owns_hotp_lock)) {
BBPRINT(bb, 2, "hotpatch match in " PFX ": " PFX "-" PFX "\n",
bb->start_pc, non_cti_start_pc, bb->cur_pc);
hotp_should_inject = true;
* the patch point can be a direct cti; eliding would result
* in the patch not being applied. See case 5901.
* FIXME: we could make this more efficient by only turning
* off follow_direct if the instr is direct cti.
*/
bb->follow_direct = false;
DOSTATS({
if (TEST(FRAG_HAS_DIRECT_CTI, bb->flags))
STATS_INC(hotp_num_frag_direct_cti);
});
}
}
#endif
if (bb->full_decode) {
if (TEST(FRAG_SELFMOD_SANDBOXED, bb->flags) && instr_valid(bb->instr) &&
instr_writes_memory(bb->instr)) {
total_writes++;
if (total_writes > DYNAMO_OPTION(selfmod_max_writes)) {
BBPRINT(bb, 3, "reached selfmod write limit %d, stopping\n",
DYNAMO_OPTION(selfmod_max_writes));
STATS_INC(num_max_selfmod_writes_enforced);
bb_stop_prior_to_instr(dcontext, bb,
false );
break;
}
}
} else if (bb->instr_start != non_cti_start_pc) {
non_cti = instr_create(dcontext);
IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_uint(bb->instr_start - non_cti_start_pc)));
instr_set_raw_bits(non_cti, non_cti_start_pc,
(uint)(bb->instr_start - non_cti_start_pc));
if (bb->record_translation)
instr_set_translation(non_cti, non_cti_start_pc);
instrlist_append(bb->ilist, non_cti);
}
DOSTATS({
* to count app code when we build new bbs, which is indicated by
* the bb->app_interp parameter
*/
if (bb->app_interp && !regenerated) {
* double count when we have multiple entry points into the
* same block of cti free instructions. */
STATS_ADD(app_code_seen, (bb->cur_pc - non_cti_start_pc));
LOG(THREAD, LOG_INTERP, 5, "adding %d bytes to total app code seen\n",
bb->cur_pc - non_cti_start_pc);
}
});
if (!instr_valid(bb->instr)) {
bb_process_invalid_instr(dcontext, bb);
break;
}
if (stop_bb_on_fallthrough) {
bb_stop_prior_to_instr(dcontext, bb, false );
break;
}
#ifdef ANNOTATIONS
# if !(defined(X64) && defined(WINDOWS))
if (is_decoded_valgrind_annotation_tail(bb->instr)) {
if (is_encoded_valgrind_annotation(bb->instr_start, bb->start_pc,
(app_pc)PAGE_START(bb->cur_pc))) {
instrument_valgrind_annotation(dcontext, bb->ilist, bb->instr,
bb->instr_start, bb->cur_pc, total_instrs);
continue;
}
} else
# endif
if (is_annotation_jump_over_dead_code(bb->instr)) {
instr_t *substitution = NULL;
if (instrument_annotation(
dcontext, &bb->cur_pc,
&substitution _IF_WINDOWS_X64(bb->cur_pc < bb->checked_end))) {
instr_destroy(dcontext, bb->instr);
if (substitution == NULL)
continue;
else
bb->instr = substitution;
}
}
#endif
#ifdef WINDOWS
if (DYNAMO_OPTION(process_SEH_push) &&
instr_get_prefix_flag(bb->instr, PREFIX_SEG_FS)) {
DEBUG_DECLARE(ssize_t dbl_count = bb->cur_pc - bb->instr_start);
if (!bb_process_fs_ref(dcontext, bb)) {
DOSTATS({
if (bb->app_interp) {
LOG(THREAD, LOG_INTERP, 3,
"stopping bb at fs-using instr @ " PFX "\n", bb->instr_start);
STATS_INC(num_process_SEH_bb_early_terminate);
* since we removed it from this bb */
if (!regenerated)
STATS_ADD(app_code_seen, -dbl_count);
}
});
break;
}
}
#else
# if defined(X86) && defined(LINUX)
if (instr_get_prefix_flag(bb->instr,
(SEG_TLS == SEG_GS) ? PREFIX_SEG_GS : PREFIX_SEG_FS)
IF_UNIX(&&!is_in_dynamo_dll(bb->instr_start)) &&
!INTERNAL_OPTION(mangle_app_seg)) {
CLIENT_ASSERT(false,
"no support for app using DR's segment w/o -mangle_app_seg");
ASSERT_BUG_NUM(205276, false);
}
# endif
#endif
if (my_dcontext != NULL && my_dcontext->single_step_addr == bb->instr_start) {
bb_process_single_step(dcontext, bb);
break;
}
if (instr_is_near_ubr(bb->instr)) {
if (bb_process_ubr(dcontext, bb))
continue;
else {
if (bb->instr != NULL)
bb->exit_type |= instr_branch_type(bb->instr);
break;
}
} else
instrlist_append(bb->ilist, bb->instr);
#ifdef RETURN_AFTER_CALL
if (bb->app_interp && dynamo_options.ret_after_call) {
if (instr_is_call(bb->instr)) {
add_return_target(dcontext, bb->instr_start, bb->instr);
}
}
#endif
#ifdef X64
if (DYNAMO_OPTION(coarse_split_riprel) && DYNAMO_OPTION(coarse_units) &&
TEST(FRAG_COARSE_GRAIN, bb->flags) &&
instr_has_rel_addr_reference(bb->instr)) {
if (instrlist_first(bb->ilist) != bb->instr) {
bb_stop_prior_to_instr(dcontext, bb, true );
break;
} else {
bb->flags &= ~FRAG_COARSE_GRAIN;
STATS_INC(coarse_prevent_riprel);
}
}
#endif
if (instr_is_near_call_direct(bb->instr)) {
if (!bb_process_call_direct(dcontext, bb)) {
if (bb->instr != NULL)
bb->exit_type |= instr_branch_type(bb->instr);
break;
}
} else if (instr_is_mbr(bb->instr)
IF_X86(
|| instr_get_opcode(bb->instr) == OP_jmp_far ||
instr_get_opcode(bb->instr) == OP_call_far)
IF_ARM(
|| instr_get_opcode(bb->instr) == OP_blx)) {
* indirect branch processing.
*/
bool normal_indirect_processing = true;
bool elide_and_continue_if_converted = true;
if (instr_is_return(bb->instr)) {
bb->ibl_branch_type = IBL_RETURN;
STATS_INC(num_returns);
} else if (instr_is_call_indirect(bb->instr)) {
STATS_INC(num_all_calls);
STATS_INC(num_indirect_calls);
if (DYNAMO_OPTION(coarse_split_calls) && DYNAMO_OPTION(coarse_units) &&
TEST(FRAG_COARSE_GRAIN, bb->flags)) {
if (instrlist_first(bb->ilist) != bb->instr) {
bb_stop_prior_to_instr(dcontext, bb, true );
break;
} else {
bb->flags &= ~FRAG_COARSE_GRAIN;
STATS_INC(coarse_prevent_cti);
}
}
* bypass normal indirect call processing.
* First, check for a call* that we treat as a syscall.
*/
if (bb_process_indcall_syscall(dcontext, bb,
&elide_and_continue_if_converted)) {
normal_indirect_processing = false;
} else if (DYNAMO_OPTION(indcall2direct) &&
bb_process_convertible_indcall(dcontext, bb)) {
normal_indirect_processing = false;
elide_and_continue_if_converted = true;
} else if (DYNAMO_OPTION(IAT_convert) &&
bb_process_IAT_convertible_indcall(
dcontext, bb, &elide_and_continue_if_converted)) {
normal_indirect_processing = false;
} else
bb->ibl_branch_type = IBL_INDCALL;
#ifdef X86
} else if (instr_get_opcode(bb->instr) == OP_jmp_far) {
bb->ibl_branch_type = IBL_INDJMP;
} else if (instr_get_opcode(bb->instr) == OP_call_far) {
bb->ibl_branch_type = IBL_INDCALL;
#elif defined(ARM)
} else if (instr_get_opcode(bb->instr) == OP_blx) {
bb->ibl_branch_type = IBL_INDCALL;
#endif
} else {
instr_t *prev = instr_get_prev(bb->instr);
if (prev != NULL && instr_opcode_valid(prev) &&
instr_is_call_direct(prev)) {
bb->exit_type |= INSTR_IND_JMP_PLT_EXIT;
only a _likely_ PLT call, we still have to make
sure it goes through IAT - see case 4269
*/
STATS_INC(num_indirect_jumps_likely_PLT);
}
elide_and_continue_if_converted = true;
if (DYNAMO_OPTION(IAT_convert) &&
bb_process_IAT_convertible_indjmp(dcontext, bb,
&elide_and_continue_if_converted)) {
* the PLT to a direct transition (and possibly elided).
* Xref case 7867 for why leaving this flag in the eliding
* case can cause later failures. */
bb->exit_type &= ~INSTR_CALL_EXIT;
normal_indirect_processing = false;
} else
bb->ibl_branch_type = IBL_INDJMP;
STATS_INC(num_indirect_jumps);
}
#ifdef CUSTOM_TRACES_RET_REMOVAL
if (instr_is_return(bb->instr))
my_dcontext->num_rets++;
else if (instr_is_call_indirect(bb->instr))
my_dcontext->num_calls++;
#endif
if (normal_indirect_processing) {
bb->exit_type |= instr_branch_type(bb->instr);
bb->exit_target =
get_ibl_routine(dcontext, get_ibl_entry_type(bb->exit_type),
DEFAULT_IBL_BB(), bb->ibl_branch_type);
LOG(THREAD, LOG_INTERP, 4, "mbr exit target = " PFX "\n",
bb->exit_target);
break;
} else {
if (!elide_and_continue_if_converted)
break;
}
} else if (instr_is_cti(bb->instr) &&
(!instr_is_call(bb->instr) || instr_is_cbr(bb->instr))) {
total_branches++;
if (total_branches >= BRANCH_LIMIT) {
instr_exit_branch_set_type(bb->instr, instr_branch_type(bb->instr));
break;
}
} else if (instr_is_syscall(bb->instr)) {
if (!bb_process_syscall(dcontext, bb))
break;
}
else if (instr_get_opcode(bb->instr) ==
IF_X86_ELSE(OP_int, IF_RISCV64_ELSE(OP_ecall, OP_svc))) {
if (!bb_process_interrupt(dcontext, bb))
break;
}
#ifdef AARCH64
else if (instr_get_opcode(bb->instr) == OP_isb)
break;
#endif
#if 0
else if (instr_get_opcode(bb->instr) == OP_getsec) {
* dynamically handle the leaf functions here, which can change eip
* and other state. We'll need OP_getsec in decode_cti().
*/
}
else if (instr_get_opcode(bb->instr) == OP_xend ||
instr_get_opcode(bb->instr) == OP_xabort) {
* fallback pc recorded by OP_xbegin. We'll need both in decode_cti().
*/
}
#endif
#ifdef CHECK_RETURNS_SSE2
* we perform a simple coarse-grain check here.
*/
else if (instr_is_sse_or_sse2(bb->instr)) {
FATAL_USAGE_ERROR(CHECK_RETURNS_SSE2_XMM_USED, 2, get_application_name(),
get_application_pid());
}
#endif
#if defined(UNIX) && !defined(DGC_DIAGNOSTICS) && defined(X86)
else if (instr_get_opcode(bb->instr) == OP_mov_seg) {
if (!bb_process_mov_seg(dcontext, bb))
break;
}
#endif
else if (instr_saves_float_pc(bb->instr)) {
bb_process_float_pc(dcontext, bb);
break;
}
if (bb->cur_pc == bb->stop_pc) {
BBPRINT(bb, 3, "reached end pc " PFX ", stopping\n", bb->stop_pc);
break;
}
if (total_instrs > DYNAMO_OPTION(max_bb_instrs)) {
* be some degenerate infinite-loop case like a call
* to a function that calls exit() and then calls itself,
* so just end it here, we'll pick up where we left off
* if it's legit
*/
BBPRINT(bb, 3, "reached -max_bb_instrs(%d): %d, ",
DYNAMO_OPTION(max_bb_instrs), total_instrs);
if (bb_safe_to_stop(dcontext, bb->ilist, NULL)) {
BBPRINT(bb, 3, "stopping\n");
STATS_INC(num_max_bb_instrs_enforced);
break;
} else {
* We can either roll-back and find previous safe stop point, or
* simply extend the bb with a few more instructions.
* We can always lower the -max_bb_instrs to offset the additional
* instructions. In contrast, roll-back seems complex and
* potentially problematic.
*/
BBPRINT(bb, 3, "cannot stop, continuing\n");
}
}
}
KSTOP(bb_decoding);
#ifdef DEBUG_MEMORY
ASSERT(bb->instr == NULL ||
(bb->instr->bytes != (byte *)HEAP_UNALLOCATED_PTR_UINT &&
bb->instr->bytes != (byte *)HEAP_ALLOCATED_PTR_UINT &&
bb->instr->bytes != (byte *)HEAP_PAD_PTR_UINT));
#endif
if (!check_new_page_contig(dcontext, bb, bb->cur_pc - 1)) {
ASSERT(false && "Should have checked cur_pc-1 in decode loop");
}
bb->end_pc = bb->cur_pc;
BBPRINT(bb, 3, "end_pc = " PFX "\n\n", bb->end_pc);
#ifdef LINUX
if (TEST(FRAG_HAS_RSEQ_ENDPOINT, bb->flags)) {
instr_t *label = INSTR_CREATE_label(dcontext);
instr_set_note(label, (void *)DR_NOTE_REG_BARRIER);
* truncated the block after that instruction so bb->instr may
* be NULL so we append.
*/
instrlist_meta_append(bb->ilist, label);
}
#endif
* for native_exec overlap, but selfmod ubrs don't even call that routine
*/
if (DYNAMO_OPTION(native_exec) && DYNAMO_OPTION(native_exec_callcall) &&
!vmvector_empty(native_exec_areas) && bb->app_interp && bb->instr != NULL &&
(instr_is_near_ubr(bb->instr) || instr_is_near_call_direct(bb->instr)) &&
instrlist_first(bb->ilist) == instrlist_last(bb->ilist)) {
* a call to a native_exec dll -- we need to put the gateway at the
* call* to avoid retaddr mangling of the method table call.
* As a side effect we can also handle call*, jmp.
* We don't actually verify or care that it was specifically a call*,
* whatever at_native_exec_gateway() requires to assure itself that we're
* at a return-address-clobberable point.
*/
app_pc tgt = opnd_get_pc(instr_get_target(bb->instr));
if (is_native_pc(tgt) &&
at_native_exec_gateway(dcontext, tgt,
&bb->native_call _IF_DEBUG(true ))) {
LOG(THREAD, LOG_INTERP, 2,
"direct xfer @gateway @" PFX " to native_exec module " PFX "\n",
bb->start_pc, tgt);
bb->native_exec = true;
* for future entrances and b/c .NET changes its method table call
* from targeting a native_exec image to instead target DGC directly,
* thwarting our gateway!
* FIXME: if heap region de-allocated, we'll remove, but what if re-used
* w/o going through syscalls? Just written over w/ something else?
* We'll keep it on native_exec_list...
*/
ASSERT(bb->end_pc == bb->start_pc + DIRECT_XFER_LENGTH);
vmvector_add(native_exec_areas, bb->start_pc, bb->end_pc, NULL);
DODEBUG({ report_native_module(dcontext, tgt); });
STATS_INC(num_native_module_entrances_callcall);
return;
}
}
#ifdef UNIX
* loses control of the app b/c of _dl_runtime_resolve
*/
int ret_imm;
if (DYNAMO_OPTION(native_exec) && DYNAMO_OPTION(native_exec_opt) && bb->app_interp &&
bb->instr != NULL && instr_is_return(bb->instr) &&
at_dl_runtime_resolve_ret(dcontext, bb->start_pc, &ret_imm)) {
dr_insert_clean_call(dcontext, bb->ilist, bb->instr,
(void *)native_module_at_runtime_resolve_ret, false, 2,
opnd_create_reg(REG_XSP), OPND_CREATE_INT32(ret_imm));
}
#endif
STATS_TRACK_MAX(max_instrs_in_a_bb, total_instrs);
if (stop_bb_on_fallthrough && TEST(FRAG_HAS_DIRECT_CTI, bb->flags)) {
* boundary, we can't actually do the elision. See the
* sandbox_last_byte() test case in security-common/sandbox.c. Restart
* bb building without follow_direct. Alternatively, we could check the
* vmareas of the targeted instruction before performing elision.
*/
* hot patching turns off follow_direct, the current bb was elided
* earlier and is marked as selfmod. hotp_num_frag_direct_cti will
* track this for now.
*/
ASSERT(bb->follow_direct);
BBPRINT(bb, 2,
"*** must rebuild bb to avoid following direct cti to "
"incompatible vmarea\n");
STATS_INC(num_bb_end_early);
instrlist_clear_and_destroy(dcontext, bb->ilist);
if (bb->vmlist != NULL) {
vm_area_destroy_list(dcontext, bb->vmlist);
bb->vmlist = NULL;
}
* Try to keep the known flags. We stopped the bb before merging in any
* incompatible flags.
*/
bb->flags &= ~FRAG_HAS_DIRECT_CTI;
bb->follow_direct = false;
bb->exit_type = 0;
bb->exit_target = NULL;
build_bb_ilist(dcontext, bb);
return;
}
if (TEST(FRAG_SELFMOD_SANDBOXED, bb->flags)) {
ASSERT(bb->full_decode);
ASSERT(!bb->follow_direct);
ASSERT(!TEST(FRAG_HAS_DIRECT_CTI, bb->flags));
}
#ifdef HOT_PATCHING_INTERFACE
* can be changed by the client. This will mess up hot patching.
* The same is true for mangling.
*/
if (hotp_should_inject) {
ASSERT(DYNAMO_OPTION(hot_patching));
hotp_injected = hotp_inject(dcontext, bb->ilist);
* accesses dcontext fields directly, so the injected bbs can't be
* shared until that is changed or the clean call mechanism is replaced
* with bb termination to execute hot patches.
* Case 9995 assumes that hotp fragments are fine-grained, which we
* achieve today by being private; if we make shared we must explicitly
* prevent from being coarse-grained.
*/
if (hotp_injected) {
bb->flags &= ~FRAG_SHARED;
bb->flags |= FRAG_CANNOT_BE_TRACE;
}
}
#endif
* at the default debug build -checklevel 2.
*/
IF_ARM(DOCHECK(2, check_encode_decode_consistency(dcontext, bb->ilist);));
#ifdef DR_APP_EXPORTS
mangle_pre_client(dcontext, bb);
#endif
#ifdef DEBUG
if (bb->for_cache && INTERNAL_OPTION(go_native_at_bb_count) > 0 &&
debug_bb_count++ >= INTERNAL_OPTION(go_native_at_bb_count)) {
SYSLOG_INTERNAL_INFO("thread " TIDFMT " is going native @%d bbs to " PFX,
d_r_get_thread_id(), debug_bb_count - 1, bb->start_pc);
dcontext->native_exec_postsyscall = bb->start_pc;
dcontext->next_tag = BACK_TO_NATIVE_AFTER_SYSCALL;
dynamo_thread_not_under_dynamo(dcontext);
IF_UNIX(os_swap_context(dcontext, true , DR_STATE_GO_NATIVE));
os_process_not_under_dynamorio(dcontext);
bb_build_abort(dcontext, true , false );
return;
}
#endif
if (!client_process_bb(dcontext, bb)) {
bb_build_abort(dcontext, true , false );
return;
}
if (instrlist_get_return_target(bb->ilist) != NULL ||
instrlist_get_fall_through_target(bb->ilist) != NULL) {
CLIENT_ASSERT(instr_is_cbr(instrlist_last(bb->ilist)) ||
instr_is_call(instrlist_last(bb->ilist)),
"instr_set_return_target/instr_set_fall_through_target"
" can only be used in a bb ending with call/cbr");
bb->flags |= FRAG_CANNOT_BE_TRACE;
}
if (bb->unmangled_ilist != NULL)
*bb->unmangled_ilist = instrlist_clone(dcontext, bb->ilist);
if (bb->instr != NULL && instr_opcode_valid(bb->instr) &&
instr_is_far_cti(bb->instr)) {
* cross-mode direct stubs varying in a trace by disallowing
* far cti in middle of trace
*/
bb->flags |= FRAG_MUST_END_TRACE;
bb->flags &= ~FRAG_COARSE_GRAIN;
}
* corresponding to the fall-through of the conditional branch or
* the target of the final indirect branch (the indirect branch itself
* will get mangled into a non-cti)
*/
if (bb->exit_target == NULL) {
bb->exit_target = instrlist_get_fall_through_target(bb->ilist);
if (bb->exit_target == NULL)
bb->exit_target = (cache_pc)bb->cur_pc;
else {
LOG(THREAD, LOG_INTERP, 3, "set fall-throught target " PFX " by client\n",
bb->exit_target);
}
if (bb->instr != NULL && instr_opcode_valid(bb->instr) &&
instr_is_cbr(bb->instr) &&
(int)(bb->exit_target - bb->start_pc) <= SHRT_MAX &&
(int)(bb->exit_target - bb->start_pc) >= SHRT_MIN &&
!instr_is_cti_loop(bb->instr))
bb->flags |= FRAG_CBR_FALLTHROUGH_SHORT;
}
* or syscall-containing ones (to bound delay on threads exiting shared cache,
* for cache management, both consistency and capacity)
* bbs injected with hot patches are also not shared (see case 5272).
*/
if (DYNAMO_OPTION(shared_bbs) && !TEST(FRAG_SELFMOD_SANDBOXED, bb->flags) &&
!TEST(FRAG_TEMP_PRIVATE, bb->flags)
#ifdef HOT_PATCHING_INTERFACE
&& !hotp_injected
#endif
&& (my_dcontext == NULL || my_dcontext->single_step_addr != bb->instr_start)) {
* non-ignorable one -- meaning that the frag will exit the cache
* to execute the syscall -- it can be shared.
* We don't support ignorable syscalls in shared fragments, as they
* don't set at_syscall and so are incompatible w/ -syscalls_synch_flush.
*/
if (!TEST(FRAG_HAS_SYSCALL, bb->flags) ||
TESTANY(LINK_NI_SYSCALL_ALL, bb->exit_type) ||
TEST(LINK_SPECIAL_EXIT, bb->exit_type))
bb->flags |= FRAG_SHARED;
#ifdef WINDOWS
* executed via the version of shared syscall that can be targetted by
* shared frags.
*/
else if (TEST(FRAG_HAS_SYSCALL, bb->flags) &&
DYNAMO_OPTION(shared_fragment_shared_syscalls) &&
bb->exit_target == shared_syscall_routine(dcontext))
bb->flags |= FRAG_SHARED;
else {
ASSERT((TEST(FRAG_HAS_SYSCALL, bb->flags) &&
(DYNAMO_OPTION(ignore_syscalls) ||
(!DYNAMO_OPTION(shared_fragment_shared_syscalls) &&
bb->exit_target == shared_syscall_routine(dcontext)))) &&
"BB not shared for unknown reason");
}
#endif
} else if (my_dcontext != NULL && my_dcontext->single_step_addr == bb->instr_start) {
bb->exit_type |= LINK_SPECIAL_EXIT;
}
if (TEST(FRAG_COARSE_GRAIN, bb->flags) &&
(!TEST(FRAG_SHARED, bb->flags) ||
* decode_fragment() cannot handle -- and on win32 this overlaps w/
* FRAG_MUST_END_TRACE and LINK_NI_SYSCALL
*/
TEST(FRAG_HAS_SYSCALL, bb->flags) || TEST(FRAG_MUST_END_TRACE, bb->flags) ||
TEST(FRAG_CANNOT_BE_TRACE, bb->flags) ||
TEST(FRAG_SELFMOD_SANDBOXED, bb->flags) ||
TEST(FRAG_HAS_TRANSLATION_INFO, bb->flags) ||
* not-inlined call/jmp: we turn off FRAG_COARSE_GRAIN up above
*/
#ifdef WINDOWS
TEST(LINK_CALLBACK_RETURN, bb->exit_type) ||
#endif
TESTANY(LINK_NI_SYSCALL_ALL, bb->exit_type))) {
STATS_INC(num_fine_in_coarse);
DOSTATS({
if (!TEST(FRAG_SHARED, bb->flags))
STATS_INC(coarse_prevent_private);
else if (TEST(FRAG_HAS_SYSCALL, bb->flags))
STATS_INC(coarse_prevent_syscall);
else if (TEST(FRAG_MUST_END_TRACE, bb->flags))
STATS_INC(coarse_prevent_end_trace);
else if (TEST(FRAG_CANNOT_BE_TRACE, bb->flags))
STATS_INC(coarse_prevent_no_trace);
else if (TEST(FRAG_SELFMOD_SANDBOXED, bb->flags))
STATS_INC(coarse_prevent_selfmod);
else if (TEST(FRAG_HAS_TRANSLATION_INFO, bb->flags))
STATS_INC(coarse_prevent_translation);
else if (IF_WINDOWS_ELSE_0(TEST(LINK_CALLBACK_RETURN, bb->exit_type)))
STATS_INC(coarse_prevent_cbret);
else if (TESTANY(LINK_NI_SYSCALL_ALL, bb->exit_type))
STATS_INC(coarse_prevent_syscall);
else
ASSERT_NOT_REACHED();
});
bb->flags &= ~FRAG_COARSE_GRAIN;
}
ASSERT(!TEST(FRAG_COARSE_GRAIN, bb->flags) || !TEST(FRAG_HAS_DIRECT_CTI, bb->flags));
if (!TEST(FRAG_SHARED, bb->flags) && TEST(LINK_INDIRECT, bb->exit_type)) {
ASSERT(bb->exit_target ==
get_ibl_routine(dcontext, get_ibl_entry_type(bb->exit_type),
DEFAULT_IBL_BB(), bb->ibl_branch_type));
bb->exit_target = get_ibl_routine(dcontext, get_ibl_entry_type(bb->exit_type),
IBL_BB_PRIVATE, bb->ibl_branch_type);
}
if (bb->mangle_ilist &&
(bb->instr == NULL || !instr_opcode_valid(bb->instr) ||
!instr_is_near_ubr(bb->instr) || instr_is_meta(bb->instr))) {
instr_t *exit_instr =
XINST_CREATE_jump(dcontext, opnd_create_pc(bb->exit_target));
if (bb->record_translation) {
app_pc translation = NULL;
if (bb->instr == NULL || !instr_opcode_valid(bb->instr)) {
* for special handling (invalid/syscall/int 2b) or there were
* no instructions added (i.e. check_stopping_point in which
* case instr_start == cur_pc), use last instruction's start
* address for the translation */
translation = bb->instr_start;
} else if (instr_is_cti(bb->instr)) {
* the mangling of the cti (since we might not know the target
* if, for ex., its indirect) */
translation = instr_get_translation(bb->instr);
} else {
translation = bb->cur_pc;
ASSERT(bb->cur_pc == bb->exit_target);
}
ASSERT(translation != NULL);
instr_set_translation(exit_instr, translation);
}
* we won't relocate a thread there and re-do a ret pop or call push
*/
instr_set_our_mangling(exit_instr, true);
LOG(THREAD, LOG_EMIT, 3, "exit_branch_type=0x%x bb->exit_target=" PFX "\n",
bb->exit_type, bb->exit_target);
instr_exit_branch_set_type(exit_instr, bb->exit_type);
instrlist_append(bb->ilist, exit_instr);
#ifdef ARM
if (bb->svc_pred != DR_PRED_NONE) {
instr_set_predicate(exit_instr, bb->svc_pred);
exit_instr = XINST_CREATE_jump(dcontext, opnd_create_pc(bb->exit_target));
if (bb->record_translation)
instr_set_translation(exit_instr, bb->cur_pc);
instr_set_our_mangling(exit_instr, true);
instr_exit_branch_set_type(exit_instr, LINK_DIRECT | LINK_JMP);
instrlist_append(bb->ilist, exit_instr);
* so we need reset encode state to avoid holding a dangling pointer.
*/
encode_reset_it_block(dcontext);
}
#endif
}
#ifdef DGC_DIAGNOSTICS
if (TEST(FRAG_DYNGEN, bb->flags))
bb->flags |= FRAG_CANNOT_BE_TRACE;
#endif
if (!INTERNAL_OPTION(unsafe_ignore_eflags_prefix)
IF_X64(|| !INTERNAL_OPTION(unsafe_ignore_eflags_trace))) {
bb->flags |= instr_eflags_to_fragment_eflags(bb->eflags);
if (TEST(FRAG_WRITES_EFLAGS_OF, bb->flags)) {
LOG(THREAD, LOG_INTERP, 4, "fragment writes OF prior to reading it!\n");
STATS_INC(bbs_eflags_writes_of);
} else if (TEST(FRAG_WRITES_EFLAGS_6, bb->flags)) {
IF_X86(ASSERT(TEST(FRAG_WRITES_EFLAGS_OF, bb->flags)));
LOG(THREAD, LOG_INTERP, 4,
"fragment writes all 6 flags prior to reading any\n");
STATS_INC(bbs_eflags_writes_6);
} else {
DOSTATS({
if (bb->eflags == EFLAGS_READ_ARITH) {
* reads one flag and later writes OF, or writes OF and
* later reads one flag before writing that flag.
*/
STATS_INC(bbs_eflags_reads);
} else {
STATS_INC(bbs_eflags_writes_none);
if (TEST(LINK_INDIRECT, bb->exit_type))
STATS_INC(bbs_eflags_writes_none_ind);
}
});
}
}
if (TEST(FRAG_HAS_TRANSLATION_INFO, bb->flags) &&
(!bb->record_translation || !bb->full_decode))
bb->flags &= ~FRAG_HAS_TRANSLATION_INFO;
* app memory so we wait until all done)
*/
if (!bb_build_nested && !bb->for_cache && my_dcontext != NULL) {
ASSERT(my_dcontext->bb_build_info == (void *)bb);
my_dcontext->bb_build_info = NULL;
}
bb->instr = NULL;
if (!bb->mangle_ilist) {
* caller must use full_decode to find invalid instrs and avoid
* a discrepancy w/ for_cache case that aborts b/c of selfmod sandbox
* returning false (in code below)
*/
return;
}
if (!mangle_bb_ilist(dcontext, bb)) {
build_bb_ilist(dcontext, bb);
return;
}
}
* middle of bb building, in order to free resources
*/
void
bb_build_abort(dcontext_t *dcontext, bool clean_vmarea, bool unlock)
{
ASSERT(dcontext->bb_build_info != NULL);
if (dcontext->bb_build_info != NULL) {
build_bb_t *bb = (build_bb_t *)dcontext->bb_build_info;
if (bb->instr != NULL && bb->ilist != NULL &&
instrlist_last(bb->ilist) != bb->instr)
instr_destroy(dcontext, bb->instr);
DODEBUG({ bb->instr = NULL; });
if (bb->ilist != NULL) {
instrlist_clear_and_destroy(dcontext, bb->ilist);
DODEBUG({ bb->ilist = NULL; });
}
if (clean_vmarea) {
* the middle of check_thread_vm_area and had a decode fault
* during code origins checking!)
*/
check_thread_vm_area_abort(dcontext, &bb->vmlist, bb->flags);
}
if (unlock) {
* and on a nested app bb build where !bb->for_cache we do keep the
* original bb info in dcontext (see build_bb_ilist()).
*/
if (bb->has_bb_building_lock) {
ASSERT_OWN_MUTEX(USE_BB_BUILDING_LOCK(), &bb_building_lock);
SHARED_BB_UNLOCK();
KSTOP_REWIND(bb_building);
} else
ASSERT_DO_NOT_OWN_MUTEX(USE_BB_BUILDING_LOCK(), &bb_building_lock);
}
dcontext->bb_build_info = NULL;
}
}
/* Returns the record_translation setting of the bb currently being built
 * for dcontext, or false when no bb build is in progress.
 * NOTE(review): going by the name, this is consulted by instr expansion
 * code to decide whether to set a translation -- confirm against callers.
 */
bool
expand_should_set_translation(dcontext_t *dcontext)
{
    if (dcontext->bb_build_info != NULL) {
        build_bb_t *bb = (build_bb_t *)dcontext->bb_build_info;
        /* Expansion should only set the translation to
         * the raw bytes if we're building a bb where we can assume
         * the raw byte pointer is the app pc.
         */
        return bb->record_translation;
    }
    return false;
}
* set the bb flags needed to ensure successful mangling 2nd time around
*/
static bool
mangle_bb_ilist(dcontext_t *dcontext, build_bb_t *bb)
{
#ifdef X86
if (TEST(FRAG_SELFMOD_SANDBOXED, bb->flags)) {
byte *selfmod_start, *selfmod_end;
* check_thread_vm_area should have ensured this for us
*/
ASSERT(!TEST(FRAG_HAS_DIRECT_CTI, bb->flags));
LOG(THREAD, LOG_INTERP, 2,
"fragment overlaps selfmod area, inserting sandboxing\n");
* to store app code for each trace bb and update sandbox code
* to point there
*/
bb->flags |= FRAG_CANNOT_BE_TRACE;
if (bb->pretend_pc != NULL) {
selfmod_start = bb->pretend_pc;
selfmod_end = bb->pretend_pc + (bb->cur_pc - bb->start_pc);
} else {
selfmod_start = bb->start_pc;
selfmod_end = bb->cur_pc;
}
if (!insert_selfmod_sandbox(dcontext, bb->ilist, bb->flags, selfmod_start,
selfmod_end, bb->record_translation, bb->for_cache)) {
* in middle, which we don't want to deal w/ for sandboxing!
*/
ASSERT(!bb->full_decode);
LOG(THREAD, LOG_INTERP, 2,
"*** must rebuild bb to avoid invalid instr in middle ***\n");
STATS_INC(num_bb_end_early);
instrlist_clear_and_destroy(dcontext, bb->ilist);
if (bb->vmlist != NULL) {
vm_area_destroy_list(dcontext, bb->vmlist);
bb->vmlist = NULL;
}
bb->flags = FRAG_SELFMOD_SANDBOXED;
bb->full_decode = true;
bb->follow_direct = false;
bb->exit_type = 0;
bb->exit_target = NULL;
return false;
}
STATS_INC(num_sandboxed_fragments);
}
#endif
* beyond "after instrumentation".
*/
DOLOG(5, LOG_INTERP, {
LOG(THREAD, LOG_INTERP, 5, "bb ilist before mangling:\n");
instrlist_disassemble(dcontext, bb->start_pc, bb->ilist, THREAD);
});
d_r_mangle(dcontext, bb->ilist, &bb->flags, true, bb->record_translation);
DOLOG(4, LOG_INTERP, {
LOG(THREAD, LOG_INTERP, 4, "bb ilist after mangling:\n");
instrlist_disassemble(dcontext, bb->start_pc, bb->ilist, THREAD);
});
return true;
}
* block is found, following all the rules that build_bb_ilist follows
* with regard to terminating the block. Does no mangling or anything of
* the app code, though -- this routine is intended only for building the
* original code!
* Caller is responsible for freeing the list and its instrs!
* If outf != INVALID_FILE, does full disassembly with comments to outf.
*/
instrlist_t *
build_app_bb_ilist(dcontext_t *dcontext, byte *start_pc, file_t outf)
{
build_bb_t bb;
init_build_bb(&bb, start_pc, false , false ,
false , false , outf,
0 , NULL );
build_bb_ilist(dcontext, &bb);
return bb.ilist;
}
* following all the rules that DynamoRIO follows internally for
* terminating basic blocks. Note that DynamoRIO does not validate
* that start_pc is actually the first instruction of a basic block.
* \note Caller is reponsible for freeing the list and its instrs!
*/
instrlist_t *
decode_as_bb(void *drcontext, byte *start_pc)
{
    build_bb_t bb;

    /* When DR has hooked the app code at start_pc, we hide the jump to
     * the interception buffer from the client BB callback.  If the
     * client asks to decode that address here, we need to decode the
     * instructions in the interception buffer instead so that we
     * again hide our hooking.
     * We will have the jmp from the buffer back to after the hooked
     * app code visible to the client (just like it is for the
     * real bb built there, so at least we're consistent).
     */
#ifdef WINDOWS
    byte *real_pc;
    if (is_intercepted_app_pc((app_pc)start_pc, &real_pc))
        start_pc = real_pc;
#endif

    init_build_bb(&bb, start_pc, false, false, false,
                  true, /* NOTE(review): believed to be the translation-recording
                         * argument (confirm against init_build_bb), which
                         * currently turns on full decode; today we
                         * provide no way to turn that off, as IR
                         * expansion routines are not exported (PR 200409). */
                  INVALID_FILE, 0, NULL);
    build_bb_ilist((dcontext_t *)drcontext, &bb);
    /* The caller owns the returned list and its instrs. */
    return bb.ilist;
}
/* Client routine to decode the trace identified by tag.  The returned instructions are
 * the original app code, i.e., no client modifications.
 */
instrlist_t *
decode_trace(void *drcontext, void *tag)
{
    dcontext_t *dcontext = (dcontext_t *)drcontext;
    fragment_t *frag = fragment_lookup(dcontext, tag);
    /* We don't support asking about other threads, for synch purposes
     * (see recreate_fragment_ilist() synch notes)
     */
    if (get_thread_private_dcontext() != dcontext)
        return NULL;

    if (frag != NULL && TEST(FRAG_IS_TRACE, frag->flags)) {
        instrlist_t *ilist;
        bool alloc_res;
        /* Support being called while couldbelinking as well as
         * from cache clean call (nolinking).  We disallow asking about
         * another thread's private traces.
         */
        /* Decide once whether we need the lock so that the lock and unlock below
         * are guaranteed to pair up.
         */
        bool need_lock = !is_couldbelinking(dcontext);
        if (need_lock)
            d_r_mutex_lock(&thread_initexit_lock);
        ilist = recreate_fragment_ilist(dcontext, NULL, &frag, &alloc_res,
                                        false /*mangle*/, false /*call_client*/);
        /* We passed in an existing fragment so nothing should have been allocated. */
        ASSERT(!alloc_res);
        if (need_lock)
            d_r_mutex_unlock(&thread_initexit_lock);
        return ilist;
    }

    /* No fragment for tag, or it is not a trace. */
    return NULL;
}
/* Builds a throwaway bb ilist starting at start_pc, using flags as the initial
 * flags, and returns the end_pc recorded by build_bb_ilist.
 */
app_pc
find_app_bb_end(dcontext_t *dcontext, byte *start_pc, uint flags)
{
    build_bb_t scan;
    app_pc bb_end;

    init_build_bb(&scan, start_pc, false, false, false, false, INVALID_FILE, flags,
                  NULL);
    build_bb_ilist(dcontext, &scan);

    /* Only the end address is of interest: save it and discard the instrs. */
    bb_end = scan.end_pc;
    instrlist_clear_and_destroy(dcontext, scan.ilist);
    return bb_end;
}
/* Builds a throwaway bb ilist starting at start_pc with an overlap_info_t describing
 * [region_start, region_end) attached, and returns the resulting overlap field.
 * If info_res is non-NULL the whole overlap_info_t (with bb_end filled in from the
 * bb's end_pc) is copied out to it.
 */
bool
app_bb_overlaps(dcontext_t *dcontext, byte *start_pc, uint flags, byte *region_start,
                byte *region_end, overlap_info_t *info_res)
{
    build_bb_t probe;
    overlap_info_t result;

    result.region_start = region_start;
    result.region_end = region_end;

    init_build_bb(&probe, start_pc, false, false, false, false, INVALID_FILE, flags,
                  &result);
    build_bb_ilist(dcontext, &probe);
    /* The instrs themselves are not needed, only what was recorded while building. */
    instrlist_clear_and_destroy(dcontext, probe.ilist);

    result.bb_end = probe.end_pc;
    if (info_res != NULL)
        *info_res = result;
    return result.overlap;
}
#ifdef DEBUG
/* Debug-only: logs, counts, and emits a once-only warning that the module containing
 * modpc is being executed natively.  When no module name is found for modpc, the
 * name "<DGC>" is reported instead.
 */
static void
report_native_module(dcontext_t *dcontext, app_pc modpc)
{
    char namebuf[MAX_MODNAME_INTERNAL];
    const char *modname;

    if (os_get_module_name_buf(modpc, namebuf, BUFFER_SIZE_ELEMENTS(namebuf)) != 0) {
        modname = namebuf;
    } else {
        /* A nameless region is only expected here under -native_exec_callcall. */
        ASSERT(DYNAMO_OPTION(native_exec_callcall));
        modname = "<DGC>";
    }

    LOG(THREAD, LOG_INTERP | LOG_VMAREAS, 2,
        "module %s is on native list, executing natively\n", modname);
    STATS_INC(num_native_module_entrances);
    SYSLOG_INTERNAL_WARNING_ONCE("module %s set up for native execution", modname);
}
#endif
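/* Builds the gateway bb used to transfer to native execution at bb->start_pc.
 * WARNING: this breaks all kinds of rules, like return address transparency and
 * assuming app stack and not doing calls out of the cache and not having
 * control during dll loads, etc...
 */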
static void
build_native_exec_bb(dcontext_t *dcontext, build_bb_t *bb)
{
    instr_t *in;
    opnd_t jmp_tgt IF_AARCH64(UNUSED);
#if defined(X86) && defined(X64)
    bool reachable = rel32_reachable_from_vmcode(bb->start_pc);
#endif
    DEBUG_DECLARE(bool ok;)
    /* XXX: if we ever protect against simultaneous multi-thread attacks then this will
     * be a hole -- for now should work, all protected while native until
     * another thread goes into DR
     */
    /* Create a bb that changes the return address on the app stack such that we
     * will take control when coming back, and then goes native.
     * N.B.: we ASSUME we reached this module via a call --
     * build_basic_block_fragment needs to make sure, since we can't verify here
     * w/o trying to decode backward from retaddr, and if we're wrong we'll
     * clobber the stack and never regain control!
     * We also assume this bb is never reached later through a non-call.
     */
    ASSERT(bb->initialized);
    ASSERT(bb->app_interp);
    ASSERT(!bb->record_translation);
    ASSERT(bb->start_pc != NULL);
    /* The vmlist must be empty or not being recorded, unless the start vmarea check
     * has already been performed.
     */
    ASSERT(bb->vmlist == NULL || !bb->record_vmlist || bb->checked_start_vmarea);
    /* No translation info is kept for this bb (clearing an unset bit is a nop). */
    bb->flags &= ~FRAG_HAS_TRANSLATION_INFO;
    bb->native_exec = true;

    BBPRINT(bb, IF_DGCDIAG_ELSE(1, 2), "build_native_exec_bb @" PFX "\n", bb->start_pc);
    DOLOG(2, LOG_INTERP,
          { dump_mcontext(get_mcontext(dcontext), THREAD, DUMP_NOT_XML); });
    if (!bb->checked_start_vmarea)
        check_new_page_start(dcontext, bb);
    /* Create the instrlist only after check_new_page_start, to avoid leaking it
     * on unreadable memory
     * WARNING: do not add any app instructions to this ilist!
     * If you do you must enable selfmod below.
     */
    bb->ilist = instrlist_create(dcontext);
    /* FIXME: we won't properly translate a fault in our app
     * stack references here.  We mark as our own mangling so we'll at
     * least return failure from our translate routine.
     */
    instrlist_set_our_mangling(bb->ilist, true);

    /* Load the dcontext and save SCRATCH_REG0 into it so we have a free register. */
    insert_shared_get_dcontext(dcontext, bb->ilist, NULL, true);
    instrlist_append(bb->ilist,
                     instr_create_save_to_dc_via_reg(dcontext, REG_NULL, SCRATCH_REG0,
                                                     SCRATCH_REG0_OFFS));

    /* The transition work needed prior to going native is done here.
     * Now that we have a stack of native retaddrs, we save the app retaddr in C
     * code.
     */
    if (bb->native_call) {
        dr_insert_clean_call_ex(dcontext, bb->ilist, NULL, (void *)call_to_native,
                                DR_CLEANCALL_RETURNS_TO_NATIVE, 1,
                                opnd_create_reg(REG_XSP));
    } else {
        if (DYNAMO_OPTION(native_exec_opt)) {
            insert_return_to_native(dcontext, bb->ilist, NULL, REG_NULL, SCRATCH_REG0);
        } else {
            dr_insert_clean_call_ex(dcontext, bb->ilist, NULL, (void *)return_to_native,
                                    DR_CLEANCALL_RETURNS_TO_NATIVE, 0);
        }
    }

#if defined(X86) && defined(X64)
    if (!reachable) {
        /* It would be best to store the target at the end of the bb to keep it readonly,
         * but that requires a post-pass to patch its value: since native_exec
         * is already hacky we just go through TLS and ignore multi-thread selfmod.
         */
        instrlist_append(
            bb->ilist,
            INSTR_CREATE_mov_imm(dcontext, opnd_create_reg(SCRATCH_REG0),
                                 OPND_CREATE_INTPTR((ptr_int_t)bb->start_pc)));
        if (X64_CACHE_MODE_DC(dcontext) && !X64_MODE_DC(dcontext) &&
            DYNAMO_OPTION(x86_to_x64_ibl_opt)) {
            jmp_tgt = opnd_create_reg(REG_R9);
        } else {
            jmp_tgt = opnd_create_tls_slot(os_tls_offset(MANGLE_XCX_SPILL_SLOT));
        }
        instrlist_append(
            bb->ilist, INSTR_CREATE_mov_st(dcontext, jmp_tgt, opnd_create_reg(REG_XAX)));
    } else
#endif
    {
        jmp_tgt = opnd_create_pc(bb->start_pc);
    }

    /* Restore SCRATCH_REG0 and the register that held the dcontext. */
    instrlist_append(bb->ilist,
                     instr_create_restore_from_dc_via_reg(dcontext, REG_NULL,
                                                          SCRATCH_REG0,
                                                          SCRATCH_REG0_OFFS));
    insert_shared_restore_dcontext_reg(dcontext, bb->ilist, NULL);

#if defined(AARCH64) || defined(RISCV64)
    ASSERT_NOT_IMPLEMENTED(false);
#else
    /* This is the jump to the native code. */
    instrlist_append(bb->ilist,
                     opnd_is_pc(jmp_tgt) ? XINST_CREATE_jump(dcontext, jmp_tgt)
                                         : XINST_CREATE_jump_mem(dcontext, jmp_tgt));
#endif

    /* Mark everything added so far as meta, so selfmod, etc. will leave it alone
     * (in absence
     * of selfmod only really needed for the jmp to native code)
     */
    for (in = instrlist_first(bb->ilist); in != NULL; in = instr_get_next(in))
        instr_set_meta(in);

    /* This final jump is appended after the meta-marking loop and so is the only
     * non-meta instr in the list.
     */
    instrlist_append(bb->ilist,
                     XINST_CREATE_jump(dcontext, opnd_create_pc(bb->start_pc)));

    if (DYNAMO_OPTION(shared_bbs) && !TEST(FRAG_TEMP_PRIVATE, bb->flags))
        bb->flags |= FRAG_SHARED;

    /* This bb is never coarse-grain. */
    bb->flags &= ~FRAG_COARSE_GRAIN;
    STATS_INC(coarse_prevent_native_exec);

    /* We exclude this bb from traces to avoid going native in the middle of
     * building a trace for simplicity.
     * XXX i#1239: DR needs to be able to unlink native exec gateway bbs for
     * proper cache consistency and signal handling, in which case we could
     * use FRAG_MUST_END_TRACE here instead.
     */
    bb->flags |= FRAG_CANNOT_BE_TRACE;

    /* We support mangling here, though currently we don't need it as we don't
     * include any app code (although we mark this bb as belonging to the start
     * pc, so we'll get flushed if this region does), and even if target is
     * selfmod we're running it natively no matter how it modifies itself.  We
     * only care that transition to target is via a call or call* so we can
     * clobber the retaddr and regain control, and that no retaddr mangling
     * happens while native before coming back out.  While the former does not
     * depend on the target at all, unfortunately we cannot verify the latter.
     */
    bb->flags &= ~FRAG_SELFMOD_SANDBOXED;
    DEBUG_DECLARE(ok =) mangle_bb_ilist(dcontext, bb);
    ASSERT(ok);
#ifdef DEBUG
    DOLOG(3, LOG_INTERP, {
        LOG(THREAD, LOG_INTERP, 3, "native_exec_bb @" PFX "\n", bb->start_pc);
        instrlist_disassemble(dcontext, bb->start_pc, bb->ilist, THREAD);
    });
#endif
}
/* Returns whether start should be treated as a gateway into native execution, based
 * on the native_exec options, dcontext->last_exit, and whether start is a native pc.
 * *is_call (which must be non-NULL) is set to whether the entrance is considered to
 * be via a call.
 */
static bool
at_native_exec_gateway(dcontext_t *dcontext, app_pc start,
                       bool *is_call _IF_DEBUG(bool xfer_target))
{
    /* ASSUMPTION: a transfer into a native module will always be by indirect call
     * or non-inlined direct call from a fragment that will not be flushed.
     * For now we will only go native if last_exit was
     * a call, a true call*, or a PLT-style call,jmp* (and we detect the latter only
     * if the call is inlined, so if the jmp* table is in a DGC-marked region
     * or if -no_inline_calls we will miss these: FIXME).
     * FIXME: what if have PLT-style but no GOT indirection: call,jmp ?!?
     *
     * We try to identify funky call* constructions (like
     * call*,...,jmp* in case 4269) by examining TOS to see whether it's a
     * retaddr -- we do this if last_exit is a jmp* or is unknown (for the
     * target_delete ibl path).
     *
     * FIXME: we will fail to identify a delay-loaded indirect xfer!
     * Need to know dynamic link patchup code to look for.
     *
     * FIXME: we will fail to take over w/ non-call entrances to a dll, like
     * NtContinue or direct jmp from DGC.
     * we could try to take the top-of-stack value and see if it's a retaddr by
     * decoding the prev instr to see if it's a call.  decode backwards may have
     * issues, and if really want everything will have to do this on every bb,
     * not just if lastexit is ind xfer.
     *
     * We count up easy-to-identify cases we've missed in the DOSTATS below.
     */
    bool native_exec_bb = false;

    ASSERT(start != (app_pc)back_from_native && start != (app_pc)native_module_callout &&
           "interpreting return from native module?");
    ASSERT(is_call != NULL);
    *is_call = false;

    if (DYNAMO_OPTION(native_exec) && !vmvector_empty(native_exec_areas)) {
        /* Do we know that we came from a call?  Direct calls only qualify when
         * -native_exec_dircalls is on.
         */
        if (TEST(LINK_CALL, dcontext->last_exit->flags) &&
            (DYNAMO_OPTION(native_exec_dircalls) ||
             LINKSTUB_INDIRECT(dcontext->last_exit->flags))) {
            STATS_INC(num_native_entrance_checks);
            if (is_native_pc(start)) {
                native_exec_bb = true;
                *is_call = true;
                DOSTATS({
                    if (EXIT_IS_CALL(dcontext->last_exit->flags)) {
                        if (LINKSTUB_INDIRECT(dcontext->last_exit->flags))
                            STATS_INC(num_native_module_entrances_indcall);
                        else
                            STATS_INC(num_native_module_entrances_call);
                    } else
                        STATS_INC(num_native_module_entrances_plt);
                });
            }
        }
        /* Otherwise, can we guess that we came from a call? */
        else if (DYNAMO_OPTION(native_exec_guess_calls) &&
                 ((LINKSTUB_INDIRECT(dcontext->last_exit->flags) &&
                   EXIT_IS_JMP(dcontext->last_exit->flags)) ||
                  LINKSTUB_FAKE(dcontext->last_exit))) {
            /* If the last exit is unknown, or was a jmp*, examine TOS and guess
             * whether it's a retaddr
             */
            app_pc *tos = (app_pc *)get_mcontext(dcontext)->xsp;
            STATS_INC(num_native_entrance_TOS_checks);
            /* The native-area check is done before reading TOS,
             * but after last_exit checks above since overlap is more costly
             */
            if (is_native_pc(start) &&
                is_readable_without_exception((app_pc)tos, sizeof(app_pc))) {
                /* How many bytes prior to the retaddr we try decoding from. */
                enum { MAX_CALL_CONSIDER = 6 };
                app_pc retaddr = *tos;
                LOG(THREAD, LOG_INTERP | LOG_VMAREAS, 2,
                    "at native_exec target: checking TOS " PFX " => " PFX
                    " for retaddr\n",
                    tos, retaddr);
#ifdef RETURN_AFTER_CALL
                if (DYNAMO_OPTION(ret_after_call)) {
                    native_exec_bb = is_observed_call_site(dcontext, retaddr);
                    *is_call = true;
                    LOG(THREAD, LOG_INTERP | LOG_VMAREAS, 2,
                        "native_exec: *TOS is %sa call site in ret-after-call table\n",
                        native_exec_bb ? "" : "NOT ");
                } else {
#endif
                    /* Try to decode backward -- first make sure it's readable. */
                    if (is_readable_without_exception(retaddr - MAX_CALL_CONSIDER,
                                                      MAX_CALL_CONSIDER +
                                                          MAX_INSTR_LENGTH)) {
                        /* Indirect calls have variable length and form so we decode at
                         * each byte rather than searching for ff and guessing length
                         */
                        app_pc pc, next_pc;
                        instr_t instr;
                        instr_init(dcontext, &instr);
                        for (pc = retaddr - MAX_CALL_CONSIDER; pc < retaddr; pc++) {
                            LOG(THREAD, LOG_INTERP | LOG_VMAREAS, 3,
                                "native_exec: decoding @" PFX " looking for call\n", pc);
                            instr_reset(dcontext, &instr);
                            next_pc = IF_AARCH64_ELSE(decode_cti_with_ldstex,
                                                      decode_cti)(dcontext, pc, &instr);
                            STATS_INC(num_native_entrance_TOS_decodes);
                            /* A call that ends exactly at the retaddr is a match. */
                            if (next_pc == retaddr && instr_is_call(&instr)) {
                                native_exec_bb = true;
                                *is_call = true;
                                LOG(THREAD, LOG_INTERP | LOG_VMAREAS, 2,
                                    "native_exec: found call @ pre-*TOS " PFX "\n", pc);
                                break;
                            }
                        }
                        instr_free(dcontext, &instr);
                    }
#ifdef RETURN_AFTER_CALL
                }
#endif
                DOSTATS({
                    if (native_exec_bb) {
                        if (LINKSTUB_FAKE(dcontext->last_exit))
                            STATS_INC(num_native_module_entrances_TOS_unknown);
                        else
                            STATS_INC(num_native_module_entrances_TOS_jmp);
                    }
                });
            }
        }
        /* Only now do we check things that might preempt the
         * "guess" code above.
         */
        /* Is this a return into a native module? */
        if (!native_exec_bb && DYNAMO_OPTION(native_exec_retakeover) &&
            LINKSTUB_INDIRECT(dcontext->last_exit->flags) &&
            TEST(LINK_RETURN, dcontext->last_exit->flags)) {
            if (is_native_pc(start)) {
                /* XXX: check that this is the return address of a known native
                 * callsite where we took over on a module transition.
                 */
                STATS_INC(num_native_module_entrances_ret);
                native_exec_bb = true;
                *is_call = false;
            }
        }
#ifdef UNIX
        /* Is this the image entry point of a native executable?  The entry point
         * (usually _start) cannot return as there is no retaddr.
         */
        else if (!native_exec_bb && DYNAMO_OPTION(native_exec_retakeover) &&
                 LINKSTUB_INDIRECT(dcontext->last_exit->flags) &&
                 start == get_image_entry()) {
            if (is_native_pc(start)) {
                native_exec_bb = true;
                *is_call = false;
            }
        }
#endif

        DOSTATS({
            /* Did we reach a native pc without going through a gateway caught above? */
            if (!xfer_target &&
                !native_exec_bb && is_native_pc(start)) {
                LOG(THREAD, LOG_INTERP | LOG_VMAREAS, 2,
                    "WARNING: pc " PFX " is on native list but reached bypassing "
                    "gateway!\n",
                    start);
                STATS_INC(num_native_entrance_miss);
                /* Once-only, since once we get into a dll past the gateway we may xfer
                 * through a bunch of lastexit-null or indjmp to same dll
                 */
                ASSERT_CURIOSITY_ONCE(false && "inside native_exec dll");
            }
        });
    }

    return native_exec_bb;
}
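/* Initializes bb for building a basic block starting at start and registers it as
 * dcontext->bb_build_info.
 * Must hold bb_building_lock.
 */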
static inline void
init_interp_build_bb(dcontext_t *dcontext, build_bb_t *bb, app_pc start,
                     uint initial_flags, bool for_trace, instrlist_t **unmangled_ilist)
{
    ASSERT_OWN_MUTEX(USE_BB_BUILDING_LOCK() && !TEST(FRAG_TEMP_PRIVATE, initial_flags),
                     &bb_building_lock);
    /* We need to set up for abort prior to native exec and other checks
     * that can crash */
    ASSERT(dcontext->bb_build_info == NULL);
    /* Register bb as the in-progress build: it is this routine's job
     * to set this up */
    dcontext->bb_build_info = (void *)bb;

    /* Translation info is requested via the flags when -store_translations is on. */
    init_build_bb(
        bb, start, true, true, true, false, INVALID_FILE,
        initial_flags |
            (INTERNAL_OPTION(store_translations) ? FRAG_HAS_TRANSLATION_INFO : 0),
        NULL);
    if (!TEST(FRAG_TEMP_PRIVATE, initial_flags))
        bb->has_bb_building_lock = true;
    /* We avoid races where there is no hook when we start building a
     * bb (and hence we don't record translation or do full decode) yet
     * a hook when we're ready to call one by storing whether there is a
     * hook at translation/decode decision time: now.
     */
    if (dr_bb_hook_exists()) {
        /* Don't instrument code on the null instru list.
         * Because the module load event is now on 1st exec, we need to trigger
         * it now so the client can adjust the null instru list:
         */
        check_new_page_start(dcontext, bb);
        bb->checked_start_vmarea = true;
        if (!os_module_get_flag(bb->start_pc, MODULE_NULL_INSTRUMENT))
            bb->pass_to_client = true;
    }
    /* Even if there is no bb hook, for a trace hook we need to
     * record translation and do full decode.  It's racy to check
     * dr_trace_hook_exists() here so we rely on trace building having
     * set unmangled_ilist.
     */
    if (bb->pass_to_client || unmangled_ilist != NULL) {
        /* When a client may see the ilist we record translation info
         * by default.  This ensures clients can get the correct app address
         * of any instruction.  We also rely on this for allowing the client
         * to return DR_EMIT_STORE_TRANSLATIONS and setting the
         * FRAG_HAS_TRANSLATION_INFO flag after decoding the app code.
         *
         * FIXME: xref case 10070/214505.  Currently this means that all
         * instructions are fully decoded for client interface builds.
         */
        bb->record_translation = true;
        /* Request full decode unless -fast_client_decode is on.
         * Note that we currently do this anyway to get
         * translation fields, but once we fix case 10070 it
         * won't be that way.
         * We do not let the client turn this off (the runtime
         * option is not dynamic, and off by default anyway), as we
         * do not export level-handling instr_t routines like *_expand
         * for walking instrlists and instr_decode().
         */
        bb->full_decode = !INTERNAL_OPTION(fast_client_decode);
        bb->for_trace = for_trace;
    }
    /* Stored unconditionally; may be NULL. */
    bb->unmangled_ilist = unmangled_ilist;
}
/* Tears down what init_interp_build_bb() registered: unregisters bb from
 * dcontext->bb_build_info and frees bb's instruction list.
 */
static inline void
exit_interp_build_bb(dcontext_t *dcontext, build_bb_t *bb)
{
    instrlist_t *built = bb->ilist;

    /* bb must be the build that is currently registered on this dcontext. */
    ASSERT(dcontext->bb_build_info == (void *)bb);
    dcontext->bb_build_info = NULL;

    /* Free the list along with every instr still on it. */
    instrlist_clear_and_destroy(dcontext, built);
}
/* Interprets the application's instructions starting at start until the end of a basic
 * block is found, and then creates a fragment for the basic block.
 * DOES NOT look in the hashtable to see if such a fragment already exists!
 */
fragment_t *
build_basic_block_fragment(dcontext_t *dcontext, app_pc start, uint initial_flags,
                           bool link, bool visible, bool for_trace,
                           instrlist_t **unmangled_ilist)
{
    fragment_t *f;
    build_bb_t bb;
    dr_where_am_i_t wherewasi = dcontext->whereami;
    bool image_entry;
    KSTART(bb_building);
    dcontext->whereami = DR_WHERE_INTERP;

    ASSERT(!RUNNING_WITHOUT_CODE_CACHE());
    /* ASSUMPTION: the image entry is reached via an indirect transfer and
     * so will be the start of a bb
     */
    image_entry = check_for_image_entry(start);

    init_interp_build_bb(dcontext, &bb, start, initial_flags, for_trace, unmangled_ilist);
    if (at_native_exec_gateway(dcontext, start,
                               &bb.native_call _IF_DEBUG(false))) {
        DODEBUG({ report_native_module(dcontext, bb.start_pc); });
        /* build_native_exec_bb doesn't support setting translation
         * info, but it also doesn't pass the built bb to the client (it
         * contains no app code) so we don't need it. */
        bb.record_translation = false;
        build_native_exec_bb(dcontext, &bb);
    } else {
        build_bb_ilist(dcontext, &bb);
        /* If the build info was cleared during the build there is nothing to emit. */
        if (dcontext->bb_build_info == NULL) {
            f = NULL;
            goto build_basic_block_fragment_done;
        }
        if (bb.native_exec) {
            /* Throw away what we built and turn the bb into a native_exec gateway. */
            bool is_call = bb.native_call;
            LOG(THREAD, LOG_INTERP, 2, "replacing built bb with native_exec bb\n");
            instrlist_clear_and_destroy(dcontext, bb.ilist);
            vm_area_destroy_list(dcontext, bb.vmlist);
            /* init_interp_build_bb asserts that no build is registered. */
            dcontext->bb_build_info = NULL;
            init_interp_build_bb(dcontext, &bb, start, initial_flags, for_trace,
                                 unmangled_ilist);
            /* build_native_exec_bb doesn't support setting
             * translation info, but it also doesn't pass the built bb to the
             * client (it contains no app code) so we don't need it. */
            bb.record_translation = false;
            bb.native_call = is_call;
            build_native_exec_bb(dcontext, &bb);
        }
    }
    /* The image entry point bb must not be coarse-grain, so we keep
     * it fine-grained
     */
    if (image_entry)
        bb.flags &= ~FRAG_COARSE_GRAIN;

    if (DYNAMO_OPTION(opt_jit) && visible && is_jit_managed_area(bb.start_pc)) {
        ASSERT(bb.overlap_info == NULL || bb.overlap_info->contiguous);
        jitopt_add_dgc_bb(bb.start_pc, bb.end_pc, TEST(FRAG_IS_TRACE_HEAD, bb.flags));
    }

    /* Emit the fragment. */
    KSTART(bb_emit);
    f = emit_fragment_ex(dcontext, start, bb.ilist, bb.flags, bb.vmlist, link, visible);
    KSTOP(bb_emit);

#ifdef CUSTOM_TRACES_RET_REMOVAL
    f->num_calls = dcontext->num_calls;
    f->num_rets = dcontext->num_rets;
#endif

#ifdef DGC_DIAGNOSTICS
    if ((f->flags & FRAG_DYNGEN)) {
        LOG(THREAD, LOG_INTERP, 1, "new bb is DGC:\n");
        DOLOG(1, LOG_INTERP, { disassemble_app_bb(dcontext, start, THREAD); });
        DOLOG(3, LOG_INTERP, { disassemble_fragment(dcontext, f, false); });
    }
#endif
    DOLOG(2, LOG_INTERP,
          { disassemble_fragment(dcontext, f, d_r_stats->loglevel <= 3); });
    DOLOG(4, LOG_INTERP, {
        if (TEST(FRAG_SELFMOD_SANDBOXED, f->flags)) {
            LOG(THREAD, LOG_INTERP, 4, "\nXXXX sandboxed fragment!  original code:\n");
            disassemble_app_bb(dcontext, f->tag, THREAD);
            LOG(THREAD, LOG_INTERP, 4, "code cache code:\n");
            disassemble_fragment(dcontext, f, false);
        }
    });
    if (INTERNAL_OPTION(bbdump_tags)) {
        disassemble_fragment_header(dcontext, f, bbdump_file);
    }

#ifdef INTERNAL
    DODEBUG({
        if (INTERNAL_OPTION(stress_recreate_pc)) {
            stress_test_recreate(dcontext, f, bb.ilist);
        }
    });
#endif

    /* Unregisters the build and frees bb.ilist. */
    exit_interp_build_bb(dcontext, &bb);
build_basic_block_fragment_done:
    dcontext->whereami = wherewasi;
    KSTOP(bb_building);
    return f;
}
/* Builds an instrlist_t as though building a bb
 * from pc.
 * Use recreate_fragment_ilist() for building an instrlist_t for a fragment.
 * If check_vm_area is false, does NOT call check_thread_vm_area()!
 * Make sure you know it will terminate at the right spot.  It does
 * check selfmod and native_exec for elision, but otherwise will
 * follow ubrs to the limit.  Currently used for
 * record_translation_info() (case 3559).
 * If vmlist_out != NULL and check_vm_area, returns the vmlist, which the
 * caller must free by calling vm_area_destroy_list.
 */
instrlist_t *
recreate_bb_ilist(dcontext_t *dcontext, byte *pc, byte *pretend_pc, app_pc stop_pc,
                  uint flags, uint *res_flags OUT, uint *res_exit_type OUT,
                  bool check_vm_area, bool mangle, void **vmlist_out OUT,
                  bool call_client, bool for_trace)
{
    build_bb_t bb;

    /* We don't know the full range, so just do a simple readability check up front. */
    if (!is_readable_without_exception(pc, 4)) {
        LOG(THREAD, LOG_INTERP, 3, "recreate_bb_ilist: cannot read memory at " PFX "\n",
            pc);
        return NULL;
    }

    LOG(THREAD, LOG_INTERP, 3, "\nbuilding bb instrlist now *********************\n");
    init_build_bb(&bb, pc, false, false, mangle, true, INVALID_FILE, flags, NULL);
    /* We support a stop pc so that the recreated bb can end where the original did
     * w/o having to include the next instr which might have triggered the bb
     * termination but not been included in the bb (i#1441).
     * It only applies to full_decode.
     */
    bb.stop_pc = stop_pc;
    bb.check_vm_area = check_vm_area;
    /* Whenever vm areas are checked we record the vmlist, whether or not the caller
     * asked for it back: if it is not handed out it is destroyed below.
     */
    if (check_vm_area)
        bb.record_vmlist = true;
    /* We call the bb hook again, unless the client told us
     * DR_EMIT_STORE_TRANSLATIONS, in which case we shouldn't come here,
     * except for traces (see below):
     */
    bb.pass_to_client = (DYNAMO_OPTION(code_api) && call_client &&
                         /* The null-instrument flag is not expected to change, so
                          * its current value should match the value used at
                          * ilist building time.  Alternatively, we could store
                          * bb->pass_to_client in the fragment.
                          */
                         !os_module_get_flag(pc, MODULE_NULL_INSTRUMENT));
    /* We call the bb hook again when translating a trace that
     * didn't have DR_EMIT_STORE_TRANSLATIONS on itself (or on any
     * for_trace bb if there was no trace hook).
     */
    bb.for_trace = for_trace;
    /* NOTE(review): the start of this comment was lost; it says a later check
     * verifies that all
     * non-meta instrs have translation fields */
    if (pretend_pc != pc)
        bb.pretend_pc = pretend_pc;

    build_bb_ilist(dcontext, &bb);

    LOG(THREAD, LOG_INTERP, 3, "\ndone building bb instrlist *********************\n\n");
    if (res_flags != NULL)
        *res_flags = bb.flags;
    if (res_exit_type != NULL)
        *res_exit_type = bb.exit_type;
    /* Hand the vmlist to the caller if requested; otherwise free it here. */
    if (check_vm_area && vmlist_out != NULL)
        *vmlist_out = bb.vmlist;
    else if (bb.record_vmlist)
        vm_area_destroy_list(dcontext, bb.vmlist);
    return bb.ilist;
}
/* Re-creates an ilist of the fragment that currently contains the
 * passed-in code cache pc, also returns the fragment.
 *
 * Exactly one of pc and (f_res or *f_res) must be NULL:
 * If pc==NULL, assumes that *f_res is the fragment to use;
 * else, looks up the fragment, allocating it if necessary.
 * If f_res!=NULL, the fragment is returned and whether it was allocated
 * is returned in the alloc_res param.
 * If f_res==NULL, if the fragment was allocated it is freed here.
 *
 * NOTE : does not add prefix instructions to the created ilist, if we change
 * this to add them be sure to check recreate_app_* for compatibility (for ex.
 * adding them and setting their translation to pc would break current
 * implementation, also setting translation to NULL would trigger an assert)
 *
 * Returns NULL if unable to recreate the fragment ilist (fragment not found
 * or fragment is pending deletion and app memory might have changed).
 * In that case f_res is still pointed at the fragment if it was found, and
 * alloc is valid.
 *
 * For proper synchronization :
 * If caller is the dcontext's owner then needs to be couldbelinking, otherwise
 * the dcontext's owner should be suspended and the callers should own the
 * thread_initexit_lock
 */
instrlist_t *
recreate_fragment_ilist(dcontext_t *dcontext, byte *pc,
                        fragment_t **f_res, bool *alloc_res,
                        bool mangle, bool call_client)
{
    fragment_t *f;
    uint flags = 0;
    instrlist_t *ilist;
    bool alloc = false;
    monitor_data_t md = {
        0,
    };
    dr_isa_mode_t old_mode = DEFAULT_ISA_MODE;
    /* Check synchronization: we need to make sure no one flushes the
     * fragment we just looked up while we are recreating it, if it's the
     * caller's dcontext then just need to be couldbelinking, otherwise need
     * the thread_initexit_lock since then we are looking up in someone else's
     * table (the dcontext's owning thread would also need to be suspended)
     */
    ASSERT((dcontext != GLOBAL_DCONTEXT &&
            d_r_get_thread_id() == dcontext->owning_thread &&
            is_couldbelinking(dcontext)) ||
           (ASSERT_OWN_MUTEX(true, &thread_initexit_lock), true));
    STATS_INC(num_recreated_fragments);
    if (pc == NULL) {
        ASSERT(f_res != NULL && *f_res != NULL);
        f = *f_res;
    } else {
        /* Ensure callers don't give us both a valid fragment and a valid pc. */
        ASSERT(f_res == NULL || *f_res == NULL);
        LOG(THREAD, LOG_INTERP, 3, "recreate_fragment_ilist: looking up pc " PFX "\n",
            pc);
        f = fragment_pclookup_with_linkstubs(dcontext, pc, &alloc);
        LOG(THREAD, LOG_INTERP, 3, "\tfound F%d\n", f == NULL ? -1 : f->id);
        if (f_res != NULL)
            *f_res = f;
        /* We won't be able to reliably recreate if the
         * target is pending flush, original memory might no longer be there or
         * the memory might have changed.  caller should use the stored
         * translation info instead.
         */
        if (f == NULL || TEST(FRAG_WAS_DELETED, f->flags)) {
            ASSERT(f != NULL || !alloc);
            ilist = NULL;
            goto recreate_fragment_done;
        }
    }

    /* Recreate in the same ISA mode as the fragment itself. */
    DEBUG_DECLARE(bool ok =)
    dr_set_isa_mode(dcontext, FRAG_ISA_MODE(f->flags), &old_mode);
    ASSERT(ok);

    if ((f->flags & FRAG_IS_TRACE) == 0) {
        /* The easy case: just a bb. */
        ilist = recreate_bb_ilist(dcontext, (byte *)f->tag, (byte *)f->tag,
                                  NULL /*stop_pc*/, 0 /*flags*/, &flags, NULL,
                                  true /*check_vm_area*/, mangle, NULL, call_client,
                                  false /*for_trace*/);
        ASSERT(ilist != NULL);
        if (ilist == NULL)
            goto recreate_fragment_done;
        if (PAD_FRAGMENT_JMPS(f->flags))
            nop_pad_ilist(dcontext, f, ilist, false);
        goto recreate_fragment_done;
    } else {
        /* Build the trace up one bb at a time. */
        instrlist_t *bb;
        byte *apc;
        trace_only_t *t = TRACE_FIELDS(f);
        uint i;
        instr_t *last;
        bool mangle_at_end = mangle_trace_at_end();

        if (mangle_at_end) {
            /* mangle_trace() takes a monitor_data_t, so fill one in. */
            md.trace_tag = f->tag;
            md.trace_flags = f->flags | FRAG_HAS_TRANSLATION_INFO;
            md.num_blks = t->num_bbs;
            md.blk_info = (trace_bb_build_t *)HEAP_ARRAY_ALLOC(
                dcontext, trace_bb_build_t, md.num_blks, ACCT_TRACE, true);
            /* The cleanup code at recreate_fragment_done walks every entry, so make
             * sure each vmlist is well-defined even if we bail out of the loop below
             * before having filled in all of them.
             */
            for (i = 0; i < md.num_blks; i++)
                md.blk_info[i].vmlist = NULL;
            md.pass_to_client = true;
        }

        ilist = instrlist_create(dcontext);
        STATS_INC(num_recreated_traces);
        ASSERT(t->bbs != NULL);
        for (i = 0; i < t->num_bbs; i++) {
            void *vmlist = NULL;
            apc = (byte *)t->bbs[i].tag;
            bb = recreate_bb_ilist(
                dcontext, apc, apc, NULL /*stop_pc*/, 0 /*flags*/, &flags,
                &md.final_exit_flags, true /*check_vm_area*/, !mangle_at_end,
                (mangle_at_end ? &vmlist : NULL), call_client, true /*for_trace*/);
            ASSERT(bb != NULL);
            if (bb == NULL) {
                instrlist_clear_and_destroy(dcontext, ilist);
                vm_area_destroy_list(dcontext, vmlist);
                ilist = NULL;
                goto recreate_fragment_done;
            }
            if (mangle_at_end)
                md.blk_info[i].info = t->bbs[i];
            last = instrlist_last(bb);
            ASSERT(last != NULL);
            if (mangle_at_end) {
                md.blk_info[i].vmlist = vmlist;
                md.blk_info[i].final_cti = instr_is_cti(instrlist_last(bb));
            }

            /* We need to duplicate what we did when we built the trace.
             * While if there's no client trace hook we could mangle and fixup as we
             * go, for simplicity we mangle at the end either way (in either case our
             * code here is not exactly what we did when we made it anyway)
             * PR 333597: we can't use mangle_trace if we have elision on.
             */
            if (mangle && !mangle_at_end) {
                /* To duplicate the trace-building logic:
                 * - call fixup_last_cti()
                 * - retarget the ibl routine just like extend_trace() does
                 */
                app_pc target = (last != NULL) ? opnd_get_pc(instr_get_target(last))
                                               : NULL;
                /* Convert a bb ibl routine target to its alternate for this trace. */
                if (target != NULL &&
                    is_indirect_branch_lookup_routine(dcontext, target)) {
                    target = get_alternate_ibl_routine(dcontext, target, f->flags);
                    ASSERT(target != NULL);
                    LOG(THREAD, LOG_MONITOR, 3,
                        "recreate_fragment_ilist: replacing ibl_routine to target=" PFX
                        "\n",
                        target);
                    instr_set_target(last, opnd_create_pc(target));
                    instr_set_our_mangling(last, true);
                }

                if (DYNAMO_OPTION(pad_jmps) && !INTERNAL_OPTION(pad_jmps_shift_bb)) {
                    /* FIXME - hack, but pad_jmps_shift_bb will be on by
                     * default.  Synchronize changes here with recreate_fragment_ilist.
                     * This hack is protected by asserts in nop_pad_ilist() (that
                     * we never add nops to a bb if -pad_jmps_shift_bb) and in
                     * extend_trace_pad_bytes (that we only add bbs to traces). */
                    /* The bb was padded, so its nops are removed here; see the
                     * protective assert in nop_pad_ilist() */
                    remove_nops_from_ilist(dcontext, bb _IF_DEBUG(true));
                }
                if (instrlist_last(ilist) != NULL) {
                    fixup_last_cti(dcontext, ilist, (app_pc)apc, flags, f->flags, NULL,
                                   NULL, true, NULL, NULL, NULL);
                }
            }

            /* Move the bb's instrs onto the trace list, then reset the now-empty bb
             * list before destroying it.
             */
            instrlist_append(ilist, instrlist_first(bb));
            instrlist_init(bb);
            instrlist_destroy(dcontext, bb);
        }

        IF_AARCH64(fixup_indirect_trace_exit(dcontext, ilist));

        /* Re-apply client changes, this time storing translation
         * info for modified instrs
         */
        if (call_client)
            instrument_trace(dcontext, f->tag, ilist, true);

        if (mangle) {
            if (mangle_at_end) {
                if (!mangle_trace(dcontext, ilist, &md)) {
                    instrlist_clear_and_destroy(dcontext, ilist);
                    ilist = NULL;
                    goto recreate_fragment_done;
                }
            } /* else we mangled one bb at a time up above */

#ifdef INTERNAL
            if (dynamo_options.optimize) {
                /* Re-apply all optimizations to the ilist.
                 * assumption: all optimizations are deterministic and stateless,
                 * so we can exactly replicate their results
                 */
                LOG(THREAD_GET, LOG_INTERP, 2, "\tre-applying optimizations to F%d\n",
                    f->id);
#    ifdef SIDELINE
                if (dynamo_options.sideline) {
                    if (!TEST(FRAG_DO_NOT_SIDELINE, f->flags))
                        optimize_trace(dcontext, f->tag, ilist);
                } else
#    endif
                    optimize_trace(dcontext, f->tag, ilist);
            }
#endif

            /* FIXME (NOTE(review): the start of this note was lost; it named a further
             * trace-building step that
             * should be called as well)
             */
            if (PAD_FRAGMENT_JMPS(f->flags))
                nop_pad_ilist(dcontext, f, ilist, false);
        }
    }

recreate_fragment_done:
    /* Free the per-bb vmlists and the md array if we allocated one. */
    if (md.blk_info != NULL) {
        uint i;
        for (i = 0; i < md.num_blks; i++) {
            vm_area_destroy_list(dcontext, md.blk_info[i].vmlist);
            md.blk_info[i].vmlist = NULL;
        }
        HEAP_ARRAY_FREE(dcontext, md.blk_info, trace_bb_build_t, md.num_blks, ACCT_TRACE,
                        true);
    }
    if (alloc_res != NULL)
        *alloc_res = alloc;
    /* With nowhere to return an allocated fragment we must free it ourselves. */
    if (f_res == NULL && alloc)
        fragment_free(dcontext, f);
    DEBUG_DECLARE(ok =) dr_set_isa_mode(dcontext, old_mode, NULL);
    ASSERT(ok);
    return ilist;
}
/* Removes the nops from ilist when flags indicate jmp padding and
 * -pad_jmps_shift_bb is off; otherwise does nothing.
 */
static void
process_nops_for_trace(dcontext_t *dcontext, instrlist_t *ilist,
                       uint flags _IF_DEBUG(bool recreating))
{
    if (PAD_FRAGMENT_JMPS(flags) && !INTERNAL_OPTION(pad_jmps_shift_bb)) {
        /* FIXME - hack, but pad_jmps_shift_bb will be on by
         * default.  Synchronize changes here with recreate_fragment_ilist.
         * This hack is protected by asserts in nop_pad_ilist() (that
         * we never add nops to a bb if -pad_jmps_shift_bb) and in
         * extend_trace_pad_bytes (that we only add bbs to traces). */
        /* The bb was padded, so its nops are removed here; see the
         * protective assert in nop_pad_ilist() */
        remove_nops_from_ilist(dcontext, ilist _IF_DEBUG(recreating));
    }
}
/* Inserts inst into ilist before where and returns inst's length in bytes. */
static inline int
tracelist_add(dcontext_t *dcontext, instrlist_t *ilist, instr_t *where, instr_t *inst)
{
    /* Computing the length may encode inst, but when we emit it we're going to encode
     * anyway, and we'll re-use any memory allocated here for an encoding
     */
    int size;
#if defined(X86) && defined(X64)
    if (!X64_CACHE_MODE_DC(dcontext)) {
        /* The cache is not in x64 mode: switch inst to x86 mode and 32-bit operands
         * before measuring it.
         */
        instr_set_x86_mode(inst, true);
        instr_shrink_to_32_bits(inst);
    }
#endif
    size = instr_length(dcontext, inst);
    instrlist_preinsert(ilist, where, inst);
    return size;
}
#ifdef X86
/* Inserts inst into ilist after where and returns inst's length in bytes. */
static inline int
tracelist_add_after(dcontext_t *dcontext, instrlist_t *ilist, instr_t *where,
                    instr_t *inst)
{
    /* Computing the length may encode inst, but when we emit it we're going to encode
     * anyway, and we'll re-use any memory allocated here for an encoding
     */
    int size;
#    ifdef X64
    if (!X64_CACHE_MODE_DC(dcontext)) {
        /* The cache is not in x64 mode: switch inst to x86 mode and 32-bit operands
         * before measuring it.
         */
        instr_set_x86_mode(inst, true);
        instr_shrink_to_32_bits(inst);
    }
#    endif
    size = instr_length(dcontext, inst);
    instrlist_postinsert(ilist, where, inst);
    return size;
}
#endif
#ifdef HASHTABLE_STATISTICS
/* Inserts before next a load / add-one / store sequence on the 4-byte counter at
 * counter_address, going through SCRATCH_REG2.
 * Returns the combined size of the inserted instructions.
 */
int
insert_increment_stat_counter(dcontext_t *dcontext, instrlist_t *trace, instr_t *next,
                              uint *counter_address)
{
    opnd_t counter = OPND_CREATE_ABSMEM(counter_address, OPSZ_4);
    int total;

    /* scratch <- counter */
    total = tracelist_add(
        dcontext, trace, next,
        XINST_CREATE_load(dcontext, opnd_create_reg(SCRATCH_REG2), counter));
    /* scratch <- scratch + 1 */
    total += tracelist_add(
        dcontext, trace, next,
        XINST_CREATE_add(dcontext, opnd_create_reg(SCRATCH_REG2), OPND_CREATE_INT8(1)));
    /* counter <- scratch */
    total += tracelist_add(
        dcontext, trace, next,
        XINST_CREATE_store(dcontext, counter, opnd_create_reg(SCRATCH_REG2)));

    return total;
}
#endif
/* Inserts the instruction(s) to restore the spilled xcx (SCRATCH_REG2) before next.
 * assumes target instrlist is a trace!
 * returns size to be added to trace
 */
static inline int
insert_restore_spilled_xcx(dcontext_t *dcontext, instrlist_t *trace, instr_t *next)
{
    int added_size = 0;

    if (DYNAMO_OPTION(private_ib_in_tls)) {
#ifdef X86
        if (X64_CACHE_MODE_DC(dcontext) && !X64_MODE_DC(dcontext) &&
            IF_X64_ELSE(DYNAMO_OPTION(x86_to_x64_ibl_opt), false)) {
            /* With -x86_to_x64_ibl_opt the value is restored from r9. */
            added_size +=
                tracelist_add(dcontext, trace, next,
                              INSTR_CREATE_mov_ld(dcontext, opnd_create_reg(REG_XCX),
                                                  opnd_create_reg(REG_R9)));
        } else
#endif
        {
            /* Restore from the TLS spill slot. */
            added_size += tracelist_add(
                dcontext, trace, next,
                XINST_CREATE_load(
                    dcontext, opnd_create_reg(SCRATCH_REG2),
                    opnd_create_tls_slot(os_tls_offset(MANGLE_XCX_SPILL_SLOT))));
        }
    } else {
        /* The spill is in TLS for shared fragments, but in the
         * mcontext for private fragments, and all traces are private
         */
        added_size += tracelist_add(dcontext, trace, next,
                                    instr_create_restore_from_dcontext(
                                        dcontext, SCRATCH_REG2, SCRATCH_REG2_OFFS));
    }

    return added_size;
}
/* Returns whether inst is one of our own mangling instrs whose opcode belongs to the
 * per-architecture set listed below (presumably the opcodes used by the in-trace
 * indirect branch comparison -- confirm against the trace mangling code).
 * Always false for instrs that are not our mangling.
 */
bool
instr_is_trace_cmp(dcontext_t *dcontext, instr_t *inst)
{
    if (!instr_is_our_mangling(inst))
        return false;
#ifdef X86
    {
        int opc = instr_get_opcode(inst);
#    ifdef X64
        return opc == OP_mov_imm || opc == OP_mov_st || opc == OP_lahf ||
            opc == OP_seto || opc == OP_cmp || opc == OP_jnz || opc == OP_add ||
            opc == OP_sahf;
#    else
        return opc == OP_lea || opc == OP_jecxz || opc == OP_jmp;
#    endif
    }
#elif defined(AARCH64)
    {
        int opc = instr_get_opcode(inst);
        return opc == OP_movz || opc == OP_movk || opc == OP_eor || opc == OP_cbnz;
    }
#elif defined(ARM)
    /* Only supported with traces disabled. */
    ASSERT_NOT_IMPLEMENTED(DYNAMO_OPTION(disable_traces));
    return false;
#elif defined(RISCV64)
    /* Only supported with traces disabled. */
    ASSERT_NOT_IMPLEMENTED(DYNAMO_OPTION(disable_traces));
    return false;
#endif
}
/* Inserts, around targeter, a comparison of ecx against speculative_tag with no
 * side effects;
 * if value is matched continue target is assumed to be immediately
 * after targeter (which must be < 127 bytes away).
 * returns size to be added to trace
 */
static int
insert_transparent_comparison(dcontext_t *dcontext, instrlist_t *trace,
                              instr_t *targeter,
                              app_pc speculative_tag)
{
    int added_size = 0;
#ifdef X86
    instr_t *jecxz;
    instr_t *continue_label = INSTR_CREATE_label(dcontext);
    /* instead of:
     *   cmp ecx,const
     * we use:
     *   lea -const(ecx) -> ecx
     *   jecxz continue
     *   lea const(ecx) -> ecx
     *   jmp exit   # usual targeter for stay on trace comparison
     * continue:    # if match, we target post-targeter
     *
     * we have to use the landing pad b/c we don't know whether the
     * stub will be <128 away
     */
    /* lea -const(ecx) -> ecx: ecx becomes zero iff it equals speculative_tag. */
    added_size += tracelist_add(
        dcontext, trace, targeter,
        INSTR_CREATE_lea(dcontext, opnd_create_reg(REG_ECX),
                         opnd_create_base_disp(REG_ECX, REG_NULL, 0,
                                               -((int)(ptr_int_t)speculative_tag),
                                               OPSZ_lea)));
    jecxz = INSTR_CREATE_jecxz(dcontext, opnd_create_instr(continue_label));
    /* Marked meta: this jecxz targets a label inside the trace. */
    instr_set_meta(jecxz);
    added_size += tracelist_add(dcontext, trace, targeter, jecxz);
    /* On a mismatch, add the constant back to recover the original ecx value. */
    IF_X64(ASSERT_NOT_IMPLEMENTED(!X64_MODE_DC(dcontext)));
    added_size +=
        tracelist_add(dcontext, trace, targeter,
                      INSTR_CREATE_lea(dcontext, opnd_create_reg(REG_ECX),
                                       opnd_create_base_disp(
                                           REG_ECX, REG_NULL, 0,
                                           ((int)(ptr_int_t)speculative_tag), OPSZ_lea)));
    /* The continue label goes after targeter, unlike the instrs above. */
    added_size += tracelist_add_after(dcontext, trace, targeter, continue_label);
#elif defined(ARM)
    ASSERT_NOT_IMPLEMENTED(false);
#endif
    return added_size;
}
#if defined(X86) && defined(X64)
/* Inserts before targeter the 64-bit-cache comparison of xcx against next_tag and
 * turns targeter into a jnz that targets the trace-cmp entry of its current
 * target.
 * Returns the size added to the trace.
 */
static int
mangle_x64_ib_in_trace(dcontext_t *dcontext, instrlist_t *trace, instr_t *targeter,
                       app_pc next_tag)
{
    /* True: xax is spilled to TLS and the tag goes through xax.
     * False (not in x64 mode and -x86_to_x64_ibl_opt is on): r8 holds xax and
     * r10 holds the tag instead.
     */
    const bool via_tls = X64_MODE_DC(dcontext) || !DYNAMO_OPTION(x86_to_x64_ibl_opt);
    int added_size = 0;
    opnd_t cmp_against;

    if (via_tls) {
        /* spill xax, then load the stay-on-trace tag into it */
        added_size += tracelist_add(
            dcontext, trace, targeter,
            INSTR_CREATE_mov_st(
                dcontext, opnd_create_tls_slot(os_tls_offset(PREFIX_XAX_SPILL_SLOT)),
                opnd_create_reg(REG_XAX)));
        added_size +=
            tracelist_add(dcontext, trace, targeter,
                          INSTR_CREATE_mov_imm(dcontext, opnd_create_reg(REG_XAX),
                                               OPND_CREATE_INTPTR((ptr_int_t)next_tag)));
    } else {
        ASSERT(X64_CACHE_MODE_DC(dcontext));
        /* keep xax in r8 and put the stay-on-trace tag in r10 */
        added_size += tracelist_add(dcontext, trace, targeter,
                                    INSTR_CREATE_mov_ld(dcontext, opnd_create_reg(REG_R8),
                                                        opnd_create_reg(REG_XAX)));
        added_size +=
            tracelist_add(dcontext, trace, targeter,
                          INSTR_CREATE_mov_imm(dcontext, opnd_create_reg(REG_R10),
                                               OPND_CREATE_INTPTR((ptr_int_t)next_tag)));
    }

    /* saving in the trace and restoring in ibl means that
     * -unsafe_ignore_eflags_{trace,ibl} must be equivalent
     */
    if (!INTERNAL_OPTION(unsafe_ignore_eflags_trace)) {
        if (via_tls) {
            /* xax is about to receive the flags, so park the tag in a TLS slot */
            added_size += tracelist_add(
                dcontext, trace, targeter,
                INSTR_CREATE_mov_st(
                    dcontext,
                    opnd_create_tls_slot(os_tls_offset(INDIRECT_STUB_SPILL_SLOT)),
                    opnd_create_reg(REG_XAX)));
        }
        added_size +=
            tracelist_add(dcontext, trace, targeter, INSTR_CREATE_lahf(dcontext));
        if (!INTERNAL_OPTION(unsafe_ignore_overflow)) {
            added_size += tracelist_add(
                dcontext, trace, targeter,
                INSTR_CREATE_setcc(dcontext, OP_seto, opnd_create_reg(REG_AL)));
        }
        if (via_tls)
            cmp_against = opnd_create_tls_slot(os_tls_offset(INDIRECT_STUB_SPILL_SLOT));
        else
            cmp_against = opnd_create_reg(REG_R10);
    } else {
        /* flags are not saved, so the tag is still in its register */
        cmp_against = via_tls ? opnd_create_reg(REG_XAX) : opnd_create_reg(REG_R10);
    }
    added_size +=
        tracelist_add(dcontext, trace, targeter,
                      INSTR_CREATE_cmp(dcontext, opnd_create_reg(REG_XCX), cmp_against));

    /* change jmp into jne to the trace cmp entry of its target (see
     * get_trace_cmp_entry()); the size estimate grows by one byte
     */
    instr_set_opcode(targeter, OP_jnz);
    added_size++;
    ASSERT(opnd_is_pc(instr_get_target(targeter)));
    instr_set_target(targeter,
                     opnd_create_pc(get_trace_cmp_entry(
                         dcontext, opnd_get_pc(instr_get_target(targeter)))));
    /* mark the exit so that it is known to be a trace cmp exit */
    instr_exit_branch_set_type(targeter,
                               instr_exit_branch_type(targeter) | INSTR_TRACE_CMP_EXIT);
    return added_size;
}
#endif
#ifdef AARCH64
* before each indirect branch if they were mangled properly.
* An indirect branch:
* br jump_target_reg
* is mangled into (see mangle_indirect_jump in mangle.c):
* str IBL_TARGET_REG, TLS_REG2_SLOT
* mov IBL_TARGET_REG, jump_target_reg
* b ibl_routine or indirect_stub
* This function is used by mangle_indirect_branch_in_trace;
* it removes the two mangled instructions and returns the jump_target_reg id.
*
* This function is an optimisation in that we can avoid the spill of
* IBL_TARGET_REG as we know that the actual target is always in a register
* (or the stolen register slot, see below) on AArch64 and we can compare this
* value in the register directly with the actual target.
* And we delay the loading of the target into IBL_TARGET_REG
* (done in fixup_indirect_trace_exit) until we are on the miss path.
*
* However, there is a special case where there isn't a str/mov being patched
* rather there is an str/ldr which could happen when the jump target is stored
* in the stolen register. For example:
*
* ....
* blr stolen_reg -> %x30
* b ibl_routine
*
* is mangled into
*
* ...
* str tgt_reg, TLS_REG2_SLOT
* ldr tgt_reg, TLS_REG_STOLEN_SLOT
* b ibl_routine
*
* This means that we should not remove the str/ldr, rather we need to compare the
* trace_next_exit with tgt_reg directly and remember to restore the value of tgt_reg
* in the case when the branch is not taken.
*
*/
static reg_id_t
check_patched_ibl(dcontext_t *dcontext, instrlist_t *trace, instr_t *targeter,
int *added_size, bool *tgt_in_stolen_reg)
{
instr_t *prev = instr_get_prev(targeter);
for (prev = instr_get_prev_expanded(dcontext, trace, targeter); prev;
prev = instr_get_prev(prev)) {
instr_t *prev_prev = instr_get_prev(prev);
if (prev_prev == NULL)
break;
* prev_prev str IBL_TARGET_REG, TLS_REG2_SLOT
* prev mov IBL_TARGET_REG, jump_target_reg
*/
if (instr_get_opcode(prev_prev) == OP_str && instr_get_opcode(prev) == OP_orr &&
opnd_get_reg(instr_get_src(prev_prev, 0)) == IBL_TARGET_REG &&
opnd_get_base(instr_get_dst(prev_prev, 0)) == dr_reg_stolen &&
opnd_get_reg(instr_get_dst(prev, 0)) == IBL_TARGET_REG) {
reg_id_t jp_tg_reg = opnd_get_reg(instr_get_src(prev, 1));
instrlist_remove(trace, prev_prev);
instr_destroy(dcontext, prev_prev);
instrlist_remove(trace, prev);
instr_destroy(dcontext, prev);
LOG(THREAD, LOG_INTERP, 4, "found and removed str/mov\n");
*added_size -= 2 * AARCH64_INSTR_SIZE;
return jp_tg_reg;
* prev_prev str IBL_TARGET_REG, TLS_REG2_SLOT
* prev ldr IBL_TARGET_REG, TLS_REG_STOLEN_SLOT
*/
} else if (instr_get_opcode(prev_prev) == OP_str &&
instr_get_opcode(prev) == OP_ldr &&
opnd_get_reg(instr_get_src(prev_prev, 0)) == IBL_TARGET_REG &&
opnd_get_base(instr_get_src(prev, 0)) == dr_reg_stolen &&
opnd_get_reg(instr_get_dst(prev, 0)) == IBL_TARGET_REG) {
*tgt_in_stolen_reg = true;
LOG(THREAD, LOG_INTERP, 4, "jump target is in stolen register slot\n");
return IBL_TARGET_REG;
}
}
return DR_REG_NULL;
}
/* On AArch64 there is a special case where we cannot simply remove all code after
 * the direct branch, which is mangled by cbz/cbnz stolen register.
 * For example:
 *      cbz x28, target
 * would be mangled (see mangle_cbr_stolen_reg() in aarchxx/mangle.c) into:
 *      str x0, [x28]
 *      ldr x0, [x28, #32]
 *      cbnz x0, fall       <- meta instr, not treated as exit cti
 *      ldr x0, [x28]
 *      b target            <- delete after
 * fall:
 *      ldr x0, [x28]
 *      b fall_target
 *      ...
 * If we delete all code after "b target", then the "fall" path would
 * be lost. Therefore we need to append the fall path at the end of
 * the trace as a fake exit stub. Swapping them might be dangerous since
 * a stub trace may be created on both paths.
 *
 * XXX i#5062 This special case is not needed when we eliminate decoding from code cache
 */
/* Returns whether instr is a cbz, cbnz, tbz, or tbnz.  A NULL instr yields false.
 * Only the opcode is checked, not the register operand.
 */
static bool
instr_is_cbr_stolen(instr_t *instr)
{
    if (instr == NULL)
        return false;
    /* instr_get_opcode() is called so the opcode is decoded before it is tested. */
    switch (instr_get_opcode(instr)) {
    case OP_cbz:
    case OP_cbnz:
    case OP_tbz:
    case OP_tbnz: return true;
    default: return false;
    }
}
/* Returns whether instr is an ldr whose first source operand is based on the
 * stolen register.  Returns false for a NULL instr or one without valid raw bits.
 */
static bool
instr_is_load_tls(instr_t *instr)
{
    if (instr == NULL)
        return false;
    if (!instr_raw_bits_valid(instr))
        return false;
    if (instr_get_opcode(instr) != OP_ldr)
        return false;
    return opnd_get_base(instr_get_src(instr, 0)) == dr_reg_stolen;
}
/* Handles a direct branch (targeter) that is part of a mangled cbz/cbnz on the
 * stolen register: when the pattern is found, the conditional branch located two
 * instructions before targeter is retargeted to the target of the instruction
 * located two instructions after targeter.
 * Returns the (expanded) instruction preceding targeter in all cases.
 */
static instr_t *
fixup_cbr_on_stolen_reg(dcontext_t *dcontext, instrlist_t *trace, instr_t *targeter)
{
/* We check prior to the targeter whether there is a pattern such as
 *   cbz/cbnz ...
 *   ldr reg, [x28, #SLOT]
 * Otherwise, just return the previous instruction.
 */
instr_t *prev = instr_get_prev_expanded(dcontext, trace, targeter);
if (!instr_is_load_tls(prev))
return prev;
instr_t *prev_prev = instr_get_prev_expanded(dcontext, trace, prev);
if (!instr_is_cbr_stolen(prev_prev))
return prev;
/* Now look at what follows targeter to confirm the rest of the sequence. */
instr_t *next = instr_get_next_expanded(dcontext, trace, targeter);
if (!next)
return prev;
/* The instruction after targeter is expected to be a load based on the stolen reg. */
ASSERT_CURIOSITY(instr_is_load_tls(next));
instr_t *next_next = instr_get_next_expanded(dcontext, trace, next);
if (!next_next)
return prev;
/* The one after that is expected to be the unconditional branch of the fall path. */
ASSERT_CURIOSITY(instr_is_ubr(next_next));
/* Point the conditional branch at that branch's target. */
instr_set_target(prev_prev, instr_get_target(next_next));
return prev;
}
#endif
/* Mangles the indirect branch exit targeter in trace for the case where next_tag
 * is being added as the next block beyond the indirect branch.
 * next_flags are the flags of the block being added; they are consulted on x64 to
 * decide whether the saved eflags need to be restored on the stay-on-trace path.
 * *delete_after is set to NULL when targeter has no next instruction or equals
 * end_instr, and otherwise to the instruction preceding targeter's successor.
 * Returns the size of instructions added to trace.
 */
static int
mangle_indirect_branch_in_trace(dcontext_t *dcontext, instrlist_t *trace,
instr_t *targeter, app_pc next_tag, uint next_flags,
instr_t **delete_after /*OUT*/, instr_t *end_instr)
{
int added_size = 0;
/* Instructions for the stay-on-trace path are inserted before next. */
instr_t *next = instr_get_next(targeter);
ASSERT(instr_is_ubr(targeter));
ASSERT((end_instr != NULL && targeter == end_instr) ||
targeter == instrlist_last(trace));
ASSERT(delete_after != NULL);
*delete_after = (next == NULL || (end_instr != NULL && targeter == end_instr))
? NULL
: instr_get_prev(next);
STATS_INC(trace_ib_cmp);
#if defined(X86)
/* Change the unconditional exit into a conditional exit taken
 * based on indirect target not equaling next block in trace
 *
 * the bb has already done:
 * spill xcx to xcx-tls-spill-slot
 * mov curtarget, xcx
 * <any other side effects of ind branch, like ret xsp adjust>
 *
 * and we now want to accomplish:
 * cmp ecx,const
 *
 * on 32-bit we use:
 * lea -const(ecx) -> ecx
 * jecxz continue
 * lea const(ecx) -> ecx
 * jmp exit # usual targeter for stay on trace comparison
 * continue: # if match, we target post-targeter
 * restore ecx
 * we have to use the landing pad b/c we don't know whether the
 * stub will be <128 away
 *
 * on 64-bit we use (PR 245832):
 * mov xax, xax-tls-spill-slot
 * mov $staytarget, xax
 * if !INTERNAL_OPTION(unsafe_ignore_eflags_{trace,ibl})
 * mov xax, xbx-tls-spill-slot
 * lahf
 * seto al
 * cmp xcx, xbx-tls-spill-slot
 * else
 * cmp xcx, xax
 * jne exit
 * if xcx live:
 * mov xcx-tls-spill-slot, xcx
 * if flags live && unsafe options not on:
 * add 7f, al
 * sahf
 * if xax live:
 * mov xax-tls-spill-slot, xax
 */
# ifdef CUSTOM_TRACES_RET_REMOVAL
/* NOTE(review): inst and removed_ret do not appear to be declared in this
 * function, so this section likely does not compile when
 * CUSTOM_TRACES_RET_REMOVAL is defined -- confirm before enabling.
 */
IF_X64(ASSERT_NOT_IMPLEMENTED(false));
/* try to remove the ret
 * FIXME: also handle ret imm => prev instr is add
 */
inst = instr_get_prev(targeter);
if (dcontext->call_depth >= 0 && instr_raw_bits_valid(inst)) {
byte *b = inst->bytes + inst->length - 1;
/* example of the raw bytes being matched at the end of inst:
 0x40538115 89 0d ec 68 06 40 mov %ecx -> 0x400668ec
 0x4053811b 59 pop %esp (%esp) -> %ecx %esp
 0x4053811c 83 c4 04 add $0x04 %esp -> %esp
 */
LOG(THREAD, LOG_MONITOR, 4,
"ret removal: *b=0x%x, prev=" PFX ", dcontext=" PFX ", 0x%x\n", *b,
*((int *)(b - 4)), dcontext, XCX_OFFSET);
if ((*b == 0x59 && *((int *)(b - 4)) == ((uint)dcontext) + XCX_OFFSET) ||
(*(b - 3) == 0x59 && *((int *)(b - 7)) == ((uint)dcontext) + XCX_OFFSET &&
*(b - 2) == 0x83 && *(b - 1) == 0xc4)) {
uint esp_add;
/* call_depth was checked above:
 * if not negative, the call for this ret is earlier in this trace!
 */
LOG(THREAD, LOG_MONITOR, 4, "fixup_last_cti: removing ret!\n");
if (*b == 0x59) {
/* plain pop: drop the trailing 7 raw bytes */
instr_set_raw_bits(inst, inst->bytes, inst->length - 7);
esp_add = 4;
} else {
/* pop followed by add: drop the trailing 10 raw bytes */
instr_set_raw_bits(inst, inst->bytes, inst->length - 10);
esp_add = 4 + (uint)(*b);
LOG(THREAD, LOG_MONITOR, 4, "*b=0x%x, esp_add=%d\n", *b, esp_add);
}
# ifdef DEBUG
num_rets_removed++;
# endif
removed_ret = true;
/* adjust esp with an lea in place of the removed bytes */
added_size +=
tracelist_add(dcontext, trace, targeter,
INSTR_CREATE_lea(dcontext, opnd_create_reg(REG_ESP),
opnd_create_base_disp(REG_ESP, REG_NULL, 0,
esp_add, OPSZ_lea)));
}
}
if (removed_ret) {
*delete_after = instr_get_prev(targeter);
return added_size;
}
# endif
# ifdef X64
if (X64_CACHE_MODE_DC(dcontext)) {
added_size += mangle_x64_ib_in_trace(dcontext, trace, targeter, next_tag);
} else {
# endif
if (!INTERNAL_OPTION(unsafe_ignore_eflags_trace)) {
added_size +=
insert_transparent_comparison(dcontext, trace, targeter, next_tag);
/* leave targeter as it is, a jmp to the exit (thence to ind br
 * lookup) */
} else {
/* -unsafe_ignore_eflags_trace: eflags need not be preserved,
 * so go ahead and use cmp, jne
 */
IF_X64(ASSERT_NOT_IMPLEMENTED(!X64_MODE_DC(dcontext)));
added_size += tracelist_add(
dcontext, trace, targeter,
INSTR_CREATE_cmp(dcontext, opnd_create_reg(REG_ECX),
OPND_CREATE_INT32((int)(ptr_int_t)next_tag)));
/* change jmp into jne; the size estimate grows by one byte */
instr_set_opcode(targeter, OP_jnz);
added_size++;
}
# ifdef X64
}
# endif
instr_set_our_mangling(targeter, true);
LOG(THREAD, LOG_MONITOR, 3, "fixup_last_cti: added cmp vs. " PFX " for ind br\n",
next_tag);
# ifdef HASHTABLE_STATISTICS
/* If we do stay on the trace, increment a counter using dead XCX. */
if (INTERNAL_OPTION(stay_on_trace_stats)) {
ibl_type_t ibl_type;
DEBUG_DECLARE(bool ok =)
get_ibl_routine_type(dcontext, opnd_get_pc(instr_get_target(targeter)),
&ibl_type);
ASSERT(ok);
added_size += insert_increment_stat_counter(
dcontext, trace, next,
&get_ibl_per_type_statistics(dcontext, ibl_type.branch_type)
->ib_stay_on_trace_stat);
}
# endif
/* We stayed on the trace: restore the spilled xcx.
 * TODO optimization: check if xcx is live or not in next bb */
added_size += insert_restore_spilled_xcx(dcontext, trace, next);
# ifdef X64
if (X64_CACHE_MODE_DC(dcontext)) {
LOG(THREAD, LOG_INTERP, 4, "next_flags for post-ibl-cmp: 0x%x\n", next_flags);
/* Restore the saved flags unless the next block writes them anyway. */
if (!TEST(FRAG_WRITES_EFLAGS_6, next_flags) &&
!INTERNAL_OPTION(unsafe_ignore_eflags_trace)) {
if (!TEST(FRAG_WRITES_EFLAGS_OF, next_flags) &&
!INTERNAL_OPTION(unsafe_ignore_overflow)) {
/* restores OF from the value saved in al by seto */
added_size +=
tracelist_add(dcontext, trace, next,
INSTR_CREATE_add(dcontext, opnd_create_reg(REG_AL),
OPND_CREATE_INT8(0x7f)));
}
added_size +=
tracelist_add(dcontext, trace, next, INSTR_CREATE_sahf(dcontext));
} else
STATS_INC(trace_ib_no_flag_restore);
/* Restore xax from wherever mangle_x64_ib_in_trace() saved it. */
if (X64_MODE_DC(dcontext) || !DYNAMO_OPTION(x86_to_x64_ibl_opt)) {
added_size += tracelist_add(
dcontext, trace, next,
INSTR_CREATE_mov_ld(
dcontext, opnd_create_reg(REG_XAX),
opnd_create_tls_slot(os_tls_offset(PREFIX_XAX_SPILL_SLOT))));
} else {
added_size +=
tracelist_add(dcontext, trace, next,
INSTR_CREATE_mov_ld(dcontext, opnd_create_reg(REG_XAX),
opnd_create_reg(REG_R8)));
}
}
# endif
#elif defined(AARCH64)
instr_t *instr;
reg_id_t jump_target_reg;
reg_id_t scratch;
bool tgt_in_stolen_reg;
/* Insert a conditional exit from the trace, taken
 * based on indirect target not being equal to next block in trace.
 * Original ibl lookup:
 * str tgt_reg, TLS_REG2_SLOT
 * mov tgt_reg, jump_target
 * b ibl_routine
 * Now we rewrite it into:
 * str x0, TLS_REG0_SLOT
 * mov x0, #trace_next_target
 * eor x0, x0, jump_target
 * cbnz x0, trace_exit (ibl routine)
 * ldr x0, TLS_REG0_SLOT
 */
/* Check and remove the previous two patched instructions if we have any. */
tgt_in_stolen_reg = false;
jump_target_reg =
check_patched_ibl(dcontext, trace, targeter, &added_size, &tgt_in_stolen_reg);
if (jump_target_reg == DR_REG_NULL) {
ASSERT_MESSAGE(2, "Failed to get branch target register in creating trace",
false);
return added_size;
}
LOG(THREAD, LOG_MONITOR, 4, "fixup_last_cti: jump target reg is %s\n",
reg_names[jump_target_reg]);
/* Choose any scratch register except the target reg. */
scratch = (jump_target_reg == DR_REG_X0) ? DR_REG_X1 : DR_REG_X0;
/* str scratch, TLS_REG0_SLOT */
added_size +=
tracelist_add(dcontext, trace, next,
instr_create_save_to_tls(dcontext, scratch, TLS_REG0_SLOT));
/* mov scratch, #trace_next_target */
instr_t *first = NULL;
instr_t *end = NULL;
instrlist_insert_mov_immed_ptrsz(dcontext, (ptr_int_t)next_tag,
opnd_create_reg(scratch), trace, next, &first, &end);
/* Count the inserted instructions from first up to, but not including, end... */
instr = first;
while (instr != end) {
added_size += AARCH64_INSTR_SIZE;
instr = instr_get_next(instr);
}
/* ...and then account for end itself. */
added_size += AARCH64_INSTR_SIZE;
/* eor scratch, scratch, jump_target */
added_size += tracelist_add(dcontext, trace, next,
INSTR_CREATE_eor(dcontext, opnd_create_reg(scratch),
opnd_create_reg(jump_target_reg)));
/* cbnz scratch, trace_exit: on a mismatch (scratch is non-zero),
 * branch to original ibl lookup routine */
instr =
INSTR_CREATE_cbnz(dcontext, instr_get_target(targeter), opnd_create_reg(scratch));
/* The new cbnz takes over targeter's exit branch type. */
instr_exit_branch_set_type(instr, instr_exit_branch_type(targeter));
added_size += tracelist_add(dcontext, trace, next, instr);
/* If we do stay on the trace, restore the scratch register:
 * ldr scratch, TLS_REG0_SLOT
 */
ASSERT(TLS_REG0_SLOT != IBL_TARGET_SLOT);
added_size +=
tracelist_add(dcontext, trace, next,
instr_create_restore_from_tls(dcontext, scratch, TLS_REG0_SLOT));
if (tgt_in_stolen_reg) {
/* Restore IBL_TARGET_REG from its spill slot on the stay-on-trace path.
 * This is not needed in the str/mov case as we removed the instruction to
 * spill the value of IBL_TARGET_REG (str tgt_reg, TLS_REG2_SLOT)
 *
 * ldr x2 TLS_REG2_SLOT
 */
added_size += tracelist_add(
dcontext, trace, next,
instr_create_restore_from_tls(dcontext, IBL_TARGET_REG, IBL_TARGET_SLOT));
}
/* Remove the branch: the cbnz inserted above replaces it. */
instrlist_remove(trace, targeter);
instr_destroy(dcontext, targeter);
added_size -= AARCH64_INSTR_SIZE;
#elif defined(ARM)
ASSERT_NOT_IMPLEMENTED(false);
#endif
return added_size;
}
/* This routine handles the mangling of the cti at the end of the
 * previous block when adding a new block (f) to the trace fragment.
 * If prev_l is not NULL, matches the ordinal of prev_l to the nth
 * exit cti in the trace instrlist_t.
 *
 * If prev_l is NULL: WARNING: this routine assumes that the previous
 * block can only have a single indirect branch -- otherwise there is
 * no way to determine which indirect exit targeted the new block! No
 * assumptions are made about direct exits -- we can walk through them
 * all to find the one that targeted the new block.
 *
 * If start_instr is NULL the whole trace is searched; otherwise the search covers
 * start_instr through end_instr (end_instr must then be non-NULL).
 * If num_exits_deleted is not NULL, it receives the number of exit ctis removed.
 *
 * Returns an upper bound on the size added to the trace with inserted
 * instructions.
 * If we change this to add a substantial # of instrs, should update
 * TRACE_CTI_MANGLE_SIZE_UPPER_BOUND (assert at bottom should notify us)
 *
 * If you want to re-add the ability to add the front end of a trace,
 * revive the now-removed CUSTOM_TRACES_ADD_TRACE define from the attic.
 */
static int
fixup_last_cti(dcontext_t *dcontext, instrlist_t *trace, app_pc next_tag, uint next_flags,
uint trace_flags, fragment_t *prev_f, linkstub_t *prev_l,
bool record_translation, uint *num_exits_deleted /*OUT*/,
instr_t *start_instr, instr_t *end_instr)
{
app_pc target_tag;
instr_t *inst, *targeter = NULL;
/* at end of routine we will delete all instrs after this one: */
instr_t *delete_after = NULL;
bool is_indirect = false;
/* Running total of the size change made here (may go down when code is removed).
 * Use tracelist_add to automate adding inserted instr sizes.
 */
int added_size = 0;
uint exits_deleted = 0;
/* Ordinal of the targeting exit, counted from the bottom of the block:
 * start at prev_l, and count up extraneous exits and blks until end
 */
uint nth_exit = 0, cur_exit;
#ifdef CUSTOM_TRACES_RET_REMOVAL
bool removed_ret = false;
#endif
bool have_ordinal = false;
/* If prev_l is the deleted-linkstub placeholder, use the last recorded linkstub
 * ordinal when there is one.
 */
if (prev_l != NULL && prev_l == get_deleted_linkstub(dcontext)) {
int last_ordinal = get_last_linkstub_ordinal(dcontext);
if (last_ordinal != -1) {
nth_exit = last_ordinal;
have_ordinal = true;
}
}
if (!have_ordinal && prev_l != NULL && !LINKSTUB_FAKE(prev_l)) {
/* Locate prev_l among prev_f's exit stubs. */
linkstub_t *stub = FRAGMENT_EXIT_STUBS(prev_f);
while (stub != prev_l)
stub = LINKSTUB_NEXT_EXIT(stub);
/* Step past prev_l itself and count the exits after it, so that the last
 * exit gets ordinal 0.
 */
stub = LINKSTUB_NEXT_EXIT(stub);
while (stub != NULL) {
nth_exit++;
stub = LINKSTUB_NEXT_EXIT(stub);
}
}
LOG(THREAD, LOG_MONITOR, 4,
"fixup_last_cti: looking for %d-th exit cti from bottom\n", nth_exit);
if (start_instr != NULL) {
ASSERT(end_instr != NULL);
} else {
start_instr = instrlist_first(trace);
end_instr = instrlist_last(trace);
}
/* The backward walk below stops on reaching the instruction before the range. */
start_instr = instr_get_prev(start_instr);
cur_exit = nth_exit;
/* now match the ordinal to the instrs.
 * we don't have any way to find boundary with previous-previous block
 * to make sure we didn't go backwards too far -- does it matter?
 */
for (inst = end_instr; inst != NULL && inst != start_instr;
inst = instr_get_prev(inst)) {
if (instr_is_exit_cti(inst)) {
if (cur_exit == 0) {
ibl_type_t ibl_type;
/* exit cti is guaranteed to have pc target */
target_tag = opnd_get_pc(instr_get_target(inst));
is_indirect = get_ibl_routine_type(dcontext, target_tag, &ibl_type);
if (is_indirect) {
/* this should be a trace shared indirect exit */
ASSERT(IS_IBL_TRACE(ibl_type.source_fragment_type));
targeter = inst;
break;
} else {
if (prev_l != NULL) {
/* direct jump to bb: the ordinal already identified this exit */
ASSERT(target_tag == next_tag);
targeter = inst;
break;
} else {
/* no ordinal: need to search for the exit targeting next_tag */
DOLOG(4, LOG_MONITOR,
{ d_r_loginst(dcontext, 4, inst, "exit==targeter?"); });
LOG(THREAD, LOG_MONITOR, 4,
"target_tag = " PFX ", next_tag = " PFX "\n", target_tag,
next_tag);
if (target_tag == next_tag) {
targeter = inst;
break;
}
}
}
} else if (prev_l != NULL) {
LOG(THREAD, LOG_MONITOR, 4,
"counting backwards: %d == target_tag = " PFX "\n", cur_exit,
opnd_get_pc(instr_get_target(inst)));
cur_exit--;
}
}
}
ASSERT(targeter != NULL);
/* While set, instructions inserted into trace get targeter's translation and
 * are marked as our mangling; both settings are cleared again below.
 */
if (record_translation)
instrlist_set_translation_target(trace, instr_get_translation(targeter));
instrlist_set_our_mangling(trace, true);
DOLOG(4, LOG_MONITOR, { d_r_loginst(dcontext, 4, targeter, "\ttargeter"); });
if (is_indirect) {
added_size += mangle_indirect_branch_in_trace(
dcontext, trace, targeter, next_tag, next_flags, &delete_after, end_instr);
} else {
/* direct jump or conditional branch */
instr_t *next = targeter->next;
if (instr_is_cbr(targeter)) {
LOG(THREAD, LOG_MONITOR, 4, "fixup_last_cti: inverted logic of cbr\n");
if (next != NULL && instr_is_ubr(next)) {
/* invert logic of cbr and give it the ubr's target, then
 * remove ubr
 */
instr_invert_cbr(targeter);
instr_set_target(targeter, instr_get_target(next));
ASSERT(next == end_instr);
delete_after = targeter;
LOG(THREAD, LOG_MONITOR, 4, "\tremoved ubr following cbr\n");
} else {
ASSERT_NOT_REACHED();
}
} else if (instr_is_ubr(targeter)) {
/* the ubr itself goes away: delete starting with it */
#ifdef AARCH64
delete_after = fixup_cbr_on_stolen_reg(dcontext, trace, targeter);
#else
delete_after = instr_get_prev(targeter);
#endif
if (delete_after != NULL) {
LOG(THREAD, LOG_MONITOR, 4, "fixup_last_cti: removed ubr\n");
}
} else
ASSERT_NOT_REACHED();
}
/* remove all instrs after the cti -- but what if internal
 * control flow jumps ahead and then comes back?
 * too expensive to check for such all the time.
 * XXX: what to do?
 *
 * XXX: rather than adding entire trace on and then chopping off where
 * we exited, why not add after we know where to stop?
 */
if (delete_after != NULL) {
ASSERT(delete_after != end_instr);
delete_after = instr_get_next(delete_after);
while (delete_after != NULL) {
inst = delete_after;
/* advance before inst is destroyed; stop once end_instr has been handled */
if (delete_after == end_instr)
delete_after = NULL;
else
delete_after = instr_get_next(delete_after);
if (instr_is_exit_cti(inst)) {
/* the instr's target works for local_exit_stub_size()
 * just as well as linkstub_t target, since only cares whether
 * targeting ibl
 */
app_pc target = opnd_get_pc(instr_get_target(inst));
/* this exit is being deleted along with its stub,
 * so we subtract the trace size of the stub here
 */
added_size -= local_exit_stub_size(dcontext, target, trace_flags);
exits_deleted++;
} else if (instr_opcode_valid(inst) && instr_is_cti(inst)) {
LOG(THREAD, LOG_MONITOR, 3,
"WARNING: deleting non-exit cti in unused tail of frag added to "
"trace\n");
}
d_r_loginst(dcontext, 4, inst, "\tdeleting");
instrlist_remove(trace, inst);
added_size -= instr_length(dcontext, inst);
instr_destroy(dcontext, inst);
}
}
if (num_exits_deleted != NULL)
*num_exits_deleted = exits_deleted;
/* Undo the list-wide settings made before mangling. */
if (record_translation)
instrlist_set_translation_target(trace, NULL);
instrlist_set_our_mangling(trace, false);
#if defined(X86) && defined(X64)
DOCHECK(1, {
if (FRAG_IS_32(trace_flags)) {
instr_t *in;
/* in case we missed any in tracelist_add() */
for (in = instrlist_first(trace); in != NULL; in = instr_get_next(in)) {
if (instr_is_our_mangling(in))
ASSERT(instr_get_x86_mode(in));
}
}
});
#endif
ASSERT(added_size < TRACE_CTI_MANGLE_SIZE_UPPER_BOUND);
return added_size;
}
/* Adds a speculative comparison against speculate_next_tag for the exit cti that
 * ends trace: on a match, xcx is restored and a direct jump to speculate_next_tag
 * is taken.
 * Returns additional size to add to trace estimate.
 */
int
append_trace_speculate_last_ibl(dcontext_t *dcontext, instrlist_t *trace,
app_pc speculate_next_tag, bool record_translation)
{
int added_size = 0;
ibl_type_t ibl_type;
/* The exit cti being speculated on is the last instruction of trace. */
instr_t *inst = instrlist_last(trace);
/* Insertion point for code that goes before the exit cti. */
instr_t *where = inst;
/* Insertion point for code that goes after the exit cti. */
instr_t *next = instr_get_next(inst);
DEBUG_DECLARE(bool ok;)
ASSERT(speculate_next_tag != NULL);
ASSERT(inst != NULL);
ASSERT(instr_is_exit_cti(inst));
/* The call runs in all builds since it fills in ibl_type; only ok is debug-only. */
DEBUG_DECLARE(ok =)
get_ibl_routine_type(dcontext, opnd_get_pc(instr_get_target(inst)), &ibl_type);
ASSERT(ok);
if (record_translation)
instrlist_set_translation_target(trace, instr_get_translation(inst));
instrlist_set_our_mangling(trace, true);
STATS_INC(num_traces_end_at_ibl_speculative_link);
#ifdef HASHTABLE_STATISTICS
DOSTATS({
if (INTERNAL_OPTION(speculate_last_exit_stats)) {
int tls_stat_scratch_slot = os_tls_offset(HTABLE_STATS_SPILL_SLOT);
/* spill SCRATCH_REG2 around the counter increment */
added_size += tracelist_add(
dcontext, trace, where,
XINST_CREATE_store(dcontext, opnd_create_tls_slot(tls_stat_scratch_slot),
opnd_create_reg(SCRATCH_REG2)));
added_size += insert_increment_stat_counter(
dcontext, trace, where,
&get_ibl_per_type_statistics(dcontext, ibl_type.branch_type)
->ib_trace_last_ibl_exit);
added_size += tracelist_add(
dcontext, trace, where,
XINST_CREATE_load(dcontext, opnd_create_reg(SCRATCH_REG2),
opnd_create_tls_slot(tls_stat_scratch_slot)));
}
});
#endif
/* preinsert comparison before exit CTI, but increment of success
 * statistics after it
 */
IF_X64(ASSERT_NOT_IMPLEMENTED(false));
/* we need to compare against speculate_next_tag now:
 * 8d 89 76 9b bf ff lea -tag(%ecx) -> %ecx
 * e3 0b jecxz continue
 * 8d 89 8a 64 40 00 lea tag(%ecx) -> %ecx
 * e9 17 00 00 00 jmp <exit stub 1: IBL>
 *
 * continue:
 * <increment stats>
 * # see FIXME whether to go to prefix or do here
 * <restore app ecx>
 * e9 cc aa dd 00 jmp speculate_next_tag
 *
 */
added_size +=
insert_transparent_comparison(dcontext, trace, where, speculate_next_tag);
#ifdef HASHTABLE_STATISTICS
DOSTATS({
reg_id_t reg = SCRATCH_REG2;
if (INTERNAL_OPTION(speculate_last_exit_stats)) {
int tls_stat_scratch_slot = os_tls_offset(HTABLE_STATS_SPILL_SLOT);
/* counts successful speculation; placed after the exit cti */
added_size += insert_increment_stat_counter(
dcontext, trace, next,
&get_ibl_per_type_statistics(dcontext, ibl_type.branch_type)
->ib_trace_last_ibl_speculate_success);
added_size += tracelist_add(
dcontext, trace, next,
XINST_CREATE_load(dcontext, opnd_create_reg(reg),
opnd_create_tls_slot(tls_stat_scratch_slot)));
}
});
#endif
/* adding a new CTI for the speculative target that is a pseudo
 * direct exit. Although we could have used the indirect stub
 * to be the unlinked path, with a new CTI way we can unlink a
 * speculated fragment without affecting any other targets
 * reached by the IBL. Also in general we could decide to add
 * multiple speculative comparisons and to chain them we'd
 * need new CTIs for them.
 */
/* NOTE: spilled app registers must be restored on both the linked
 * and unlinked paths - currently only XCX is in use.
 *
 *
 * Preferably we should be targeting prefix of target to
 * save some space for recovering XCX from hot path. We'd
 * restore XCX in the exit stub when unlinked.
 * So it would act like a direct CTI when linked and like indirect
 * when unlinked. It could just be an unlinked indirect stub, if
 * we haven't modified any other registers or flags.
 *
 * For simplicity, we currently restore XCX here and use a plain
 * direct exit stub that goes to target start_pc instead of
 * prefixes.
 *
 * FIXME: (case 5085) the problem with the current scheme is that
 * when we exit unlinked the source will be marked as a DIRECT
 * exit - therefore no security policies will be enforced.
 *
 * FIXME: (case 4718) should add speculated target to current list
 * in case of RCT policy that needs to be invalidated if target is
 * flushed
 */
added_size += insert_restore_spilled_xcx(dcontext, trace, next);
added_size +=
tracelist_add(dcontext, trace, next,
XINST_CREATE_jump(dcontext, opnd_create_pc(speculate_next_tag)));
LOG(THREAD, LOG_INTERP, 3,
"append_trace_speculate_last_ibl: added cmp vs. " PFX " for ind br\n",
speculate_next_tag);
/* Undo the list-wide settings made above. */
if (record_translation)
instrlist_set_translation_target(trace, NULL);
instrlist_set_our_mangling(trace, false);
return added_size;
}
#ifdef HASHTABLE_STATISTICS
* if speculate_next_tag is not NULL then check case 4817's possible success
*/
* currently useful only to see statistics without side effects of
* adding exit stub
*/
int
append_ib_trace_last_ibl_exit_stat(dcontext_t *dcontext, instrlist_t *trace,
app_pc speculate_next_tag)
{
int tls_stat_scratch_slot = os_tls_offset(HTABLE_STATS_SPILL_SLOT);
int added_size = 0;
ibl_type_t ibl_type;
instr_t *inst = instrlist_last(trace);
instr_t *where = inst;
reg_id_t reg = SCRATCH_REG2;
DEBUG_DECLARE(bool ok;)
IF_X64(ASSERT_NOT_IMPLEMENTED(false));
ASSERT(inst != NULL);
ASSERT(instr_is_exit_cti(inst));
ok = get_ibl_routine_type(dcontext, opnd_get_pc(instr_get_target(inst)), &ibl_type);
ASSERT(ok);
added_size += tracelist_add(
dcontext, trace, where,
XINST_CREATE_store(dcontext, opnd_create_tls_slot(tls_stat_scratch_slot),
opnd_create_reg(reg)));
added_size += insert_increment_stat_counter(
dcontext, trace, where,
&get_ibl_per_type_statistics(dcontext, ibl_type.branch_type)
->ib_trace_last_ibl_exit);
added_size +=
tracelist_add(dcontext, trace, where,
XINST_CREATE_load(dcontext, opnd_create_reg(reg),
opnd_create_tls_slot(tls_stat_scratch_slot)));
if (speculate_next_tag != NULL) {
instr_t *next = instr_get_next(inst);
* fixup_last_cti() would do later.
*/
* increment success counter
* jmp targeter
*
* FIXME: now the last instruction is no longer the exit_cti - see if that
* breaks any assumptions, using a short jump to see if anyone erroneously
* uses this
*/
added_size +=
insert_transparent_comparison(dcontext, trace, where, speculate_next_tag);
added_size += insert_increment_stat_counter(
dcontext, trace, next,
&get_ibl_per_type_statistics(dcontext, ibl_type.branch_type)
->ib_trace_last_ibl_speculate_success);
added_size +=
tracelist_add(dcontext, trace, next,
XINST_CREATE_load(dcontext, opnd_create_reg(reg),
opnd_create_tls_slot(tls_stat_scratch_slot)));
added_size +=
tracelist_add(dcontext, trace, next,
IF_X86_ELSE(INSTR_CREATE_jmp_short, XINST_CREATE_jump)(
dcontext, opnd_create_instr(where)));
}
return added_size;
}
#endif
/* Adds the block f to the end of the trace being built in the monitor data of
 * dcontext, first fixing up the last cti of the previous block (if any) via
 * fixup_last_cti().  prev_l is the exit through which f was reached; it may be NULL.
 *
 * Note that recreate_fragment_ilist() is making assumptions about its operation
 * synchronize changes
 *
 * Returns the size change in the trace from mangling the previous block
 * (assumes the caller has already calculated the size from adding the new block)
 */
uint
extend_trace(dcontext_t *dcontext, fragment_t *f, linkstub_t *prev_l)
{
monitor_data_t *md = (monitor_data_t *)dcontext->monitor_field;
fragment_t *prev_f = NULL;
instrlist_t *trace = &(md->trace);
instrlist_t *ilist;
uint size;
uint prev_mangle_size = 0;
uint num_exits_deleted = 0;
uint new_exits_dir = 0, new_exits_indir = 0;
#ifdef X64
/* The trace's 32-bit flag must agree with the current mode, except that a
 * non-32-bit trace is allowed outside x64 mode under -x86_to_x64.
 */
ASSERT((!!FRAG_IS_32(md->trace_flags) == !X64_MODE_DC(dcontext)) ||
(!FRAG_IS_32(md->trace_flags) && !X64_MODE_DC(dcontext) &&
DYNAMO_OPTION(x86_to_x64)));
#endif
STATS_INC(num_traces_extended);
/* Only non-trace fragments may be added; to add traces, revive
 * CUSTOM_TRACES_ADD_TRACE from the attic
 */
ASSERT(!TEST(FRAG_IS_TRACE, f->flags));
if (prev_l != NULL) {
/* the only fake linkstub accepted here is the deleted-linkstub placeholder */
ASSERT(!LINKSTUB_FAKE(prev_l) ||
prev_l == get_deleted_linkstub(dcontext));
prev_f = linkstub_fragment(dcontext, prev_l);
LOG(THREAD, LOG_MONITOR, 4, "prev_l = owned by F%d, branch pc " PFX "\n",
prev_f->id, EXIT_CTI_PC(prev_f, prev_l));
} else {
LOG(THREAD, LOG_MONITOR, 4, "prev_l is NULL\n");
}
/* insert code to optimize last branch based on new fragment */
if (instrlist_last(trace) != NULL) {
prev_mangle_size =
fixup_last_cti(dcontext, trace, f->tag, f->flags, md->trace_flags, prev_f,
prev_l, false, &num_exits_deleted, NULL, NULL);
}
#ifdef CUSTOM_TRACES_RET_REMOVAL
/* add now, want fixup to operate on depth before adding new blk */
dcontext->call_depth += f->num_calls;
dcontext->call_depth -= f->num_rets;
#endif
LOG(THREAD, LOG_MONITOR, 4, "\tadding block %d == " PFX "\n", md->num_blks, f->tag);
/* Space remaining in the trace buffer; passed by pointer to decode_fragment()
 * and added to trace_buf_top afterward.
 */
size = md->trace_buf_size - md->trace_buf_top;
LOG(THREAD, LOG_MONITOR, 4, "decoding F%d into trace buf @" PFX " + 0x%x = " PFX "\n",
f->id, md->trace_buf, md->trace_buf_top, md->trace_buf + md->trace_buf_top);
/* XXX: when the trace is passed to a client, much of this is a waste of time as
 * we're going to re-mangle and re-fixup after passing our unmangled list to the
 * client. We do want to keep the size estimate, which requires having the last
 * cti at least, so for now we keep all the work. Of course the size estimate is
 * less valuable when the client may add a ton of instrumentation.
 */
/* decode_fragment() is passed the trace's flags so that its result is suitable for
 * our trace, whether f and the trace are shared or private
 */
ilist = decode_fragment(dcontext, f, md->trace_buf + md->trace_buf_top, &size,
md->trace_flags, &new_exits_dir, &new_exits_indir);
/* Record the new block; the caller is assumed to have ensured that blk_info has
 * room for it -- TODO confirm.
 */
md->blk_info[md->num_blks].info.tag = f->tag;
#if defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH)
/* The previous block lost the exits that fixup_last_cti() deleted. */
if (md->num_blks > 0)
md->blk_info[md->num_blks - 1].info.num_exits -= num_exits_deleted;
md->blk_info[md->num_blks].info.num_exits = new_exits_dir + new_exits_indir;
#endif
md->num_blks++;
/* Process any nops in the decoded block (we don't expect there
 * to be any in a bb if -pad_jmps_shift_bb) to avoid screwing up
 * fixup_last_cti etc. */
process_nops_for_trace(dcontext, ilist, f->flags _IF_DEBUG(false ));
DOLOG(5, LOG_MONITOR, {
LOG(THREAD, LOG_MONITOR, 5, "post-trace-ibl-fixup, ilist is:\n");
instrlist_disassemble(dcontext, f->tag, ilist, THREAD);
});
ASSERT(!instrlist_get_our_mangling(ilist));
/* Move the decoded instructions onto the end of the trace, then reset ilist
 * before destroying it; presumably this keeps instrlist_destroy() from
 * freeing the instructions that now belong to trace.
 */
instrlist_append(trace, instrlist_first(ilist));
instrlist_init(ilist);
instrlist_destroy(dcontext, ilist);
md->trace_buf_top += size;
ASSERT(md->trace_buf_top < md->trace_buf_size);
LOG(THREAD, LOG_MONITOR, 4, "post-extend_trace, trace buf + 0x%x => " PFX "\n",
md->trace_buf_top, md->trace_buf);
DOLOG(4, LOG_MONITOR, {
LOG(THREAD, LOG_MONITOR, 4, "\nafter extending trace:\n");
instrlist_disassemble(dcontext, md->trace_tag, trace, THREAD);
});
return prev_mangle_size;
}
/* Builds an unconditional jump to target that serves as a trace exit.
 * The jump's translation is set to the given app pc and it is flagged as
 * our own mangling.  A branch_type of 0 means "use the type derived from
 * the new jump itself"; any other value is recorded as the exit branch type.
 */
static instr_t *
create_exit_jmp(dcontext_t *dcontext, app_pc target, app_pc translation, uint branch_type)
{
    instr_t *exit_jmp = XINST_CREATE_jump(dcontext, opnd_create_pc(target));
    uint exit_type;

    instr_set_translation(exit_jmp, translation);
    /* Fall back to the jump's own branch type when the caller passes 0. */
    exit_type = (branch_type != 0) ? branch_type : instr_branch_type(exit_jmp);
    instr_exit_branch_set_type(exit_jmp, exit_type);
    instr_set_our_mangling(exit_jmp, true);
    return exit_jmp;
}
* things. This is used both for clients and for recreating traces
* for state translation.
* It assumes the ilist abides by client rules: single-mbr bbs, no
* changes in source app code. Else, it returns false.
* Elision is supported.
*
* Our docs disallow removal of an entire block, changing inter-block ctis, and
* changing the ordering of the blocks, which is what allows us to correctly
* mangle the inter-block ctis here.
*
* Reads the following fields from md:
* - trace_tag
* - trace_flags
* - num_blks
* - blk_info
* - final_exit_flags
*/
bool
mangle_trace(dcontext_t *dcontext, instrlist_t *ilist, monitor_data_t *md)
{
instr_t *inst, *next_inst, *start_instr, *jmp;
uint blk, num_exits_deleted;
app_pc fallthrough = NULL;
bool found_syscall = false, found_int = false;
* can unregister its bb and trace hooks if it really wants to,
* though we discourage it.
*/
ASSERT(md->pass_to_client);
LOG(THREAD, LOG_MONITOR, 2, "mangle_trace " PFX "\n", md->trace_tag);
DOLOG(4, LOG_INTERP, {
LOG(THREAD, LOG_INTERP, 4, "ilist passed to mangle_trace:\n");
instrlist_disassemble(dcontext, md->trace_tag, ilist, THREAD);
});
* 1st walk: find bb boundaries
*/
blk = 0;
for (inst = instrlist_first(ilist); inst != NULL; inst = next_inst) {
app_pc xl8 = instr_get_translation(inst);
next_inst = instr_get_next(inst);
if (instr_is_meta(inst))
continue;
DOLOG(5, LOG_INTERP, {
LOG(THREAD, LOG_MONITOR, 4, "transl " PFX " ", xl8);
d_r_loginst(dcontext, 4, inst, "considering non-meta");
});
while (blk < md->num_blks - 1 && !md->blk_info[blk].final_cti) {
LOG(THREAD, LOG_MONITOR, 4, "skipping fall-through bb #%d\n", blk);
md->blk_info[blk].end_instr = NULL;
blk++;
}
if (md->pass_to_client &&
!client_check_syscall(ilist, inst, &found_syscall, &found_int))
return false;
* here, as well as mess up our cache consistency (both page prot and
* selfmod).
*/
if (md->pass_to_client &&
(!vm_list_overlaps(dcontext, md->blk_info[blk].vmlist, xl8, xl8 + 1) &&
!(instr_is_ubr(inst) && opnd_is_pc(instr_get_target(inst)) &&
xl8 == opnd_get_pc(instr_get_target(inst))))
IF_WINDOWS(&&!vmvector_overlap(landing_pad_areas,
md->blk_info[blk].info.tag,
md->blk_info[blk].info.tag + 1))) {
LOG(THREAD, LOG_MONITOR, 2,
"trace error: out-of-bounds transl " PFX " vs block w/ start " PFX "\n",
xl8, md->blk_info[blk].info.tag);
CLIENT_ASSERT(false,
"trace's app sources (instr_set_translation() targets) "
"must remain within original bounds");
return false;
}
if (blk == md->num_blks - 1) {
* of translation! (i#509)
*/
fallthrough = decode_next_pc(dcontext, xl8);
}
* ubrs can point at their targets and theoretically the entire trace could
* be ubrs: so we have to go by exits, and limit what the client can do. We
* can assume that each bb should not violate the bb callback rules (PR
* 215217): if has cbr or mbr, that must end bb. If it has a call, that
* could be elided; if not, its target should match the start of the next
* block. We also want to
* impose the can't-be-trace rules (PR 215219), which are not documented for
* bbs: if more than one exit cti or if code beyond last exit cti then can't
* be in a trace. We can soften a little and allow extra ubrs if they do not
* target the subsequent block. FIXME: we could have stricter translation
* reqts for ubrs: make them point at corresponding app ubr (but what if
* really correspond to app cbr?): then can handle code past exit ubr.
*/
if (instr_will_be_exit_cti(inst) &&
((!instr_is_ubr(inst) && !instr_is_near_call_direct(inst)) ||
(inst == instrlist_last(ilist) ||
(blk + 1 < md->num_blks &&
* hook; if they change in bb for_trace, will be reflected here.
*/
opnd_get_pc(instr_get_target(inst)) == md->blk_info[blk + 1].info.tag)))) {
DOLOG(4, LOG_INTERP, { d_r_loginst(dcontext, 4, inst, "end of bb"); });
if (!instr_is_ubr(inst) IF_X86(|| instr_get_opcode(inst) == OP_jmp_far)) {
app_pc target;
if (instr_is_mbr(inst) IF_X86(|| instr_get_opcode(inst) == OP_jmp_far)) {
target = get_ibl_routine(
dcontext, get_ibl_entry_type(instr_branch_type(inst)),
DEFAULT_IBL_TRACE(), get_ibl_branch_type(inst));
} else if (instr_is_cbr(inst)) {
* of translation! (i#509)
*/
target = decode_next_pc(dcontext, xl8);
} else {
target = opnd_get_pc(instr_get_target(inst));
}
ASSERT(target != NULL);
jmp = create_exit_jmp(dcontext, target, xl8, instr_branch_type(inst));
instrlist_postinsert(ilist, inst, jmp);
* d_r_mangle() shouldn't remove the exit cti.
*/
vm_area_destroy_list(dcontext, md->blk_info[blk].vmlist);
md->blk_info[blk].vmlist = NULL;
md->blk_info[blk].end_instr = jmp;
} else
md->blk_info[blk].end_instr = inst;
blk++;
DOLOG(4, LOG_INTERP, {
if (blk < md->num_blks) {
LOG(THREAD, LOG_MONITOR, 4, "starting next bb " PFX "\n",
md->blk_info[blk].info.tag);
}
});
if (blk >= md->num_blks && next_inst != NULL) {
CLIENT_ASSERT(false, "unsupported trace modification: too many exits");
return false;
}
}
#if defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH)
* and then adjust after fixup_last_cti.
*/
if (instr_will_be_exit_cti(inst))
md->blk_info[blk].info.num_exits++;
#endif
}
if (blk < md->num_blks) {
ASSERT(!instr_is_ubr(instrlist_last(ilist)));
if (blk + 1 < md->num_blks) {
CLIENT_ASSERT(false, "unsupported trace modification: too few exits");
return false;
}
jmp = create_exit_jmp(dcontext, fallthrough, fallthrough, 0);
* syscalls and ints: need to re-analyze. Then we wouldn't
* need the md->final_exit_flags field anymore.
* For now we disallow.
*/
if (found_syscall || found_int) {
instr_exit_branch_set_type(jmp, md->final_exit_flags);
#ifdef WINDOWS
* survives to here if the instr is not clobbered,
* and does not come from md->final_exit_flags
*/
if (TEST(INSTR_SHARED_SYSCALL, instrlist_last(ilist)->flags)) {
instr_set_target(jmp, opnd_create_pc(shared_syscall_routine(dcontext)));
instr_set_our_mangling(jmp, true);
}
if (!TESTANY(LINK_NI_SYSCALL_ALL IF_WINDOWS(| LINK_CALLBACK_RETURN),
md->final_exit_flags) &&
!TEST(INSTR_SHARED_SYSCALL, instrlist_last(ilist)->flags)) {
CLIENT_ASSERT(false,
"client modified or added a syscall or int: unsupported");
return false;
}
#endif
}
instrlist_append(ilist, jmp);
md->blk_info[blk].end_instr = jmp;
} else {
CLIENT_ASSERT((!found_syscall && !found_int)
* FIXME PR 307284: see notes above. */
IF_UNIX(|| !TEST(LINK_NI_SYSCALL, md->final_exit_flags)),
"client changed exit target where unsupported\n"
"check if trace ends in a syscall or int");
}
ASSERT(instr_is_ubr(instrlist_last(ilist)));
if (found_syscall)
md->trace_flags |= FRAG_HAS_SYSCALL;
else
md->trace_flags &= ~FRAG_HAS_SYSCALL;
DOLOG(4, LOG_INTERP, {
LOG(THREAD, LOG_INTERP, 4, "trace ilist before mangling:\n");
instrlist_disassemble(dcontext, md->trace_tag, ilist, THREAD);
});
d_r_mangle(dcontext, ilist, &md->trace_flags, true ,
TEST(FRAG_HAS_TRANSLATION_INFO, md->trace_flags));
DOLOG(4, LOG_INTERP, {
LOG(THREAD, LOG_INTERP, 4, "trace ilist after mangling:\n");
instrlist_disassemble(dcontext, md->trace_tag, ilist, THREAD);
});
for (blk = 0; blk < md->num_blks && md->blk_info[blk].end_instr == NULL; blk++)
;
start_instr = instrlist_first(ilist);
for (inst = instrlist_first(ilist); inst != NULL; inst = next_inst) {
next_inst = instr_get_next(inst);
if (inst == md->blk_info[blk].end_instr) {
if (blk + 1 < md->num_blks) {
* created traces in whether eflags are restored post-cmp
*/
uint next_flags =
forward_eflags_analysis(dcontext, ilist, instr_get_next(inst));
next_flags = instr_eflags_to_fragment_eflags(next_flags);
LOG(THREAD, LOG_INTERP, 4, "next_flags for fixup_last_cti: 0x%x\n",
next_flags);
fixup_last_cti(dcontext, ilist, md->blk_info[blk + 1].info.tag,
next_flags, md->trace_flags, NULL, NULL,
TEST(FRAG_HAS_TRANSLATION_INFO, md->trace_flags),
&num_exits_deleted,
start_instr, inst);
#if defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH)
md->blk_info[blk].info.num_exits -= num_exits_deleted;
#endif
}
blk++;
while (blk < md->num_blks && md->blk_info[blk].end_instr == NULL)
blk++;
if (blk >= md->num_blks && next_inst != NULL) {
CLIENT_ASSERT(false, "unsupported trace modification: exits modified");
return false;
}
start_instr = next_inst;
}
}
if (blk < md->num_blks) {
CLIENT_ASSERT(false, "unsupported trace modification: cannot find all exits");
return false;
}
return true;
}
/****************************************************************************
 * UTILITIES
 */
* assuming that the instr_t flags correspond to the start of the fragment_t.
* Assumes instr_eflags has already accounted for predication.
*/
uint
instr_eflags_to_fragment_eflags(uint instr_eflags)
{
uint frag_eflags = 0;
#ifdef X86
if (instr_eflags == EFLAGS_WRITE_OF) {
* May still read other flags before writing them.
*/
frag_eflags |= FRAG_WRITES_EFLAGS_OF;
return frag_eflags;
}
#endif
if (instr_eflags == EFLAGS_WRITE_ARITH) {
frag_eflags |= FRAG_WRITES_EFLAGS_ARITH;
#ifdef X86
frag_eflags |= FRAG_WRITES_EFLAGS_OF;
#endif
}
return frag_eflags;
}
/* Returns one of these values, summarizing eflags usage starting at instr:
 *   EFLAGS_WRITE_ARITH = writes all arith flags before reading any
 *   EFLAGS_WRITE_OF    = writes OF before reading it (x86-only)
 *   EFLAGS_READ_ARITH  = reads some of arith flags before writing
 *   EFLAGS_READ_OF     = reads OF before writing OF (x86-only)
 *   0                  = no information before 1st cti
 */
/* Walks forward through the (expanded) ilist starting at instr, feeding each
 * instruction to eflags_analysis() and returning the accumulated result.
 * The walk stops at the end of the list, at an invalid instruction, or at
 * the first cti.
 */
uint
forward_eflags_analysis(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr)
{
    uint eflags_state = 0; /* scratch state threaded through eflags_analysis() */
    int summary = 0;
    instr_t *cur = instr;

    while (cur != NULL && instr_valid(cur) && !instr_is_cti(cur)) {
        /* Once one of these results is reached no further analysis is run,
         * though we keep walking (and logging) until the stop condition.
         */
        bool settled =
            (summary == EFLAGS_WRITE_ARITH) IF_X86(|| summary == EFLAGS_READ_OF);
        if (!settled)
            summary = eflags_analysis(cur, summary, &eflags_state);
        DOLOG(4, LOG_INTERP, {
            d_r_loginst(dcontext, 4, cur, "forward_eflags_analysis");
            LOG(THREAD, LOG_INTERP, 4, "\tinstr %x => %x\n",
                instr_get_eflags(cur, DR_QUERY_DEFAULT), summary);
        });
        cur = instr_get_next_expanded(dcontext, ilist, cur);
    }
    return summary;
}
/* Notes on decode_fragment() below, which decodes f into a new instrlist_t.
 * If buf is NULL:
 *   The Instrs returned point into f's raw bits, so encode them
 *   before you delete f!
 * Else, f's raw bits are copied into buf, and *bufsz is modified to
 *   contain the total bytes copied
 *   FIXME: should have release build checks and not just asserts where
 *   we rely on caller to have big-enough buffer?
 * If target_flags differ from f->flags in sharing and/or in trace-ness,
 *   converts ibl and tls usage in f to match the desired target_flags.
 *   FIXME: converting from private to shared tls is not yet
 *   implemented: we rely on -private_ib_in_tls for adding normal
 *   private bbs to shared traces, and disallow any extensive mangling
 *   (native_exec, selfmod) from becoming shared traces.
 * The caller is responsible for destroying the instrlist and its instrs.
 * If the fragment ends in an elided jmp, a new jmp instr is created, though
 *   its bits field is NULL, allowing the caller to set it to do-not-emit if
 *   trying to exactly duplicate or calculate the size, though most callers
 *   will want to emit that jmp.  See decode_fragment_exact().
 */
/* Points instr's raw bits at length bytes starting at the executable-address
 * counterpart of buf_writable_addr.
 */
static void
instr_set_raw_bits_trace_buf(instr_t *instr, byte *buf_writable_addr, uint length)
{
    /* We are handed a writable address, so we translate it to the
     * executable address for pointing at bits.
     */
    instr_set_raw_bits(instr, vmcode_get_executable_addr(buf_writable_addr), length);
}
/* Log level used throughout decode_fragment(): 6 while the (non-global)
 * dcontext has in_opnd_disassemble set, and 4 otherwise.
 */
#define DF_LOGLEVEL(dc) (((dc) != GLOBAL_DCONTEXT && (dc)->in_opnd_disassemble) ? 6U : 4U)

/* Decodes the code cache contents of fragment f into a newly created
 * instrlist_t, which is returned.
 *
 * buf:          if non-NULL, f's raw bytes are copied into it and the raw-bits
 *               instrs point into it; if NULL, they point at f's own bytes.
 * bufsz:        must be non-NULL when buf is non-NULL.  On entry it bounds the
 *               copy (checked only via ASSERT); on exit it holds the number of
 *               bytes copied into buf.
 * target_flags: flags of the fragment the instrs are destined for; used to
 *               pick alternate ibl routines and to decide on tls-to-dcontext
 *               spill conversion.
 * dir_exits, indir_exits: optional (may be NULL) outputs receiving the number
 *               of direct and indirect exits found.
 *
 * The ISA mode is switched to that of f for the duration of the call and
 * restored before returning.
 */
instrlist_t *
decode_fragment(dcontext_t *dcontext, fragment_t *f, byte *buf, uint *bufsz,
                uint target_flags, uint *dir_exits, uint *indir_exits)
{
    linkstub_t *l;
    cache_pc start_pc, stop_pc, pc, prev_pc = NULL, raw_start_pc;
    instr_t *instr, *cti = NULL, *raw_instr;
    instrlist_t *ilist = instrlist_create(dcontext);
    byte *top_buf = NULL, *cur_buf = NULL;
    app_pc target_tag;
    uint num_bytes, offset;
    uint num_dir = 0, num_indir = 0;
    bool tls_to_dc;
    bool shared_to_private =
        TEST(FRAG_SHARED, f->flags) && !TEST(FRAG_SHARED, target_flags);
#ifdef WINDOWS
    /* The fragment could contain an ignorable sysenter instruction if
     * the following conditions are satisfied. */
    bool possible_ignorable_sysenter = DYNAMO_OPTION(ignore_syscalls) &&
        (get_syscall_method() == SYSCALL_METHOD_SYSENTER) &&
        TEST(FRAG_HAS_SYSCALL, f->flags);
#endif
    /* Holds clones of ctis whose targets lie inside the fragment */
    instrlist_t intra_ctis;
    coarse_info_t *info = NULL;
    bool coarse_elided_ubrs = false;
    dr_isa_mode_t old_mode;
    /* Decode using the ISA mode of the fragment */
    DEBUG_DECLARE(bool ok =)
    dr_set_isa_mode(dcontext, FRAG_ISA_MODE(f->flags), &old_mode);
    ASSERT(ok);
    /* Decoding a code fragment from the code cache
     * may mess up the 32-bit/64-bit mode in -x86_to_x64 because 32-bit
     * application code is encoded as 64-bit code fragments into the code cache.
     * Thus we currently do not support using decode_fragment with -x86_to_x64,
     * including trace and coarse_units (coarse-grain code cache management)
     */
    IF_X86_64(ASSERT(!DYNAMO_OPTION(x86_to_x64)));

    instrlist_init(&intra_ctis);

    /* We need to separate out exit ctis as well as
     * non-exit cti's with off-fragment targets that need to be re-pc-relativized.
     * The rest of the instructions can be lumped into raw instructions.
     */
    start_pc = FCACHE_ENTRY_PC(f);
    pc = start_pc;
    raw_start_pc = start_pc;
    if (buf != NULL) {
        cur_buf = buf;
        top_buf = cur_buf;
        ASSERT(bufsz != NULL);
    }
    /* Normally we walk f's linkstubs to locate its exits.
     * Handle coarse-grain fake fragment_t by discovering exits as we go, with
     * l being NULL the whole time.
     */
    if (TEST(FRAG_FAKE, f->flags)) {
        ASSERT(TEST(FRAG_COARSE_GRAIN, f->flags));
        info = get_fragment_coarse_info(f);
        ASSERT(info != NULL);
        coarse_elided_ubrs =
            (info->persisted && TEST(PERSCACHE_ELIDED_UBR, info->flags)) ||
            (!info->persisted && DYNAMO_OPTION(coarse_freeze_elide_ubr));
        /* With no linkstubs we find exits while decoding, and rely on
         * heuristics for ctis
         * that are not exit ctis
         */
        l = NULL;
    } else
        l = FRAGMENT_EXIT_STUBS(f);
    while (true) {
        uint l_flags;
        cti = NULL;
        if (l != NULL) {
            stop_pc = EXIT_CTI_PC(f, l);
        } else if (TEST(FRAG_FAKE, f->flags)) {
            /* The real end is not known: it is discovered while decoding */
            stop_pc = (cache_pc)UNIVERSAL_REGION_END;
        } else {
            /* Past the final exit: look at what remains of the body */
            stop_pc = fragment_body_end_pc(dcontext, f);
            if (PAD_FRAGMENT_JMPS(f->flags) && stop_pc != raw_start_pc) {
                /* Bytes past the last exit are assumed to be padding: the only
                 * way any code could get here is via client interface,
                 * and there really is no nice way to distinguish it
                 * from any padding we added.
                 * PR 213005: we do not support decode_fragment() for bbs
                 * that have code added beyond the last exit cti (we turn
                 * off FRAG_COARSE_GRAIN and set FRAG_CANNOT_BE_TRACE).
                 * Sanity check, make sure it at least looks like there is no
                 * code here */
                ASSERT(IS_SET_TO_DEBUG(raw_start_pc, stop_pc - raw_start_pc));
                stop_pc = raw_start_pc;
            }
        }
        IF_X64(ASSERT(TEST(FRAG_FAKE, f->flags) ||
                      CHECK_TRUNCATE_TYPE_uint((stop_pc - raw_start_pc))));
        num_bytes = (uint)(stop_pc - raw_start_pc);
        LOG(THREAD, LOG_MONITOR, DF_LOGLEVEL(dcontext),
            "decoding fragment from " PFX " to " PFX "\n", raw_start_pc, stop_pc);
        if (num_bytes > 0) {
            if (buf != NULL) {
                if (TEST(FRAG_FAKE, f->flags)) {
                    /* The size is not yet known, so the copy is delayed; note that
                     * we do point instrs into buf before we copy!
                     */
                } else {
                    /* First copy the entire sequence up to the exit cti into buf
                     * so we don't have to copy it in pieces if we find cti's, if we don't
                     * find any we want one giant piece anyway
                     */
                    ASSERT(cur_buf + num_bytes < buf + *bufsz);
                    memcpy(cur_buf, raw_start_pc, num_bytes);
                    top_buf = cur_buf + num_bytes;
                    LOG(THREAD, LOG_MONITOR, DF_LOGLEVEL(dcontext),
                        "decode_fragment: copied " PFX "-" PFX " to " PFX "-" PFX "\n",
                        raw_start_pc, raw_start_pc + num_bytes, cur_buf,
                        cur_buf + num_bytes);
                    /* cur_buf is advanced later: it always points to the start
                     * of raw bytes for next-to-add-to-ilist instr, while
                     * top_buf points to top of copied-to-buf data
                     */
                }
            } else {
                /* Point instrs directly at the fragment's own bytes */
                cur_buf = raw_start_pc;
            }
            /* We cannot simply create one raw instr for the whole chunk: there may
             * be calls with off-fragment targets in there that need to be
             * re-pc-relativized (instrumentation, etc. insert calls), or
             * we may not even know where the exit ctis are (coarse-grain fragments),
             * so walk through (original bytes!) and decode, looking for cti's
             */
            instr = instr_create(dcontext);
            pc = raw_start_pc;
            /* Decide whether tls spills must be converted to dcontext spills:
             * be careful -- there can be private bbs w/ indirect branches, so
             * must see if this is a shared fragment we're adding
             */
            tls_to_dc = (shared_to_private && !DYNAMO_OPTION(private_ib_in_tls) &&
                         (l == NULL || LINKSTUB_INDIRECT(l->flags)));
            do {
#ifdef WINDOWS
                /* Store the address of the
                 * previous decode, the instr
                 * before the one 'pc'
                 * currently points to *before*
                 * the call to decode() just
                 * below */
                cache_pc prev_decode_pc = prev_pc;
#endif
                /* For frozen coarse units with elided ubrs we must check
                 * every instr for a potential next fragment start.  This is
                 * expensive so users are advised to decode from app code if
                 * possible (case 9325 -- need exact re-mangle + re-instrument),
                 * though -coarse_pclookup_table helps.
                 */
                if (info != NULL && info->frozen && coarse_elided_ubrs &&
                    pc != start_pc) {
                    bool stop = false;
                    if (coarse_is_indirect_stub(pc)) {
                        stop = true;
                        LOG(THREAD, LOG_MONITOR, DF_LOGLEVEL(dcontext) - 1,
                            "\thit ib stub @" PFX "\n", pc);
                    } else {
                        app_pc tag = fragment_coarse_entry_pclookup(dcontext, info, pc);
                        if (tag != NULL) {
                            stop = true;
                            LOG(THREAD, LOG_MONITOR, DF_LOGLEVEL(dcontext) - 1,
                                "\thit frozen tgt: " PFX "." PFX "\n", tag, pc);
                        }
                    }
                    if (stop) {
                        /* Add the ubr that was elided */
                        ASSERT(cti == NULL);
                        cti = XINST_CREATE_jump(dcontext, opnd_create_pc(pc));
                        /* It is up to the caller whether to mark this jmp
                         * as do-not-emit or not */
                        stop_pc = pc;
                        pc = stop_pc;
                        break;
                    }
                }
                instr_reset(dcontext, instr);
                prev_pc = pc;
                pc = IF_AARCH64_ELSE(decode_cti_with_ldstex, decode_cti)(dcontext, pc,
                                                                         instr);
                DOLOG(DF_LOGLEVEL(dcontext), LOG_INTERP,
                      { disassemble_with_info(dcontext, prev_pc, THREAD, true, true); });
#ifdef WINDOWS
                /* Perform fixups for ignorable syscalls on XP & 2003. */
                if (possible_ignorable_sysenter && instr_opcode_valid(instr) &&
                    instr_is_syscall(instr)) {

                    /* We need to fix up the instr before the sysenter so that
                     * it points to the post-sysenter instr in the trace, rather than
                     * remain pointing to the post-sysenter instr in the BB.
                     */
                    instr_t *sysenter_prev;
                    instr_t *sysenter_post;

                    ASSERT(prev_decode_pc != NULL);
                    LOG(THREAD, LOG_MONITOR, DF_LOGLEVEL(dcontext),
                        "decode_fragment: sysenter found @" PFX "\n",
                        instr_get_raw_bits(instr));

                    /* Create a single raw instr for everything up to the
                     * sysenter EXCEPT for the immediately preceding instruction
                     */
                    offset = (int)(prev_decode_pc - raw_start_pc);
                    ASSERT(offset > 0);
                    raw_instr = instr_create(dcontext);
                    instr_set_raw_bits_trace_buf(raw_instr, cur_buf, offset);
                    instrlist_append(ilist, raw_instr);
                    cur_buf += offset;

                    /* Fully decode the instr immediately before the sysenter:
                     * it's there because mangle put it there, so we can safely
                     * decode at prev_decode_pc.
                     */
                    sysenter_prev = instr_create(dcontext);
                    decode(dcontext, prev_decode_pc, sysenter_prev);
                    ASSERT(instr_valid(instr) && instr_is_mov_imm_to_tos(sysenter_prev));
                    instrlist_append(ilist, sysenter_prev);
                    cur_buf += instr_length(dcontext, sysenter_prev);

                    /* Append the sysenter itself */
                    instr_set_raw_bits_trace_buf(instr, cur_buf, (int)(pc - prev_pc));
                    instrlist_append(ilist, instr);
                    instr_set_meta(instr);

                    /* Skip past the sysenter to the post-sysenter instr and
                     * decode it */
                    cur_buf += (int)(pc - prev_pc);
                    sysenter_post = instr_create(dcontext);
                    prev_decode_pc = pc;
                    prev_pc = pc;
                    pc = decode(dcontext, pc, sysenter_post);
                    if (DYNAMO_OPTION(ignore_syscalls_follow_sysenter))
                        ASSERT(!instr_is_cti(sysenter_post));
                    raw_start_pc = pc;

                    /* Skip past the post-sysenter instr */
                    cur_buf += (int)(pc - prev_pc);
                    instrlist_append(ilist, sysenter_post);
                    /* Point the pre-sysenter instr at the post-sysenter instr */
                    instr_set_src(sysenter_prev, 0, opnd_create_instr(sysenter_post));
                    instr_set_meta(sysenter_prev);
                    instr_set_meta(sysenter_post);

                    DOLOG(DF_LOGLEVEL(dcontext), LOG_INTERP, {
                        LOG(THREAD, LOG_INTERP, DF_LOGLEVEL(dcontext),
                            "Post-sysenter -- F%d (" PFX ") into:\n", f->id, f->tag);
                        instrlist_disassemble(dcontext, f->tag, ilist, THREAD);
                    });

                    /* Set all local state so that we can fall through and correctly
                     * process the post-sysenter instruction.  Point instr to the
                     * already decoded instruction, sysenter_post.  At this point,
                     * pc and raw_start_pc point to just after sysenter_post,
                     * prev_pc points to sysenter_post, prev_decode_pc points to
                     * the sysenter itself, and cur_buf points to post_sysenter.
                     */
                    instr = sysenter_post;
                }
#endif
                /* Look for a cti that is not an exit cti */
                if (instr_opcode_valid(instr) && instr_is_cti(instr)) {
                    bool separate_cti = false;
                    bool re_relativize = false;
                    bool intra_target = true;
                    DOLOG(DF_LOGLEVEL(dcontext), LOG_MONITOR, {
                        d_r_loginst(dcontext, 4, instr,
                                    "decode_fragment: found non-exit cti");
                    });
                    if (TEST(FRAG_FAKE, f->flags)) {
                        /* For a coarse-grain fake fragment we cannot reliably
                         * distinguish off-fragment from intra-fragment targets.
                         * Thus we have to assume that any cti is an exit cti, and
                         * make all fragments for which that is not true into
                         * fine-grained.
                         * Except that we want to support intra-fragment ctis for
                         * clients (i#665), so we use some heuristics.
                         */
                        if (instr_is_cti_short_rewrite(instr, prev_pc)) {
                            /* Process the whole short-rewrite sequence.
                             * We must do this before asking whether it's an
                             * intra-fragment so we don't just look at the
                             * first part of the sequence.
                             */
                            pc = remangle_short_rewrite(dcontext, instr, prev_pc, 0);
                        }
                        if (!coarse_cti_is_intra_fragment(dcontext, info, instr,
                                                          start_pc)) {
                            /* Process this cti as an exit cti.  FIXME: we will then
                             * re-copy the raw bytes from this cti to the end of the
                             * fragment at the top of the next loop iter, but for
                             * coarse-grain bbs that should be just one instr for cbr bbs
                             * or none for others, so not worth doing anything about.
                             */
                            DOLOG(DF_LOGLEVEL(dcontext), LOG_MONITOR, {
                                d_r_loginst(dcontext, DF_LOGLEVEL(dcontext), instr,
                                            "\tcoarse exit cti");
                            });
                            intra_target = false;
                            stop_pc = prev_pc;
                            pc = stop_pc;
                            break;
                        } else {
                            DOLOG(DF_LOGLEVEL(dcontext), LOG_MONITOR, {
                                d_r_loginst(dcontext, DF_LOGLEVEL(dcontext), instr,
                                            "\tcoarse intra-fragment cti");
                            });
                        }
                    } else if (instr_is_return(instr) ||
                               !opnd_is_near_pc(instr_get_target(instr))) {
                        /* Not a direct near-pc target: nothing to fix up */
                        intra_target = false;
                    } else if (instr_is_cti_short_rewrite(instr, prev_pc)) {
                        /* We do not expect a short-rewrite cti here: those should
                         * be exit ctis and thus already
                         * separated out unless we're decoding a fake fragment.  We
                         * include this case for future use, as otherwise we'll
                         * decode just the short cti and think it is an
                         * intra-fragment cti.
                         */
                        ASSERT_NOT_REACHED();
                        separate_cti = true;
                        re_relativize = true;
                        intra_target = false;
                    } else if (opnd_get_pc(instr_get_target(instr)) < start_pc ||
                               opnd_get_pc(instr_get_target(instr)) >
                                   start_pc + f->size) {
                        separate_cti = true;
                        re_relativize = true;
                        intra_target = false;
                        DOLOG(DF_LOGLEVEL(dcontext), LOG_MONITOR, {
                            d_r_loginst(dcontext, 4, instr,
                                        "\tcti has off-fragment target");
                        });
                    }
                    if (intra_target) {
                        /* Intra-fragment target: its target operand is changed
                         * from pc to instr_t in second pass, so remember it here
                         */
                        instr_t *clone = instr_clone(dcontext, instr);
                        /* The clone's note field holds the real instr */
                        instr_set_note(clone, (void *)instr);
                        instrlist_append(&intra_ctis, clone);
                        DOLOG(DF_LOGLEVEL(dcontext), LOG_MONITOR, {
                            d_r_loginst(dcontext, 4, instr,
                                        "\tcti has intra-fragment target");
                        });
                        /* Even though no re-relativization is done here,
                         * we need to change the target operand from pc to instr_t.
                         * that requires having this instr separated out now so
                         * our clone-in-note-field hack above works.
                         */
                        separate_cti = true;
                        re_relativize = false;
                    }
                    if (separate_cti) {
                        /* Create a single raw instr for everything up to the cti */
                        offset = (int)(prev_pc - raw_start_pc);
                        if (offset > 0) {
                            raw_instr = instr_create(dcontext);
                            instr_set_raw_bits_trace_buf(raw_instr, cur_buf, offset);
                            instrlist_append(ilist, raw_instr);
                            cur_buf += offset;
                            raw_start_pc = prev_pc;
                        }
                        /* Mark as meta to indicate that it may need to be
                         * re-encoded, and that it is not an exit cti
                         */
                        instr_set_meta(instr);
                        if (re_relativize)
                            instr_set_raw_bits_valid(instr, false);
                        else if (!instr_is_cti_short_rewrite(instr, NULL)) {
                            instr_set_raw_bits_trace_buf(instr, cur_buf,
                                                         (int)(pc - prev_pc));
                        }
                        instrlist_append(ilist, instr);
                        /* Include the buf space for the cti */
                        cur_buf += (int)(pc - prev_pc);
                        raw_start_pc = pc;
                        /* Create the instr for the next decode */
                        instr = instr_create(dcontext);
                    }
                }
                /* Look for a tls spill that must become a dcontext spill */
                else if (tls_to_dc && instr_is_tls_xcx_spill(instr)) {
                    IF_X64(ASSERT_NOT_IMPLEMENTED(false));
                    LOG(THREAD, LOG_MONITOR, DF_LOGLEVEL(dcontext),
                        "mangling xcx save from tls to dcontext\n");
                    /* Create a single raw instr for everything up to the spill */
                    offset = (int)(prev_pc - raw_start_pc);
                    if (offset > 0) {
                        raw_instr = instr_create(dcontext);
                        instr_set_raw_bits_trace_buf(raw_instr, cur_buf, offset);
                        instrlist_append(ilist, raw_instr);
                        cur_buf += offset;
                        raw_start_pc = prev_pc;
                    }
                    /* Replace the tls spill with a dcontext spill */
                    instrlist_append(ilist,
                                     instr_create_save_to_dcontext(dcontext, SCRATCH_REG2,
                                                                   SCRATCH_REG2_OFFS));
                    /* Skip the buf space of the original tls spill */
                    cur_buf += (int)(pc - prev_pc);
                    raw_start_pc = pc;
                }
#if defined(X86) && defined(X64)
                else if (instr_has_rel_addr_reference(instr)) {
                    /* We need to re-relativize, which is done automatically only for
                     * level 1 instrs (PR 251479), and only when raw bits point to
                     * their original location.  We assume that all the if statements
                     * above end up creating a high-level instr, so a cti w/ a
                     * rip-rel operand is already covered.
                     */
                    /* Create a single raw instr for everything up to this instr */
                    offset = (int)(prev_pc - raw_start_pc);
                    if (offset > 0) {
                        raw_instr = instr_create(dcontext);
                        instr_set_raw_bits_trace_buf(raw_instr, cur_buf, offset);
                        instrlist_append(ilist, raw_instr);
                        cur_buf += offset;
                        raw_start_pc = prev_pc;
                    }
                    ASSERT(instr_rip_rel_valid(instr));
                    if (buf != NULL) {
                        /* Re-encode the instr at its new location in buf */
                        DEBUG_DECLARE(byte *nxt =)
                        instr_encode_to_copy(dcontext, instr, cur_buf,
                                             vmcode_get_executable_addr(cur_buf));
                        instr_set_raw_bits_trace_buf(instr,
                                                     vmcode_get_executable_addr(cur_buf),
                                                     (int)(pc - prev_pc));
                        instr_set_rip_rel_valid(instr, true);
                        ASSERT(nxt != NULL);
                    }
                    instrlist_append(ilist, instr);
                    cur_buf += (int)(pc - prev_pc);
                    raw_start_pc = pc;
                    /* Create the instr for the next decode */
                    instr = instr_create(dcontext);
                }
#endif
            } while (pc < stop_pc);
            DODEBUG({
                if (pc != stop_pc) {
                    LOG(THREAD, LOG_MONITOR, DF_LOGLEVEL(dcontext),
                        "PC " PFX ", stop_pc " PFX "\n", pc, stop_pc);
                }
            });
            ASSERT(pc == stop_pc);

            /* Remember pc, as it may be moved back just below in order to
             * drop a padding nop that precedes the exit cti.
             */
            cache_pc next_pc = pc;
            if (l != NULL && TEST(LINK_PADDED, l->flags) && instr_is_nop(instr)) {
                LOG(THREAD, LOG_MONITOR, DF_LOGLEVEL(dcontext) - 1,
                    "%s: removing padding nop @" PFX "\n", __FUNCTION__, prev_pc);
                pc = prev_pc;
                if (buf != NULL)
                    top_buf -= instr_length(dcontext, instr);
            }
            /* Create a single raw instr for the rest of the instructions */
            if (pc > raw_start_pc) {
                instr_reset(dcontext, instr);
                offset = (int)(pc - raw_start_pc);
                if (offset > 0) {
                    instr_set_raw_bits_trace_buf(instr, cur_buf, offset);
                    instrlist_append(ilist, instr);
                    cur_buf += offset;
                }
                if (buf != NULL && TEST(FRAG_FAKE, f->flags)) {
                    /* Now that we know the size we can copy into buf.
                     * We have been incrementing cur_buf all along, though
                     * we didn't have contents there.
                     */
                    ASSERT(top_buf < cur_buf);
                    IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_uint((cur_buf - top_buf))));
                    num_bytes = (uint)(cur_buf - top_buf);
                    ASSERT(cur_buf + num_bytes < buf + *bufsz);
                    memcpy(cur_buf, raw_start_pc, num_bytes);
                    top_buf = cur_buf + num_bytes;
                    LOG(THREAD, LOG_MONITOR, DF_LOGLEVEL(dcontext),
                        "decode_fragment: copied " PFX "-" PFX " to " PFX "-" PFX "\n",
                        raw_start_pc, raw_start_pc + num_bytes, cur_buf,
                        cur_buf + num_bytes);
                }
                ASSERT(buf == NULL || cur_buf == top_buf);
            } else {
                /* We reach here if there was a separately processed instr
                 * immediately prior to exit cti, so now don't need instr -- an
                 * example (in absence of clients) is trampoline to interception code
                 */
                instr_destroy(dcontext, instr);
            }
            pc = next_pc;
        }

        /* No more exits for a regular fragment: we are done */
        if (l == NULL && !TEST(FRAG_FAKE, f->flags))
            break;

        /* Decode the exit branch */
        if (cti != NULL) {
            /* The elided ubr that was synthesized above */
            instr = cti;
            ASSERT(info != NULL && info->frozen && instr_is_ubr(instr));
            raw_start_pc = pc;
        } else {
            instr = instr_create(dcontext);
            raw_start_pc = decode(dcontext, stop_pc, instr);
            ASSERT(raw_start_pc != NULL); /* our own code! */
        }
        ASSERT(instr_is_ubr(instr) || instr_is_cbr(instr));
        /* Replace the cache pc target with the target tag */
        if (l == NULL) {
            app_pc instr_tgt;
            /* Process the whole short-rewrite sequence before reading the target */
            if (instr_is_cti_short_rewrite(instr, stop_pc))
                remangle_short_rewrite(dcontext, instr, stop_pc, 0);
            instr_tgt = opnd_get_pc(instr_get_target(instr));
            ASSERT(TEST(FRAG_COARSE_GRAIN, f->flags));
            if (cti == NULL && coarse_is_entrance_stub(instr_tgt)) {
                target_tag = entrance_stub_target_tag(instr_tgt, info);
                l_flags = LINK_DIRECT;
                LOG(THREAD, LOG_MONITOR, DF_LOGLEVEL(dcontext) - 1,
                    "\tstub tgt: " PFX " => " PFX "\n", instr_tgt, target_tag);
            } else if (instr_tgt == raw_start_pc /* targets the next instr */
                       /* We could optimize by not checking for a stub if
                        * coarse_elided_ubrs but we need to know whether ALL
                        * ubrs were elided, which we don't know as normally
                        * entire-bb-ubrs are not elided (case 9677).
                        * plus now that we elide jmp-to-ib-stub we must check.
                        */
                       && coarse_is_indirect_stub(instr_tgt)) {
                ibl_type_t ibl_type;
                DEBUG_DECLARE(bool is_ibl;)
                target_tag = coarse_indirect_stub_jmp_target(instr_tgt);
                l_flags = LINK_INDIRECT;
                DEBUG_DECLARE(is_ibl =)
                get_ibl_routine_type_ex(dcontext, target_tag, &ibl_type _IF_X86_64(NULL));
                ASSERT(is_ibl);
                l_flags |= ibltype_to_linktype(ibl_type.branch_type);
                LOG(THREAD, LOG_MONITOR, DF_LOGLEVEL(dcontext) - 1,
                    "\tind stub tgt: " PFX " => " PFX "\n", instr_tgt, target_tag);
            } else {
                target_tag = fragment_coarse_entry_pclookup(dcontext, info, instr_tgt);
                /* A target that is not a stub is only expected in a frozen unit */
                ASSERT(info != NULL && info->frozen);
                ASSERT(target_tag != NULL);
                l_flags = LINK_DIRECT;
                LOG(THREAD, LOG_MONITOR, DF_LOGLEVEL(dcontext) - 1,
                    "\tfrozen tgt: " PFX "." PFX "\n", target_tag, instr_tgt);
            }
        } else {
            target_tag = EXIT_TARGET_TAG(dcontext, f, l);
            l_flags = l->flags;
        }
        if (LINKSTUB_DIRECT(l_flags))
            num_dir++;
        else
            num_indir++;
        ASSERT(target_tag != NULL);
        if (instr_is_cti_short_rewrite(instr, stop_pc)) {
            raw_start_pc = remangle_short_rewrite(dcontext, instr, stop_pc, target_tag);
        } else {
            app_pc new_target = target_tag;
            /* Don't point to the fcache bits */
            instr_set_raw_bits_valid(instr, false);
            LOG(THREAD, LOG_MONITOR, DF_LOGLEVEL(dcontext) - 1,
                "decode_fragment exit_cti: pc=" PFX " l->target_tag=" PFX
                " l->flags=0x%x\n",
                stop_pc, target_tag, l_flags);

            /* We need to propagate the exit branch type flags:
             * instr_t flag copied from old fragment linkstub
             * TODO: when ibl targets are different this won't be necessary
             */
            instr_exit_branch_set_type(instr, linkstub_propagatable_flags(l_flags));

            /* Convert to the ibl routine that matches target_flags */
            if (is_indirect_branch_lookup_routine(dcontext, target_tag)) {
                DEBUG_DECLARE(app_pc old_target = new_target;)
                new_target =
                    get_alternate_ibl_routine(dcontext, target_tag, target_flags);
                ASSERT(new_target != NULL);

                /* For the stats and logging below, we assume that if
                 * target_flags contains
                 * FRAG_IS_TRACE then we are extending a trace
                 */
                DODEBUG({
                    LOG(THREAD, LOG_MONITOR, DF_LOGLEVEL(dcontext) - 1,
                        "%s: %s ibl_routine " PFX " with %s_target=" PFX "\n",
                        TEST(FRAG_IS_TRACE, target_flags) ? "extend_trace"
                                                          : "decode_fragment",
                        new_target == old_target ? "maintaining" : "replacing",
                        old_target, new_target == old_target ? "old" : "new", new_target);
                    STATS_INC(num_traces_ibl_extended);
                });
#ifdef WINDOWS
                DOSTATS({
                    if (TEST(FRAG_IS_TRACE, target_flags) &&
                        old_target == shared_syscall_routine(dcontext))
                        STATS_INC(num_traces_shared_syscall_extended);
                });
#endif
            }

            instr_set_target(instr, opnd_create_pc(new_target));

            if (instr_is_cti_short(instr)) {
                /* Make sure that non-mangled short ctis, which are generated by
                 * us and never left there from apps, are not marked as exit ctis
                 */
                instr_set_meta(instr);
            }
        }
        instrlist_append(ilist, instr);

        if (TEST(FRAG_FAKE, f->flags)) {
            /* Assumption: coarse-grain bbs have 1 ind exit or 2 direct,
             * and no code beyond the last exit!  Of course frozen bbs
             * can have their final jmp elided, which we handle above.
             */
            if (instr_is_ubr(instr)) {
                break;
            }
        }
        if (l != NULL) /* if NULL keep going: discovering exits as we go */
            l = LINKSTUB_NEXT_EXIT(l);
    } /* end while(true) loop through exit stubs */

    /* Now fix up intra-fragment cti targets to be instr_t pointers.
     * The ilist is walked in expanded form to find each target.
     * Any instrs that need re-relativization should already be
     * separate, so this should not affect rip-rel instrs.
     */
    if (instrlist_first(&intra_ctis) != NULL) {
        int offs = 0;
        for (instr = instrlist_first_expanded(dcontext, ilist); instr != NULL;
             instr = instr_get_next_expanded(dcontext, ilist, instr)) {
            for (cti = instrlist_first(&intra_ctis); cti != NULL;
                 cti = instr_get_next(cti)) {
                /* The clone in intra_ctis was decoded from the
                 * original bits, so its target will be in original fragment body.
                 * We can't rely on the raw bits of the new instrs (since the
                 * non-level-0 ones may have allocated raw bits) so we
                 * calculate a running offset as we go.
                 */
                if (opnd_get_pc(instr_get_target(cti)) - start_pc == offs) {
                    /* The cti targets this instr */
                    instr_t *real_cti = (instr_t *)instr_get_note(cti);
                    /* We use an instr_t target because the pc offsets within the
                     * instrlist may change (e.g., inserted nops).  Must re-encode
                     * once instrlist is finalized.
                     */
                    instr_set_target(real_cti, opnd_create_instr(instr));
                    DOLOG(DF_LOGLEVEL(dcontext), LOG_MONITOR, {
                        d_r_loginst(dcontext, 4, real_cti,
                                    "\tre-set intra-fragment target");
                    });
                    break;
                }
            }
            offs += instr_length(dcontext, instr);
        }
    }

    instrlist_clear(dcontext, &intra_ctis);
    DOLOG(DF_LOGLEVEL(dcontext), LOG_INTERP, {
        LOG(THREAD, LOG_INTERP, DF_LOGLEVEL(dcontext),
            "Decoded F%d (" PFX "." PFX ") into:\n", f->id, f->tag, FCACHE_ENTRY_PC(f));
        instrlist_disassemble(dcontext, f->tag, ilist, THREAD);
    });

    /* Restore the ISA mode that was in effect on entry */
    DEBUG_DECLARE(ok =) dr_set_isa_mode(dcontext, old_mode, NULL);
    ASSERT(ok);

    if (dir_exits != NULL)
        *dir_exits = num_dir;
    if (indir_exits != NULL)
        *indir_exits = num_indir;
    if (buf != NULL) {
        IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_uint((top_buf - buf))));
        *bufsz = (uint)(top_buf - buf);
    }
    return ilist;
}
#undef DF_LOGLEVEL
/* Just like decode_fragment() but marks a final instr that has no raw bits
 * as do-not-emit
 */
/* Wrapper around decode_fragment() taking the same arguments and returning
 * the same list, except that a final instr lacking raw bits is flagged as
 * not ok to emit.
 */
instrlist_t *
decode_fragment_exact(dcontext_t *dcontext, fragment_t *f, byte *buf,
                      uint *bufsz, uint target_flags,
                      uint *dir_exits, uint *indir_exits)
{
    instr_t *final_instr;
    instrlist_t *decoded =
        decode_fragment(dcontext, f, buf, bufsz, target_flags, dir_exits, indir_exits);

    final_instr = instrlist_last(decoded);
    /* A last instr with raw bits is left untouched. */
    if (instr_get_raw_bits(final_instr) != NULL)
        return decoded;
    instr_set_ok_to_emit(final_instr, false);
    return decoded;
}
/* Makes a copy of fragment f.
 * If replace is true,
 *   removes f from the fcache and adds the new copy in its place
 * Else
 *   creates f as an invisible fragment (caller is responsible for linking
 *   the new fragment!)
 */
fragment_t *
copy_fragment(dcontext_t *dcontext, fragment_t *f, bool replace)
{
instrlist_t *trace = instrlist_create(dcontext);
instr_t *instr;
uint *trace_buf;
int trace_buf_top;
linkstub_t *l;
byte *p;
cache_pc start_pc;
int num_bytes;
fragment_t *new_f;
void *vmlist = NULL;
app_pc target_tag;
DEBUG_DECLARE(bool ok;)
trace_buf = heap_alloc(dcontext, f->size * 2 HEAPACCT(ACCT_FRAGMENT));
start_pc = FCACHE_ENTRY_PC(f);
trace_buf_top = 0;
p = ((byte *)trace_buf) + trace_buf_top;
IF_X64(ASSERT_NOT_IMPLEMENTED(false));
for (l = FRAGMENT_EXIT_STUBS(f); l; l = LINKSTUB_NEXT_EXIT(l)) {
* control-transfer instruction. ***WARNING*** This code assumes
* that the first link stub corresponds to the first exit branch
* in the body. */
IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_uint((EXIT_CTI_PC(f, l) - start_pc))));
num_bytes = (uint)(EXIT_CTI_PC(f, l) - start_pc);
if (num_bytes > 0) {
memcpy(p, (byte *)start_pc, num_bytes);
trace_buf_top += num_bytes;
start_pc += num_bytes;
instr = instr_create(dcontext);
instr_set_raw_bits(instr, p, num_bytes);
instrlist_append(trace, instr);
}
instr = instr_create(dcontext);
p = decode(dcontext, (byte *)EXIT_CTI_PC(f, l), instr);
ASSERT(p != NULL);
ASSERT(instr_is_ubr(instr) || instr_is_cbr(instr));
* an indirect branch, the target_tag is zero. */
target_tag = EXIT_TARGET_TAG(dcontext, f, l);
ASSERT(target_tag);
if (instr_is_cti_short_rewrite(instr, EXIT_CTI_PC(f, l))) {
p = remangle_short_rewrite(dcontext, instr, EXIT_CTI_PC(f, l), target_tag);
} else {
ASSERT(!instr_is_cti_short(instr));
instr_set_target(instr, opnd_create_pc(target_tag));
}
instrlist_append(trace, instr);
start_pc += (p - (byte *)EXIT_CTI_PC(f, l));
}
ASSERT_NOT_IMPLEMENTED(!TEST(FRAG_SHARED, f->flags));
DEBUG_DECLARE(ok =)
vm_area_add_to_list(dcontext, f->tag, &vmlist, f->flags, f, false );
ASSERT(ok);
new_f = emit_invisible_fragment(dcontext, f->tag, trace, f->flags, vmlist);
if (replace) {
shift_links_to_new_fragment(dcontext, f, new_f);
fragment_replace(dcontext, f, new_f);
} else {
}
ASSERT(new_f->flags == f->flags);
fragment_copy_data_fields(dcontext, f, new_f);
#ifdef DEBUG
if (d_r_stats->loglevel > 1) {
LOG(THREAD, LOG_ALL, 2, "Copying F%d to F%d\n", f->id, new_f->id);
disassemble_fragment(dcontext, f, d_r_stats->loglevel < 3);
disassemble_fragment(dcontext, new_f, d_r_stats->loglevel < 3);
}
#endif
heap_free(dcontext, trace_buf, f->size * 2 HEAPACCT(ACCT_FRAGMENT));
instrlist_clear_and_destroy(dcontext, trace);
if (replace) {
fragment_delete(dcontext, f,
FRAGDEL_NO_OUTPUT | FRAGDEL_NO_UNLINK | FRAGDEL_NO_HTABLE);
STATS_INC(num_fragments_deleted_copy_and_replace);
}
return new_f;
}
/* Called for fragment f after the code cache containing it has been moved,
 * and all of the relative ctis that target outside the cache need
 * to be shifted. Additionally, sysenter-related patching for ignore-syscalls
 * on XP/2003 is performed here, as the absolute code cache address pushed
 * onto the stack must be updated.
 * Assumption: old code cache has been copied to TOP of new cache, so to
 * detect for ctis targeting outside of old cache can look at new cache
 * start plus old cache size.
 *
 * Parameters, as used below:
 *   shift        - amount subtracted from each out-of-cache cti target; the
 *                  WINDOWS debug check expects old sysenter target + shift
 *                  to equal the new target.
 *   fcache_start - targets below this address are treated as out-of-cache.
 *   fcache_end   - not referenced in this function.
 *   old_size     - fcache_start + old_size is the upper bound of the
 *                  in-cache test.
 */
void
shift_ctis_in_fragment(dcontext_t *dcontext, fragment_t *f, ssize_t shift,
                       cache_pc fcache_start, cache_pc fcache_end, size_t old_size)
{
    cache_pc pc, prev_pc = NULL;
    cache_pc start_pc = FCACHE_ENTRY_PC(f);
    /* The walk covers the fragment body and its stubs. */
    cache_pc stop_pc = fragment_stubs_end_pc(f);
    cache_pc fcache_old_end = fcache_start + old_size;
#ifdef WINDOWS
    /* An ignorable sysenter is looked for in this fragment only if
     * the following conditions are satisfied. */
    bool possible_ignorable_sysenter = DYNAMO_OPTION(ignore_syscalls) &&
        (get_syscall_method() == SYSCALL_METHOD_SYSENTER) &&
        /* NOTE(review): traces are let through unconditionally, presumably
         * because they do not carry FRAG_HAS_SYSCALL, so we cannot filter on
         * that flag for all fragments. */
        (TEST(FRAG_HAS_SYSCALL, f->flags) || TEST(FRAG_IS_TRACE, f->flags));
#endif
    instr_t instr;
    instr_init(dcontext, &instr);
    pc = start_pc;
    while (pc < stop_pc) {
#ifdef WINDOWS
        cache_pc prev_decode_pc = prev_pc;
        /* store the address of the
         * previous decode, the instr
         * before the one 'pc'
         * currently points to *before*
         * the call to decode_cti() just
         * below */
#endif
        prev_pc = pc;
        instr_reset(dcontext, &instr);
        /* After this call prev_pc is the start of the decoded instr and pc
         * is the start of the following one. */
        pc = (cache_pc)decode_cti(dcontext, (byte *)pc, &instr);
#ifdef WINDOWS
        /* Fix up the mov preceding a sysenter when ignore-syscalls is used on
         * XP & 2003. These are not cache-external fixups, but it's convenient &
         * efficient to perform them here since decode_cti() is called on every
         * instruction, allowing identification of sysenters without additional
         * decoding.
         */
        if (possible_ignorable_sysenter && instr_opcode_valid(&instr) &&
            instr_is_syscall(&instr)) {
            cache_pc next_pc;
            app_pc target;
            DEBUG_DECLARE(app_pc old_target;)
            DEBUG_DECLARE(cache_pc encode_nxt;)
            /* Fully decode the instruction immediately before the syscall;
             * it is expected to be the mov of an immediate to top-of-stack. */
            instr_reset(dcontext, &instr);
            next_pc = decode(dcontext, prev_decode_pc, &instr);
            ASSERT(next_pc == prev_pc);
            LOG(THREAD, LOG_MONITOR, 4,
                "shift_ctis_in_fragment: pre-sysenter mov found @" PFX "\n",
                instr_get_raw_bits(&instr));
            ASSERT(instr_is_mov_imm_to_tos(&instr));
            /* New immediate: start of the mov + its length + the length of
             * the syscall instr, i.e. the address right after the syscall. */
            target = instr_get_raw_bits(&instr) + instr_length(dcontext, &instr) +
                (pc - prev_pc);
            DODEBUG(old_target = (app_pc)opnd_get_immed_int(instr_get_src(&instr, 0)););
            /* 4-byte immediate only: not implemented for x64. */
            IF_X64(ASSERT_NOT_IMPLEMENTED(false));
            instr_set_src(&instr, 0, opnd_create_immed_int((ptr_int_t)target, OPSZ_4));
            ASSERT(old_target + shift == target);
            LOG(THREAD, LOG_MONITOR, 4,
                "shift_ctis_in_fragment: pre-sysenter mov now pts to @" PFX "\n", target);
            /* Re-encode the mov in place: written through the writable
             * address, encoded as if located at prev_decode_pc. */
            DEBUG_DECLARE(encode_nxt =)
            instr_encode_to_copy(dcontext, &instr,
                                 vmcode_get_writable_addr(prev_decode_pc),
                                 prev_decode_pc);
            ASSERT(encode_nxt != NULL &&
                   vmcode_get_executable_addr(encode_nxt) == next_pc);
        }
        /* The following 'if' won't get executed since a sysenter isn't
         * a CTI instr, so we don't need an else. We do need to take care
         * that any 'else' clauses are added after the 'if' won't trigger
         * on a sysenter either.
         */
#endif
        /* Look for a pc-relative cti with an out-of-cache
         * target (anything in-cache is fine, the whole cache was moved)
         */
        if (instr_is_cti(&instr) &&
            /* ctis without a target operand (presumably returns and the like)
             * have no srcs;
             * we really shouldn't see them, except possibly if they
             * are inserted through instrumentation, so go ahead and
             * check num srcs
             */
            instr_num_srcs(&instr) > 0 && opnd_is_near_pc(instr_get_target(&instr))) {
            app_pc target = opnd_get_pc(instr_get_target(&instr));
            if (target < fcache_start || target > fcache_old_end) {
                DEBUG_DECLARE(byte * nxt_pc;)
                /* Invalidate the raw bits so the new target is actually
                 * encoded, then re-encode over the original instr. */
                instr_set_raw_bits_valid(&instr, false);
                instr_set_target(&instr, opnd_create_pc(target - shift));
                DEBUG_DECLARE(nxt_pc =)
                instr_encode_to_copy(dcontext, &instr, vmcode_get_writable_addr(prev_pc),
                                     prev_pc);
                /* The re-encoded instr must end exactly where the old one did. */
                ASSERT(nxt_pc != NULL && vmcode_get_executable_addr(nxt_pc) == pc);
#ifdef DEBUG
                if ((d_r_stats->logmask & LOG_CACHE) != 0) {
                    d_r_loginst(
                        dcontext, 5, &instr,
                        "shift_ctis_in_fragment: found cti w/ out-of-cache target");
                }
#endif
            }
        }
    }
    instr_free(dcontext, &instr);
}
#ifdef PROFILE_RDTSC
* Must call finalize_profile_call and pass it the fragment_t*
* once the trace is turned into a fragment to fix up a few profile
* call instructions.
*/
void
add_profile_call(dcontext_t *dcontext)
{
monitor_data_t *md = (monitor_data_t *)dcontext->monitor_field;
instrlist_t *trace = &(md->trace);
byte *p = ((byte *)md->trace_buf) + md->trace_buf_top;
instr_t *instr;
uint num_bytes = profile_call_size();
ASSERT(num_bytes + md->trace_buf_top < md->trace_buf_size);
insert_profile_call((cache_pc)p);
* to keep dynamo from interpreting the cti instructions as real ones
*/
instr = instr_create(dcontext);
instr_set_raw_bits(instr, p, num_bytes);
instrlist_prepend(trace, instr);
md->trace_buf_top += num_bytes;
}
#endif
* limited right now to only mov instructions
* returns NULL if failed or not yet implemented, else returns the pc of the next instr.
*/
app_pc
d_r_emulate(dcontext_t *dcontext, app_pc pc, priv_mcontext_t *mc)
{
instr_t instr;
app_pc next_pc = NULL;
uint opc;
instr_init(dcontext, &instr);
next_pc = decode(dcontext, pc, &instr);
if (!instr_valid(&instr)) {
next_pc = NULL;
goto emulate_failure;
}
DOLOG(2, LOG_INTERP, { d_r_loginst(dcontext, 2, &instr, "emulating"); });
opc = instr_get_opcode(&instr);
if (opc == OP_store) {
opnd_t src = instr_get_src(&instr, 0);
opnd_t dst = instr_get_dst(&instr, 0);
reg_t *target;
reg_t val;
uint sz = opnd_size_in_bytes(opnd_get_size(dst));
ASSERT(opnd_is_memory_reference(dst));
if (sz != 4 IF_X64(&&sz != 8)) {
next_pc = NULL;
goto emulate_failure;
}
target = (reg_t *)opnd_compute_address_priv(dst, mc);
if (opnd_is_reg(src)) {
val = reg_get_value_priv(opnd_get_reg(src), mc);
} else if (opnd_is_immed_int(src)) {
val = (reg_t)opnd_get_immed_int(src);
} else {
next_pc = NULL;
goto emulate_failure;
}
DOCHECK(1, {
uint prot = 0;
ASSERT(get_memory_info((app_pc)target, NULL, NULL, &prot));
ASSERT(TEST(MEMPROT_WRITE, prot));
});
LOG(THREAD, LOG_INTERP, 2, "\temulating store by writing " PFX " to " PFX "\n",
val, target);
if (sz == 4)
*((int *)target) = (int)val;
#ifdef X64
else if (sz == 8)
*target = val;
#endif
} else if (opc == IF_X86_ELSE(OP_inc, OP_add) || opc == IF_X86_ELSE(OP_dec, OP_sub)) {
opnd_t src = instr_get_src(&instr, 0);
reg_t *target;
uint sz = opnd_size_in_bytes(opnd_get_size(src));
if (sz != 4 IF_X64(&&sz != 8)) {
next_pc = NULL;
goto emulate_failure;
}
ASSERT(opnd_is_memory_reference(src));
target = (reg_t *)opnd_compute_address_priv(src, mc);
DOCHECK(1, {
uint prot = 0;
ASSERT(get_memory_info((app_pc)target, NULL, NULL, &prot));
ASSERT(TEST(MEMPROT_WRITE, prot));
});
LOG(THREAD, LOG_INTERP, 2, "\temulating %s to " PFX "\n",
opc == IF_X86_ELSE(OP_inc, OP_add) ? "inc" : "dec", target);
if (sz == 4) {
if (opc == IF_X86_ELSE(OP_inc, OP_add))
(*((int *)target))++;
else
(*((int *)target))--;
}
#ifdef X64
else if (sz == 8) {
if (opc == IF_X86_ELSE(OP_inc, OP_add))
(*target)++;
else
(*target)--;
}
#endif
}
emulate_failure:
instr_free(dcontext, &instr);
return next_pc;
}
#ifdef AARCH64
/* Emits additional code at the end of the trace to fix up its indirect exits.
 * For each indirect branch in trace we have the following code:
 *      str x0, TLS_REG0_SLOT
 *      mov x0, #trace_next_target
 *      eor x0, x0, jump_target
 *      cbnz x0, trace_exit (ibl_routine)
 *      ldr x0, TLS_REG0_SLOT
 * For the trace_exit (ibl_routine), it needs to conform to the
 * protocol specified in emit_indirect_branch_lookup in
 * aarch64/emit_utils.c.
 * The ibl routine requires:
 *      x2: contains indirect branch target
 *      TLS_REG2_SLOT: contains app's x2
 * Therefore we need to add additional spill instructions
 * before we actually jump to the ibl routine.
 * We want the indirect hit path to have minimum instructions
 * and also conform to the protocol of ibl routine
 * Therefore we append the restore at the end of the trace
 * after the backward jump to trace head.
 * For example, the code will be fixed to:
 *      eor x0, x0, jump_target
 *      cbnz x0, trace_exit_label
 *      ...
 *      b trace_head
 * trace_exit_label:
 *      ldr x0, TLS_REG0_SLOT
 *      str x2, TLS_REG2_SLOT
 *      mov x2, jump_target
 *      b ibl_routine
 *
 * XXX i#2974 This way of having a trace_exit_label at the end of a trace
 * breaks the linear requirement which is assumed by a lot of code, including
 * translation. Currently recreation of instruction list is fixed by including
 * a special call to this function. We might need to consider add special
 * support in translate.c or use an alternative linear control flow.
 *
 * Returns the number of bytes added to the trace by the appended code
 * (accumulated in added_size below).
 */
int
fixup_indirect_trace_exit(dcontext_t *dcontext, instrlist_t *trace)
{
    instr_t *instr, *prev, *branch;
    instr_t *trace_exit_label;
    app_pc target = 0;
    app_pc ind_target = 0;
    app_pc instr_trans;
    reg_id_t scratch;
    reg_id_t jump_target_reg = DR_REG_NULL;
    uint indirect_type = 0;
    int added_size = 0;
    trace_exit_label = NULL;
    /* Captured before anything is appended, so the loop below only visits
     * instructions that were already in the trace (excluding the last one). */
    instr_t *trace_end = instrlist_last(trace);
    LOG(THREAD, LOG_MONITOR, 4, "fixup the indirect trace exit\n");
    /* There may be several indirect trace exits to fix up
     * when more than one basic blocks are added as the trace.
     * And so we iterate over the entire trace to look for indirect exits.
     */
    for (instr = instrlist_first(trace); instr != trace_end;
         instr = instr_get_next(instr)) {
        if (instr_is_exit_cti(instr)) {
            target = instr_get_branch_target_pc(instr);
            /* Only exits that go to an ibl routine are rewritten. */
            if (is_indirect_branch_lookup_routine(dcontext, (cache_pc)target)) {
                ASSERT(instr->opcode == OP_cbnz);
                trace_exit_label = INSTR_CREATE_label(dcontext);
                ind_target = target;
                /* Retarget the cbnz at the out-of-line label; it is no longer
                 * an exit cti itself, so its exit-type flags are cleared and
                 * carried over to the appended branch below. */
                instr_set_target(instr, opnd_create_instr(trace_exit_label));
                indirect_type = instr_exit_branch_type(instr);
                instr->flags &= ~EXIT_CTI_TYPES;
                instr_set_our_mangling(instr, true);
                /* The preceding eor supplies the register holding the
                 * jump target (its source operand 1). */
                prev = instr_get_prev(instr);
                ASSERT(prev->opcode == OP_eor);
                ASSERT(instr_num_srcs(prev) == 4 && opnd_is_reg(instr_get_src(prev, 1)));
                jump_target_reg = opnd_get_reg(instr_get_src(prev, 1));
                ASSERT(ind_target && jump_target_reg != DR_REG_NULL);
                /* Scratch is x0 unless the jump target lives in x0, in which
                 * case x1 is used. */
                scratch = (jump_target_reg == DR_REG_X0) ? DR_REG_X1 : DR_REG_X0;
                instrlist_append(trace, trace_exit_label);
                /* All appended instrs share the translation of the cbnz. */
                instr_trans = instr_get_translation(instr);
                /* Restore the scratch register from TLS_REG0_SLOT. */
                instrlist_append(trace,
                                 INSTR_XL8(instr_create_restore_from_tls(
                                               dcontext, scratch, TLS_REG0_SLOT),
                                           instr_trans));
                added_size += AARCH64_INSTR_SIZE;
                /* If IBL_TARGET_REG already holds the jump target there is no
                 * need to store
                 * it away and load value of jump target into it
                 */
                if (jump_target_reg != IBL_TARGET_REG) {
                    instrlist_append(
                        trace,
                        INSTR_XL8(instr_create_save_to_tls(dcontext, IBL_TARGET_REG,
                                                           TLS_REG2_SLOT),
                                  instr_trans));
                    added_size += AARCH64_INSTR_SIZE;
                    ASSERT(jump_target_reg != DR_REG_NULL);
                    instrlist_append(
                        trace,
                        INSTR_XL8(XINST_CREATE_move(dcontext,
                                                    opnd_create_reg(IBL_TARGET_REG),
                                                    opnd_create_reg(jump_target_reg)),
                                  instr_trans));
                    added_size += AARCH64_INSTR_SIZE;
                }
                /* Finally the real exit: an unconditional branch to the ibl
                 * routine carrying the original exit branch type. */
                branch = XINST_CREATE_jump(dcontext, opnd_create_pc(ind_target));
                instr_exit_branch_set_type(branch, indirect_type);
                instr_set_translation(branch, instr_trans);
                instrlist_append(trace, branch);
                added_size += AARCH64_INSTR_SIZE;
            }
        } else if ((instr->opcode == OP_cbz || instr->opcode == OP_cbnz ||
                    instr->opcode == OP_tbz || instr->opcode == OP_tbnz) &&
                   instr_is_load_tls(instr_get_next(instr))) {
            /* A non-exit cbz/cbnz/tbz/tbnz immediately followed by a TLS load:
             * only mangled instruction (by mangle_cbr_stolen_reg) reached here.
             */
            instr_t *next = instr_get_next(instr);
            /* Remember the branch's current target before redirecting it to
             * the out-of-line label. */
            opnd_t fall_target = instr_get_target(instr);
            trace_exit_label = INSTR_CREATE_label(dcontext);
            instr_set_target(instr, opnd_create_instr(trace_exit_label));
            instrlist_append(trace, trace_exit_label);
            instr_trans = instr_get_translation(instr);
            /* The low 5 bits of the following load's encoding select the
             * register it restores; restore the same register out of line. */
            reg_id_t mangled_reg = ((*(uint *)next->bytes) & 31) + DR_REG_START_GPR;
            instrlist_append(trace,
                             INSTR_XL8(instr_create_restore_from_tls(
                                           dcontext, mangled_reg, TLS_REG0_SLOT),
                                       instr_trans));
            added_size += AARCH64_INSTR_SIZE;
            /* Then continue to the branch's original target. */
            branch = XINST_CREATE_jump(dcontext, fall_target);
            instr_set_translation(branch, instr_trans);
            instrlist_append(trace, branch);
            added_size += AARCH64_INSTR_SIZE;
            /* The appended jump may require an exit stub of its own,
             * and it is possible that this jump is leaving the fragment
             * in which case we should not increase the size of the fragment
             */
            /* NOTE(review): the code below adds DIRECT_EXIT_STUB_SIZE(0) when
             * the jump IS an exit cti, which reads as the opposite of the
             * wording above -- confirm which is intended. */
            if (instr_is_exit_cti(branch)) {
                added_size += DIRECT_EXIT_STUB_SIZE(0);
            }
        }
    }
    return added_size;
}
#endif