* Copyright (c) 2011-2022 Google, Inc. All rights reserved.
* Copyright (c) 2000-2010 VMware, Inc. All rights reserved.
* **********************************************************/
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
*
* References:
* "Intel Architecture Software Developer's Manual", 1999.
* "ARM Architecture Manual", 2014.
*/
#ifndef _ARCH_EXPORTS_H_
#define _ARCH_EXPORTS_H_ 1
#include "opnd.h"
#include "atomic_exports.h"
#ifdef X86
* On Linux we must preserve all xmm registers.
* If AVX is enabled we save ymm.
* i#437: YMM is an extension of XMM from 128-bit to 256-bit without
* adding new ones, so code operating on XMM often also operates on YMM,
* and thus some *XMM* macros also apply to *YMM*.
*/
# define XMM_REG_SIZE 16
# define YMM_REG_SIZE 32
# define ZMM_REG_SIZE 64
# define OPMASK_AVX512F_REG_SIZE 2
# define OPMASK_AVX512BW_REG_SIZE 8
# define MCXT_SIMD_SLOT_SIZE ZMM_REG_SIZE
# define MCXT_TOTAL_SIMD_SLOTS_SIZE (MCXT_NUM_SIMD_SLOTS * MCXT_SIMD_SLOT_SIZE)
# define MCXT_TOTAL_OPMASK_SLOTS_SIZE \
(MCXT_NUM_OPMASK_SLOTS * OPMASK_AVX512BW_REG_SIZE)
# define MCXT_TOTAL_SIMD_SSE_AVX_SLOTS_SIZE \
(MCXT_NUM_SIMD_SSE_AVX_SLOTS * MCXT_SIMD_SLOT_SIZE)
# define YMM_ENABLED() (proc_avx_enabled())
# define ZMM_ENABLED() (proc_avx512_enabled())
# define YMMH_REG_SIZE (YMM_REG_SIZE / 2)
# define ZMMH_REG_SIZE (ZMM_REG_SIZE / 2)
# define MCXT_YMMH_SLOTS_SIZE (MCXT_NUM_SIMD_SSE_AVX_SLOTS * YMMH_REG_SIZE)
#endif
#define CLEANCALL_NUM_INLINE_SLOTS 5
typedef enum {
IBL_NONE = -1,
IBL_RETURN = 0,
IBL_BRANCH_TYPE_START = IBL_RETURN,
IBL_INDCALL,
IBL_INDJMP,
IBL_GENERIC = IBL_INDJMP,
FIXME: remove this and add names for specific needs */
IBL_SHARED_SYSCALL = IBL_GENERIC,
IBL_BRANCH_TYPE_END
} ibl_branch_type_t;
#define IBL_HASH_FUNC_OFFSET_MAX IF_X64_ELSE(4, 3)
struct _fragment_entry_t;
struct _ibl_table_t;
* in-cache indirect branch lookup routines, store in thread-local storage.
* Goal is to get it all in one cache line: currently, though, it is
* 2 lines on 32-byte-line machines, with the call* and jmp* tables and the
* hashtable stats spilling onto the 2nd line.
* Even all on one line, shared ibl has a load vs private ibl's hardcoded immed...
*
* FIXME: to avoid splitting the mcontext for now these scratch
* fs:slots are used in fcache, but copied to the
* mcontext on transitions. see case 3701
*/
typedef struct _lookup_table_access_t {
ptr_uint_t hash_mask;
struct _fragment_entry_t *lookuptable;
} lookup_table_access_t;
typedef struct _table_stat_state_t {
* on the same cache line.
*/
* crunched keeping a subset of them in TLS.
* For example, the ret_trace & indcall_trace tables could be heavily used
* but if the indjmp table isn't, it might make sense to put the ret_bb
* table's fields into TLS since ret_bb is likely to be the most heavily
* used for BB2BB IBL.
*/
lookup_table_access_t table[IBL_BRANCH_TYPE_END];
* DEBUG builds have the same layout especially when backward
* aligned entry */
#ifdef HASHTABLE_STATISTICS
uint stats;
#endif
} table_stat_state_t;
#if defined(AARCHXX) || defined(RISCV64)
typedef struct _ibl_entry_pc_t {
byte *ibl;
byte *unlinked;
} ibl_entry_pc_t;
#endif
* -no_ibl_table_in_tls, only these slots are mapped to TLS (and the
* table address/mask pairs are not).
*/
typedef struct _spill_state_t {
#ifdef X86
reg_t xax, xbx, xcx, xdx;
#elif defined(AARCHXX)
reg_t r0, r1, r2, r3;
reg_t r4, r5;
reg_t reg_stolen;
#elif defined(RISCV64)
reg_t a0, a1, a2, a3;
* Note that on RISC-V, tp is the thread pointer, and it is also a general-purpose
* register, so we steal tp to store DR's tls.
*/
reg_t reg_stolen;
#endif
dcontext_t *dcontext;
#if defined(RISCV64) || defined(AARCHXX)
* registers with a single instruction in our exit stubs and gencode.
*/
byte *fcache_return;
ibl_entry_pc_t trace_ibl[IBL_BRANCH_TYPE_END];
ibl_entry_pc_t bb_ibl[IBL_BRANCH_TYPE_END];
#endif
#ifdef AARCHXX
ptr_uint_t ldstex_addr;
ptr_uint_t ldstex_value;
ptr_uint_t ldstex_value2;
ptr_uint_t ldstex_size;
# ifdef ARM
reg_t ldstex_flags;
# endif
#endif
} spill_state_t;
typedef struct _local_state_t {
spill_state_t spill_space;
} local_state_t;
typedef struct _local_state_extended_t {
spill_state_t spill_space;
table_stat_state_t table_space;
} local_state_extended_t;
* accessible off of fs:. But, the actual segment offset varies, so
* os_tls_offset() must be used to obtain an fs: offset from a slot.
*/
#ifdef X86
# define TLS_XAX_SLOT ((ushort)offsetof(spill_state_t, xax))
# define TLS_XBX_SLOT ((ushort)offsetof(spill_state_t, xbx))
# define TLS_XCX_SLOT ((ushort)offsetof(spill_state_t, xcx))
# define TLS_XDX_SLOT ((ushort)offsetof(spill_state_t, xdx))
# define TLS_REG0_SLOT TLS_XAX_SLOT
# define TLS_REG1_SLOT TLS_XBX_SLOT
# define TLS_REG2_SLOT TLS_XCX_SLOT
# define TLS_REG3_SLOT TLS_XDX_SLOT
# define SCRATCH_REG0 DR_REG_XAX
# define SCRATCH_REG1 DR_REG_XBX
# define SCRATCH_REG2 DR_REG_XCX
# define SCRATCH_REG3 DR_REG_XDX
#elif defined(AARCHXX)
# define TLS_REG0_SLOT ((ushort)offsetof(spill_state_t, r0))
# define TLS_REG1_SLOT ((ushort)offsetof(spill_state_t, r1))
# define TLS_REG2_SLOT ((ushort)offsetof(spill_state_t, r2))
# define TLS_REG3_SLOT ((ushort)offsetof(spill_state_t, r3))
# define TLS_REG4_SLOT ((ushort)offsetof(spill_state_t, r4))
# define TLS_REG5_SLOT ((ushort)offsetof(spill_state_t, r5))
# define TLS_REG_STOLEN_SLOT ((ushort)offsetof(spill_state_t, reg_stolen))
# define SCRATCH_REG0 DR_REG_R0
# define SCRATCH_REG1 DR_REG_R1
# define SCRATCH_REG2 DR_REG_R2
# define SCRATCH_REG3 DR_REG_R3
# define SCRATCH_REG4 DR_REG_R4
# define SCRATCH_REG5 DR_REG_R5
# define SCRATCH_REG_LAST SCRATCH_REG5
#elif defined(RISCV64)
# define TLS_REG0_SLOT ((ushort)offsetof(spill_state_t, a0))
# define TLS_REG1_SLOT ((ushort)offsetof(spill_state_t, a1))
# define TLS_REG2_SLOT ((ushort)offsetof(spill_state_t, a2))
# define TLS_REG3_SLOT ((ushort)offsetof(spill_state_t, a3))
# define TLS_REG_STOLEN_SLOT ((ushort)offsetof(spill_state_t, reg_stolen))
# define SCRATCH_REG0 DR_REG_A0
# define SCRATCH_REG1 DR_REG_A1
# define SCRATCH_REG2 DR_REG_A2
# define SCRATCH_REG3 DR_REG_A3
#endif
#define IBL_TARGET_REG SCRATCH_REG2
#define IBL_TARGET_SLOT TLS_REG2_SLOT
#define TLS_DCONTEXT_SLOT ((ushort)offsetof(spill_state_t, dcontext))
#ifdef AARCHXX
# define TLS_FCACHE_RETURN_SLOT ((ushort)offsetof(spill_state_t, fcache_return))
# define TLS_LDSTEX_ADDR_SLOT ((ushort)offsetof(spill_state_t, ldstex_addr))
# define TLS_LDSTEX_VALUE_SLOT ((ushort)offsetof(spill_state_t, ldstex_value))
# define TLS_LDSTEX_VALUE2_SLOT ((ushort)offsetof(spill_state_t, ldstex_value2))
# define TLS_LDSTEX_SIZE_SLOT ((ushort)offsetof(spill_state_t, ldstex_size))
# ifdef ARM
# define TLS_LDSTEX_FLAGS_SLOT ((ushort)offsetof(spill_state_t, ldstex_flags))
# endif
#elif defined(RISCV64)
# define TLS_FCACHE_RETURN_SLOT ((ushort)offsetof(spill_state_t, fcache_return))
#endif
#define TABLE_OFFSET (offsetof(local_state_extended_t, table_space))
#define TLS_MASK_SLOT(btype) \
((ushort)(TABLE_OFFSET + offsetof(table_stat_state_t, table[btype]) + \
offsetof(lookup_table_access_t, hash_mask)))
#define TLS_TABLE_SLOT(btype) \
((ushort)(TABLE_OFFSET + offsetof(table_stat_state_t, table[btype]) + \
offsetof(lookup_table_access_t, lookuptable)))
#ifdef HASHTABLE_STATISTICS
# define TLS_HTABLE_STATS_SLOT \
((ushort)(offsetof(local_state_extended_t, table_space) + \
offsetof(table_stat_state_t, stats)))
#endif
#define TLS_NUM_SLOTS \
(DYNAMO_OPTION(ibl_table_in_tls) ? sizeof(local_state_extended_t) / sizeof(void *) \
: sizeof(local_state_t) / sizeof(void *))
#ifdef WINDOWS
# define DETACH_CALLBACK_CODE_SIZE 256
# define DETACH_CALLBACK_FINAL_JMP_SIZE 32
* we detach. We have one per a thread (with stacked callbacks) stored in an array. */
typedef struct _detach_callback_stack_t {
thread_id_t tid;
ptr_uint_t count;
app_pc *callback_addrs;
reg_t xax_save;
reg_t xbx_save;
reg_t xcx_save;
app_pc target;
byte code_buf[DETACH_CALLBACK_FINAL_JMP_SIZE];
} detach_callback_stack_t;
void
arch_patch_syscall(dcontext_t *dcontext, byte *target);
byte *
emit_detach_callback_code(dcontext_t *dcontext, byte *buf,
detach_callback_stack_t *callback_state);
void
emit_detach_callback_final_jmp(dcontext_t *dcontext,
detach_callback_stack_t *callback_state);
#endif
uint64
proc_get_timestamp(void);
#ifdef WINDOWS
# define RDTSC_LL(var) (var = __rdtsc())
byte *
get_frame_ptr(void);
byte *
get_stack_ptr(void);
# define GET_FRAME_PTR(var) (var = get_frame_ptr())
# define GET_STACK_PTR(var) (var = get_stack_ptr())
#else
# ifdef DR_HOST_X86
# ifdef X64
# define RDTSC_LL(llval) \
do { \
uint low, high; \
__asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high)); \
(llval) = ((uint64)high << 32) | low; \
} while (0)
# define GET_CUR_PC(var) \
__asm__ __volatile__("call 1f; 1: pop %%rax; mov %%rax, %0" \
: "=m"(var) \
: \
: "rax", "memory")
# else
# define RDTSC_LL(llval) __asm__ __volatile__("rdtsc" : "=A"(llval))
# define GET_CUR_PC(var) \
__asm__ __volatile__("call 1f; 1: pop %%eax; mov %%eax, %0" \
: "=m"(var) \
: \
: "eax", "memory")
# endif
# define GET_FRAME_PTR(var) \
asm("mov %%" IF_X64_ELSE("rbp", "ebp") ", %0" : "=m"(var))
# define GET_STACK_PTR(var) \
asm("mov %%" IF_X64_ELSE("rsp", "esp") ", %0" : "=m"(var))
# elif defined(DR_HOST_AARCH64)
# define RDTSC_LL(llval) \
do { \
(llval) = proc_get_timestamp(); \
} while (0)
# define GET_FRAME_PTR(var) __asm__ __volatile__("mov %0, x29" : "=r"(var))
# define GET_STACK_PTR(var) __asm__ __volatile__("mov %0, sp" : "=r"(var))
# define GET_CUR_PC(var) \
__asm__ __volatile__("bl 1f\n1: str x30, %0" : "=m"(var) : : "x30")
# elif defined(DR_HOST_ARM)
# define RDTSC_LL(llval) (llval) = proc_get_timestamp()
# define GET_FRAME_PTR(var) \
__asm__ __volatile__("str " IF_X64_ELSE("x29", "r11") ", %0" : "=m"(var))
# define GET_STACK_PTR(var) __asm__ __volatile__("str sp, %0" : "=m"(var))
# define GET_CUR_PC(var) \
__asm__ __volatile__("bl 1f\n1: str lr, %0" : "=m"(var) : : "lr")
# elif defined(DR_HOST_RISCV64)
# define RDTSC_LL(llval) \
do { \
__asm__ __volatile__("csrr %0, time" : "=r"(llval)); \
} while (0)
# define GET_FRAME_PTR(var) __asm__ __volatile__("mv %0, fp" : "=r"(var))
# define GET_STACK_PTR(var) __asm__ __volatile__("mv %0, sp" : "=r"(var))
# define GET_CUR_PC(var) \
__asm__ __volatile__("jal t0, 1f; 1: sd t0, %0" : "=m"(var) : : "t0")
# endif
#endif
#define DEBUGGER_INTERRUPT_BYTE 0xcc
* The write that inserts the relative target is done atomically so this
* function is safe with respect to a thread executing the code containing
* this target, presuming that the code in both the before and after states
* is valid
*/
byte *
insert_relative_target(byte *pc, cache_pc target, bool hot_patch);
byte *
insert_relative_jump(byte *pc, cache_pc target, bool hot_patch);
#ifdef PROFILE_RDTSC
# ifdef UNIX
__inline__ uint64
get_time();
# else
uint64
get_time(void);
# endif
#endif
void
d_r_arch_init(void);
void d_r_arch_exit(IF_WINDOWS_ELSE_NP(bool detach_stacked_callbacks, void));
void
arch_thread_init(dcontext_t *dcontext);
void
arch_thread_exit(dcontext_t *dcontext _IF_WINDOWS(bool detach_stacked_callbacks));
#if defined(WINDOWS_PC_SAMPLE) && !defined(DEBUG)
void
arch_thread_profile_exit(dcontext_t *dcontext);
void
arch_profile_exit(void);
#endif
#ifdef AARCHXX
void
arch_reset_stolen_reg(void);
void
arch_mcontext_reset_stolen_reg(dcontext_t *dcontext, priv_mcontext_t *mc);
#endif
bool
is_indirect_branch_lookup_routine(dcontext_t *dcontext, cache_pc pc);
bool
in_generated_routine(dcontext_t *dcontext, cache_pc pc);
bool
in_fcache_return(dcontext_t *dcontext, cache_pc pc);
bool
in_clean_call_save(dcontext_t *dcontext, cache_pc pc);
bool
in_clean_call_restore(dcontext_t *dcontext, cache_pc pc);
bool
in_indirect_branch_lookup_code(dcontext_t *dcontext, cache_pc pc);
cache_pc
get_fcache_target(dcontext_t *dcontext);
void
set_fcache_target(dcontext_t *dcontext, cache_pc value);
void
copy_mcontext(priv_mcontext_t *src, priv_mcontext_t *dst);
bool
dr_mcontext_to_priv_mcontext(priv_mcontext_t *dst, dr_mcontext_t *src);
bool
priv_mcontext_to_dr_mcontext(dr_mcontext_t *dst, priv_mcontext_t *src);
priv_mcontext_t *
dr_mcontext_as_priv_mcontext(dr_mcontext_t *mc);
priv_mcontext_t *
get_priv_mcontext_from_dstack(dcontext_t *dcontext);
void
dr_mcontext_init(dr_mcontext_t *mc);
void
dump_mcontext(priv_mcontext_t *context, file_t f, bool dump_xml);
#ifdef AARCHXX
reg_t
get_stolen_reg_val(priv_mcontext_t *context);
void
set_stolen_reg_val(priv_mcontext_t *mc, reg_t newval);
#endif
const char *
get_branch_type_name(ibl_branch_type_t branch_type);
ibl_branch_type_t
get_ibl_branch_type(instr_t *instr);
* atomic hashtable delete can be performed for the given fragment.
*/
cache_pc
get_target_delete_entry_pc(dcontext_t *dcontext, struct _ibl_table_t *table);
cache_pc
get_reset_exit_stub(dcontext_t *dcontext);
typedef linkstub_t *(*fcache_enter_func_t)(dcontext_t *dcontext);
fcache_enter_func_t
get_fcache_enter_private_routine(dcontext_t *dcontext);
fcache_enter_func_t
get_fcache_enter_gonative_routine(dcontext_t *dcontext);
cache_pc
get_unlinked_entry(dcontext_t *dcontext, cache_pc linked_entry);
cache_pc
get_linked_entry(dcontext_t *dcontext, cache_pc unlinked_entry);
#ifdef X64
cache_pc
get_trace_cmp_entry(dcontext_t *dcontext, cache_pc linked_entry);
#endif
cache_pc
get_do_syscall_entry(dcontext_t *dcontext);
#ifdef WINDOWS
fcache_enter_func_t
get_fcache_enter_indirect_routine(dcontext_t *dcontext);
cache_pc
get_do_callback_return_entry(dcontext_t *dcontext);
#else
cache_pc
get_do_int_syscall_entry(dcontext_t *dcontext);
cache_pc
get_do_int81_syscall_entry(dcontext_t *dcontext);
cache_pc
get_do_int82_syscall_entry(dcontext_t *dcontext);
cache_pc
get_do_clone_syscall_entry(dcontext_t *dcontext);
# ifdef VMX86_SERVER
cache_pc
get_do_vmkuw_syscall_entry(dcontext_t *dcontext);
# endif
#endif
byte *
get_global_do_syscall_entry(void);
* after_shared_syscall_addr in fact both check for the same address with
* int system calls, so can't use them to disambiguate between the two. */
#ifdef WINDOWS
* - addr is the instruction immediately after the int 2e
* - code is the addr of our code that handles post int 2e
* For non int system calls addr and code will be the same (ret addr), this
* is just for working around our Sygate int 2e in ntdll hack (5217) */
cache_pc
after_shared_syscall_addr(dcontext_t *dcontext);
cache_pc
after_shared_syscall_code(dcontext_t *dcontext);
bool
is_shared_syscall_routine(dcontext_t *dcontext, cache_pc pc);
* - addr is the instruction immediately after the int 2e
* - code is the addr of our code that handles post int 2e
* For non int system calls addr and code will be the same (ret addr), this
* is just for working around our Sygate int 2e in ntdll hack (5217) */
cache_pc
after_do_syscall_addr(dcontext_t *dcontext);
cache_pc
after_do_syscall_code(dcontext_t *dcontext);
#else
cache_pc
after_do_shared_syscall_addr(dcontext_t *dcontext);
cache_pc
after_do_syscall_addr(dcontext_t *dcontext);
bool
is_after_main_do_syscall_addr(dcontext_t *dcontext, cache_pc pc);
bool
is_after_do_syscall_addr(dcontext_t *dcontext, cache_pc pc);
#endif
bool
is_after_syscall_address(dcontext_t *dcontext, cache_pc pc);
bool
is_after_syscall_that_rets(dcontext_t *dcontext, cache_pc pc);
void
update_generated_hashtable_access(dcontext_t *dcontext);
fcache_enter_func_t
get_fcache_enter_shared_routine(dcontext_t *dcontext);
* We assume that only one method is in use, except for 32-bit applications
* on 64-bit x86 linux kernels, which use both sys{enter,call} on the vsyscall
* page and inlined int (PR 286922).
* For these apps, DR itself and global_do_syscall use int, but we
* have both a do_syscall for the vsyscall and a separate do_int_syscall
* (we can't use the vsyscall for some system calls like clone: we could
* potentially use int for everything if we fixed up the syscall args).
* The method set in that case is the vsyscall method.
*/
enum {
SYSCALL_METHOD_UNINITIALIZED,
SYSCALL_METHOD_INT,
SYSCALL_METHOD_SYSENTER,
SYSCALL_METHOD_SYSCALL,
#ifdef WINDOWS
SYSCALL_METHOD_WOW64,
#endif
SYSCALL_METHOD_SVC,
SYSCALL_METHOD_ECALL,
};
#ifdef UNIX
enum { SYSCALL_METHOD_LONGEST_INSTR = 2 };
#endif
void
check_syscall_method(dcontext_t *dcontext, instr_t *instr);
int
get_syscall_method(void);
bool
does_syscall_ret_to_callsite(void);
void
set_syscall_method(int method);
#ifdef LINUX
bool
should_syscall_method_be_sysenter(void);
#endif
bool
hook_vsyscall(dcontext_t *dcontext, bool method_changing);
bool
unhook_vsyscall(void);
* in win32/os.c that uses this for PRE_SYSCALL_PC, not for general use */
byte *
get_app_sysenter_addr(void);
* currently on the d_r_initstack, 'mutex_to_free' should be passed so we release the
* initstack_mutex. The supplied 'func_arg' will be passed as an argument to 'func'.
* If 'func' returns then 'return_on_return' is checked. If set we swap back stacks and
* return to the caller. If not set then it's assumed that func wasn't supposed to
* return and we go to an error routine unexpected_return() below.
*/
void
call_switch_stack(void *func_arg, byte *stack, void (*func)(void *arg),
void *mutex_to_free, bool return_on_return);
#if defined(WINDOWS) && !defined(X64)
DYNAMORIO_EXPORT int64
dr_invoke_x64_routine(dr_auxlib64_routine_ptr_t func64, uint num_params, ...);
#endif
void
unexpected_return(void);
void
clone_and_swap_stack(byte *stack, byte *tos);
void
go_native(dcontext_t *dcontext);
* Uses the current dstack, but instructs the cleanup routines not to
* de-allocate it, does a custom de-allocate after swapping to d_r_initstack (don't
* want to use d_r_initstack the whole time, that's too long to hold the mutex).
* Then calls system call sysnum with parameter base param_base, which is presumed
* to be either NtTerminateThread or NtTerminateProcess or exit.
*
* Note that the caller is responsible for placing the actual syscall arguments
* at the correct offset from edx (or ebx). See SYSCALL_PARAM_OFFSET in
* win32 os.c for more info.
*/
void
cleanup_and_terminate(dcontext_t *dcontext, int sysnum, ptr_uint_t sys_arg1,
ptr_uint_t sys_arg2, bool exitproc,
ptr_uint_t sys_arg3, ptr_uint_t sys_arg4);
bool
cpuid_supported(void);
void
our_cpuid(int res[4], int eax, int ecx);
#ifdef WINDOWS
int
dynamorio_syscall_int2e(int sysnum, ...);
int
dynamorio_syscall_sysenter(int sysnum, ...);
int
dynamorio_syscall_sygate_int2e(int sysnum, ...);
int
dynamorio_syscall_sygate_sysenter(int sysnum, ...);
# ifdef X64
int
dynamorio_syscall_syscall(int sysnum, ...);
# endif
int
dynamorio_syscall_wow64(int sysnum, ...);
int
dynamorio_syscall_wow64_noedx(int sysnum, ...);
void
get_segments_cs_ss(cxt_seg_t *cs, cxt_seg_t *ss);
void
get_segments_defg(cxt_seg_t *ds, cxt_seg_t *es, cxt_seg_t *fs, cxt_seg_t *gs);
void
get_own_context_helper(CONTEXT *cxt);
* call internal_exception_info() with a separate exception stack.
*/
void
call_intr_excpt_alt_stack(dcontext_t *dcontext, EXCEPTION_RECORD *pExcptRec, CONTEXT *cxt,
byte *stack);
void
dynamorio_earliest_init_takeover(void);
#else
void
client_int_syscall(void);
void
dynamorio_sigreturn(void);
void
dynamorio_sys_exit(void);
void
dynamorio_condvar_wake_and_jmp(KSYNCH_TYPE *ksynch ,
byte *jmp_tgt );
# ifdef LINUX
# ifndef X64
void
dynamorio_nonrt_sigreturn(void);
# endif
thread_id_t
dynamorio_clone(uint flags, byte *newsp, void *ptid, void *tls, void *ctid,
void (*func)(void));
void
xfer_to_new_libdr(app_pc entry, void **init_sp, byte *cur_dr_map, size_t cur_dr_size);
# endif
# ifdef MACOS
void
new_bsdthread_intercept(void);
# endif
#endif
void
back_from_native(void);
void
back_from_native_retstubs(void);
void
back_from_native_retstubs_end(void);
enum { BACK_FROM_NATIVE_RETSTUB_SIZE = 4 };
#ifdef UNIX
void
native_plt_call(void);
#endif
DEBUG_DECLARE(void debug_infinite_loop(void); )
void
hashlookup_null_handler(void);
void
dr_stmxcsr(uint *val);
void
dr_xgetbv(uint *high, uint *low);
void
dr_fxsave(byte *buf_aligned);
void
dr_fnsave(byte *buf_aligned);
void
dr_fxrstor(byte *buf_aligned);
void
dr_frstor(byte *buf_aligned);
#ifdef X64
void
dr_fxsave32(byte *buf_aligned);
void
dr_fxrstor32(byte *buf_aligned);
#endif
* the mcontext and the SP of the caller of dr_app_start() and
* dynamorio_app_take_over().
*/
#ifdef X86
# define DYNAMO_START_XSP_ADJUST 16
#else
# define DYNAMO_START_XSP_ADJUST 0
#endif
void
dynamo_start(priv_mcontext_t *mc);
* original app state.
*/
int
native_get_retstack_idx(priv_mcontext_t *mc);
#include "proc.h"
#ifdef DEBUG
void
disassemble_fragment(dcontext_t *dcontext, fragment_t *f, bool just_header);
void
disassemble_app_bb(dcontext_t *dcontext, app_pc tag, file_t outfile);
void
dump_mcontext_callstack(dcontext_t *dcontext);
#endif
enum {
CALLSTACK_USE_XML = 0x00000001,
CALLSTACK_ADD_HEADER = 0x00000002,
CALLSTACK_MODULE_INFO = 0x00000004,
CALLSTACK_MODULE_PATH = 0x00000008,
CALLSTACK_FRAME_PTR = 0x00000010,
};
void
dump_dr_callstack(file_t outfile);
void
dump_callstack(app_pc pc, app_pc ebp, file_t outfile, bool dump_xml);
void
dump_callstack_to_buffer(char *buf, size_t bufsz, size_t *sofar, app_pc pc, app_pc ebp,
uint flags );
void
disassemble_fragment_header(dcontext_t *dcontext, fragment_t *f, file_t outfile);
void
disassemble_fragment_body(dcontext_t *dcontext, fragment_t *f, file_t outfile);
void
disassemble_app_bb(dcontext_t *dcontext, app_pc tag, file_t outfile);
static inline bool
use_addr_prefix_on_short_disp(void)
{
#ifdef STANDALONE_DECODER
return false;
#else
return (
DYNAMO_OPTION(ibl_addr_prefix) ||
* on Pentium M, Core, and Core2.
* It's better on Pentium 4 and Pentium D.
*
* Note that this variation by processor type does not need to
* be stored in pcaches b/c either way works and the size is
* not assumed (except for prefixes: but coarse_units doesn't
* support prefixes in general).
*/
(proc_get_family() == FAMILY_PENTIUM_4 ||
(proc_get_family() == FAMILY_PENTIUM_3 &&
(proc_get_model() <= 8 || proc_get_model() == 10 || proc_get_model() == 11))));
* emits in emit_utils.c, except in cases where space is more
* important than speed.
* FIXME: case 5231 long term solution should properly choose
* - ibl - speed
* - prefixes - speed/space?
* - app code - preserverd since we normally don't need to reencode,
* unless it is a CTI that goes through FS - should be preserved too
* - direct stubs - space
* - indirect stubs - speed/space?
* - enter/exit - speed?
* - interception routines - speed?
*/
#endif
}
#include "encode_api.h"
#define DEFAULT_ISA_MODE_STATIC \
IF_X86_ELSE(IF_X64_ELSE(DR_ISA_AMD64, DR_ISA_IA32), \
IF_AARCHXX_ELSE(IF_X64_ELSE(DR_ISA_ARM_A64, DR_ISA_ARM_THUMB), \
DR_ISA_RV64IMAFDC))
* This one is now static as well after we removed the runtime option that
* used to be here: but I'm leaving the split to make it easier to add
* an option in the future.
*/
#define DEFAULT_ISA_MODE \
IF_X86_ELSE(IF_X64_ELSE(DR_ISA_AMD64, DR_ISA_IA32), \
IF_AARCHXX_ELSE(IF_X64_ELSE(DR_ISA_ARM_A64, DR_ISA_ARM_THUMB), \
DR_ISA_RV64IMAFDC))
#ifdef ARM
# define ENTRY_PC_TO_DECODE_PC(pc) \
((app_pc)(ALIGN_BACKWARD(pc, THUMB_SHORT_INSTR_SIZE)))
#else
# define ENTRY_PC_TO_DECODE_PC(pc) ((app_pc)(pc))
#endif
app_pc
canonicalize_pc_target(dcontext_t *dcontext, app_pc pc);
void
d_r_decode_init(void);
bool
fill_with_nops(dr_isa_mode_t isa_mode, byte *addr, size_t size);
* Arch-specific defines
*/
#ifdef X86
# define SIZE64_MOV_XAX_TO_TLS 8
# define SIZE64_MOV_XBX_TO_TLS 9
# define SIZE64_MOV_PTR_IMM_TO_XAX 10
# define SIZE64_MOV_PTR_IMM_TO_TLS (12 * 2)
# define SIZE64_MOV_R8_TO_XAX 3
# define SIZE64_MOV_R9_TO_XCX 3
# define SIZE32_MOV_XAX_TO_TLS 5
# define SIZE32_MOV_XBX_TO_TLS 6
# define SIZE32_MOV_XAX_TO_TLS_DISP32 6
# define SIZE32_MOV_XBX_TO_TLS_DISP32 7
# define SIZE32_MOV_XAX_TO_ABS 5
# define SIZE32_MOV_XBX_TO_ABS 6
# define SIZE32_MOV_PTR_IMM_TO_XAX 5
# define SIZE32_MOV_PTR_IMM_TO_TLS 10
# ifdef X64
# define FRAG_IS_32(flags) (TEST(FRAG_32_BIT, (flags)))
# define FRAG_IS_X86_TO_X64(flags) (TEST(FRAG_X86_TO_X64, (flags)))
# else
# define FRAG_IS_32(flags) true
# define FRAG_IS_X86_TO_X64(flags) false
# endif
# define PC_AS_JMP_TGT(isa_mode, pc) pc
# define PC_AS_LOAD_TGT(isa_mode, pc) pc
# define SIZE_MOV_XAX_TO_TLS(flags, require_addr16) \
(FRAG_IS_32(flags) ? ((require_addr16 || use_addr_prefix_on_short_disp()) \
? SIZE32_MOV_XAX_TO_TLS \
: SIZE32_MOV_XAX_TO_TLS_DISP32) \
: SIZE64_MOV_XAX_TO_TLS)
# define SIZE_MOV_XBX_TO_TLS(flags, require_addr16) \
(FRAG_IS_32(flags) ? ((require_addr16 || use_addr_prefix_on_short_disp()) \
? SIZE32_MOV_XBX_TO_TLS \
: SIZE32_MOV_XBX_TO_TLS_DISP32) \
: SIZE64_MOV_XBX_TO_TLS)
# define SIZE_MOV_PTR_IMM_TO_XAX(flags) \
(FRAG_IS_32(flags) ? SIZE32_MOV_PTR_IMM_TO_XAX : SIZE64_MOV_PTR_IMM_TO_XAX)
# define XCX_IN_TLS(flags) \
(DYNAMO_OPTION(private_ib_in_tls) || TEST(FRAG_SHARED, (flags)))
# define FRAGMENT_BASE_PREFIX_SIZE(flags) \
((FRAG_IS_X86_TO_X64(flags) && \
IF_X64_ELSE(DYNAMO_OPTION(x86_to_x64_ibl_opt), false)) \
? SIZE64_MOV_R9_TO_XCX \
: (XCX_IN_TLS(flags) ? SIZE_MOV_XBX_TO_TLS(flags, false) \
: SIZE32_MOV_XBX_TO_ABS))
* FIXME: find better way to export -- would use global var accessed
* by macro, but easiest to have as static initializer for heap bucket
*/
* SIZE32_MOV_XAX_TO_TLS == SIZE32_MOV_XAX_TO_ABS, and that
* x64 always uses tls
*/
# define DIRECT_EXIT_STUB_SIZE32 \
(SIZE32_MOV_XAX_TO_TLS + SIZE32_MOV_PTR_IMM_TO_XAX + JMP_LONG_LENGTH)
# define DIRECT_EXIT_STUB_SIZE64 \
(SIZE64_MOV_XAX_TO_TLS + SIZE64_MOV_PTR_IMM_TO_XAX + JMP_LONG_LENGTH)
# define DIRECT_EXIT_STUB_SIZE(flags) \
(FRAG_IS_32(flags) ? DIRECT_EXIT_STUB_SIZE32 : DIRECT_EXIT_STUB_SIZE64)
# define DIRECT_EXIT_STUB_DATA_SZ 0
* link through the stub and not mess up app state.
* 1st instr example:
* 67 64 c7 06 e0 0e 02 99 4e 7d addr16 mov $0x7d4e9902 -> %fs:0x0ee0
* 64-bit is split into high and low dwords:
* 65 c7 04 25 20 16 00 00 02 99 4e 7d mov $0x7d4e9902 -> %gs:0x1620
* 65 c7 04 25 24 16 00 00 00 00 00 00 mov $0x00000000 -> %gs:0x1624
* both of these exact sequences are assumed in entrance_stub_target_tag()
* and coarse_indirect_stub_jmp_target().
*/
# define STUB_COARSE_DIRECT_SIZE32 (SIZE32_MOV_PTR_IMM_TO_TLS + JMP_LONG_LENGTH)
# define STUB_COARSE_DIRECT_SIZE64 (SIZE64_MOV_PTR_IMM_TO_TLS + JMP_LONG_LENGTH)
# define STUB_COARSE_DIRECT_SIZE(flags) \
(FRAG_IS_32(flags) ? STUB_COARSE_DIRECT_SIZE32 : STUB_COARSE_DIRECT_SIZE64)
# define SET_TO_NOPS(isa_mode, addr, size) fill_with_nops(isa_mode, addr, size)
# define SET_TO_DEBUG(addr, size) memset(addr, 0xcc, size)
# define IS_SET_TO_NOP(addr, size) is_region_memset_to_char(addr, size, 0x90)
# define IS_SET_TO_DEBUG(addr, size) is_region_memset_to_char(addr, size, 0xcc)
# define CTI_PATCH_OFFSET 4
# define CTI_PATCH_SIZE 4
# define EXIT_STUB_PATCH_OFFSET 4
# define EXIT_STUB_PATCH_SIZE 4
# define MAX_PAD_SIZE 3
#elif defined(AARCHXX)
# ifdef X64
# define FRAG_IS_THUMB(flags) false
# define FRAG_IS_32(flags) false
# else
# define FRAG_IS_THUMB(flags) (TEST(FRAG_THUMB, (flags)))
# define FRAG_IS_32(flags) true
# endif
# define PC_AS_JMP_TGT(isa_mode, pc) \
((isa_mode) == DR_ISA_ARM_THUMB ? (app_pc)(((ptr_uint_t)pc) | 1) : pc)
# define PC_AS_LOAD_TGT(isa_mode, pc) \
((isa_mode) == DR_ISA_ARM_THUMB ? (app_pc)(((ptr_uint_t)pc) & ~0x1) : pc)
# ifdef AARCH64
# define AARCH64_INSTR_SIZE 4
# define FRAGMENT_BASE_PREFIX_SIZE(flags) AARCH64_INSTR_SIZE
# define DIRECT_EXIT_STUB_SIZE(flags) \
(10 * AARCH64_INSTR_SIZE)
* We reserve 12 bytes for the 8 byte address, so that we can store it in an 8-byte
* aligned address. This is required for atomicity of write operations.
*/
# define DIRECT_EXIT_STUB_DATA_SLOT_ALIGNMENT_PADDING 4
# define DIRECT_EXIT_STUB_DATA_SZ \
(sizeof(app_pc) + DIRECT_EXIT_STUB_DATA_SLOT_ALIGNMENT_PADDING)
# else
# define FRAGMENT_BASE_PREFIX_SIZE(flags) \
(FRAG_IS_THUMB(flags) ? THUMB_LONG_INSTR_SIZE : ARM_INSTR_SIZE)
# define ARM_INSTR_SIZE 4
# define THUMB_SHORT_INSTR_SIZE 2
# define THUMB_LONG_INSTR_SIZE 4
# define DIRECT_EXIT_STUB_INSTR_COUNT 4
# define DIRECT_EXIT_STUB_DATA_SZ sizeof(app_pc)
# define DIRECT_EXIT_STUB_SIZE(flags) \
((FRAG_IS_THUMB(flags) \
? (DIRECT_EXIT_STUB_INSTR_COUNT * THUMB_LONG_INSTR_SIZE) \
: (DIRECT_EXIT_STUB_INSTR_COUNT * ARM_INSTR_SIZE)) + \
DIRECT_EXIT_STUB_DATA_SZ)
# endif
# define STUB_COARSE_DIRECT_SIZE(flags) (ASSERT_NOT_IMPLEMENTED(false), 0)
# define ARM_NOP 0xe320f000
# define THUMB_NOP 0xbf00
# define ARM_BKPT 0xe1200070
# define THUMB_BKPT 0xbe00
# define SET_TO_NOPS(isa_mode, addr, size) fill_with_nops(isa_mode, addr, size)
# define SET_TO_DEBUG(addr, size) ASSERT_NOT_IMPLEMENTED(false)
# define IS_SET_TO_DEBUG(addr, size) (ASSERT_NOT_IMPLEMENTED(false), false)
# define CTI_PATCH_OFFSET 4
# define CTI_PATCH_SIZE 4
# define EXIT_STUB_PATCH_OFFSET 4
# define EXIT_STUB_PATCH_SIZE 4
# define MAX_PAD_SIZE 0
# define PC_LOAD_ADDR_ALIGN 4
#elif defined(RISCV64)
* these instructions. */
# define RISCV64_INSTR_SIZE 4
# define RISCV64_INSTR_COMPRESSED_SIZE 2
# define FRAGMENT_BASE_PREFIX_SIZE(flags) RISCV64_INSTR_SIZE * 2
# define DIRECT_EXIT_STUB_SIZE(flags) \
(16 * RISCV64_INSTR_SIZE)
# define FRAG_IS_32(flags) false
# define PC_AS_JMP_TGT(isa_mode, pc) pc
# define PC_AS_LOAD_TGT(isa_mode, pc) pc
# define DIRECT_EXIT_STUB_DATA_SLOT_ALIGNMENT_PADDING 4
# define DIRECT_EXIT_STUB_DATA_SZ \
(sizeof(app_pc) + DIRECT_EXIT_STUB_DATA_SLOT_ALIGNMENT_PADDING)
# define STUB_COARSE_DIRECT_SIZE(flags) (ASSERT_NOT_IMPLEMENTED(false), 0)
# define SET_TO_NOPS(isa_mode, addr, size) fill_with_nops(isa_mode, addr, size)
# define SET_TO_DEBUG(addr, size) ASSERT_NOT_IMPLEMENTED(false)
# define IS_SET_TO_DEBUG(addr, size) (ASSERT_NOT_IMPLEMENTED(false), false)
# define CTI_PATCH_OFFSET 4
# define EXIT_STUB_PATCH_OFFSET 4
# define EXIT_STUB_PATCH_SIZE 4
# define MAX_PAD_SIZE 0
#endif
#define WITHIN_PAD_REGION(lower, upper) ((upper) - (lower) <= PAD_JMPS_ALIGNMENT)
#define STATS_PAD_JMPS_ADD(flags, stat, val) \
DOSTATS({ \
if (TEST(FRAG_SHARED, (flags))) { \
if (TEST(FRAG_IS_TRACE, (flags))) \
STATS_ADD(pad_jmps_shared_trace_##stat, val); \
else \
STATS_ADD(pad_jmps_shared_bb_##stat, val); \
} else if (TEST(FRAG_IS_TRACE, (flags))) \
STATS_ADD(pad_jmps_trace_##stat, val); \
else if (TEST(FRAG_TEMP_PRIVATE, (flags))) \
STATS_ADD(pad_jmps_temp_##stat, val); \
else \
STATS_ADD(pad_jmps_bb_##stat, val); \
})
bool
is_exit_cti_stub_patchable(dcontext_t *dcontext, instr_t *inst, uint frag_flags);
uint
extend_trace_pad_bytes(fragment_t *add_frag);
uint
patchable_exit_cti_align_offs(dcontext_t *dcontext, instr_t *inst, cache_pc pc);
bool
is_patchable_exit_stub(dcontext_t *dcontext, linkstub_t *l, fragment_t *f);
uint
bytes_for_exitstub_alignment(dcontext_t *dcontext, linkstub_t *l, fragment_t *f,
byte *startpc);
byte *
pad_for_exitstub_alignment(dcontext_t *dcontext, linkstub_t *l, fragment_t *f,
byte *startpc);
void
remove_nops_from_ilist(dcontext_t *dcontext,
instrlist_t *ilist _IF_DEBUG(bool recreating));
uint
nop_pad_ilist(dcontext_t *dcontext, fragment_t *f, instrlist_t *ilist, bool emitting);
bool
is_exit_cti_patchable(dcontext_t *dcontext, instr_t *inst, uint frag_flags);
int
exit_stub_size(dcontext_t *dcontext, cache_pc target, uint flags);
int
insert_exit_stub(dcontext_t *dcontext, fragment_t *f, linkstub_t *l, cache_pc stub_pc);
int
linkstub_unlink_entry_offset(dcontext_t *dcontext, fragment_t *f, linkstub_t *l);
cache_pc
indirect_linkstub_stub_pc(dcontext_t *dcontext, fragment_t *f, linkstub_t *l);
cache_pc
indirect_linkstub_target(dcontext_t *dcontext, fragment_t *f, linkstub_t *l);
linkstub_t *
linkstub_cbr_disambiguate(dcontext_t *dcontext, fragment_t *f, linkstub_t *l1,
linkstub_t *l2);
cache_pc
cbr_fallthrough_exit_cti(cache_pc prev_cti_pc);
enum { NOT_HOT_PATCHABLE = false, HOT_PATCHABLE = true };
void
patch_branch(dr_isa_mode_t isa_mode, cache_pc branch_pc, cache_pc target_pc,
bool hot_patch);
bool
link_direct_exit(dcontext_t *dcontext, fragment_t *f, linkstub_t *l, fragment_t *targetf,
bool hot_patch);
void
unlink_direct_exit(dcontext_t *dcontext, fragment_t *f, linkstub_t *l);
void
link_indirect_exit(dcontext_t *dcontext, fragment_t *f, linkstub_t *l, bool hot_patch);
void
unlink_indirect_exit(dcontext_t *dcontext, fragment_t *f, linkstub_t *l);
void
insert_fragment_prefix(dcontext_t *dcontext, fragment_t *f);
int
fragment_prefix_size(uint flags);
void
update_indirect_exit_stub(dcontext_t *dcontext, fragment_t *f, linkstub_t *l);
#ifdef PROFILE_RDTSC
uint
profile_call_size(void);
void
insert_profile_call(cache_pc start_pc);
void
finalize_profile_call(dcontext_t *dcontext, fragment_t *f);
#endif
int
decode_syscall_num(dcontext_t *dcontext, byte *entry);
#ifdef WINDOWS
void
link_shared_syscall(dcontext_t *dcontext);
void
unlink_shared_syscall(dcontext_t *dcontext);
#endif
size_t
syscall_instr_length(dr_isa_mode_t mode);
bool
is_syscall_at_pc(dcontext_t *dcontext, app_pc pc);
cache_pc
entrance_stub_jmp(cache_pc stub);
cache_pc
entrance_stub_jmp_target(cache_pc stub);
app_pc
entrance_stub_target_tag(cache_pc stub, coarse_info_t *info);
bool
coarse_is_indirect_stub(cache_pc stub);
bool
coarse_cti_is_intra_fragment(dcontext_t *dcontext, coarse_info_t *info, instr_t *inst,
cache_pc start_pc);
cache_pc
coarse_indirect_stub_jmp_target(cache_pc stub);
uint
coarse_indirect_stub_size(coarse_info_t *info);
bool
coarse_is_entrance_stub(cache_pc stub);
bool
coarse_is_trace_head(cache_pc stub);
bool
entrance_stub_linked(cache_pc stub, coarse_info_t *info );
void
link_entrance_stub(dcontext_t *dcontext, cache_pc stub, cache_pc tgt, bool hot_patch,
coarse_info_t *info );
void
unlink_entrance_stub(dcontext_t *dcontext, cache_pc stub, uint flags,
coarse_info_t *info );
cache_pc
entrance_stub_from_cti(cache_pc cti);
uint
coarse_exit_prefix_size(coarse_info_t *info);
byte *
emit_coarse_exit_prefix(dcontext_t *dcontext, byte *pc, coarse_info_t *info);
void
patch_coarse_exit_prefix(dcontext_t *dcontext, coarse_info_t *info);
bool
special_ibl_xfer_is_thread_private(void);
void
link_special_ibl_xfer(dcontext_t *dcontext);
void
unlink_special_ibl_xfer(dcontext_t *dcontext);
cache_pc
get_client_ibl_xfer_entry(dcontext_t *dcontext);
#ifdef UNIX
cache_pc
get_native_plt_ibl_xfer_entry(dcontext_t *dcontext);
cache_pc
get_native_ret_ibl_xfer_entry(dcontext_t *dcontext);
#endif
enum {
#ifdef X86
* jcc branch hint!
*/
CBR_LONG_LENGTH = 6,
JMP_LONG_LENGTH = 5,
JMP_SHORT_LENGTH = 2,
CBR_SHORT_REWRITE_LENGTH = 9,
RET_0_LENGTH = 1,
PUSH_IMM32_LENGTH = 5,
POPF_LENGTH = 1,
CTI_IND1_LENGTH = 2,
CTI_IND2_LENGTH = 3,
CTI_IND3_LENGTH = 4,
CTI_DIRECT_LENGTH = 5,
CTI_IAT_LENGTH = 6,
CTI_FAR_ABS_LENGTH = 7,
#elif defined(AARCH64)
CBR_LONG_LENGTH = 4,
JMP_LONG_LENGTH = 4,
JMP_SHORT_LENGTH = 4,
CBR_SHORT_REWRITE_LENGTH = 4,
SVC_LENGTH = 4,
#elif defined(ARM)
CBR_LONG_LENGTH = ARM_INSTR_SIZE,
JMP_LONG_LENGTH = ARM_INSTR_SIZE,
JMP_SHORT_LENGTH = THUMB_SHORT_INSTR_SIZE,
CBR_SHORT_REWRITE_LENGTH = 6,
SVC_THUMB_LENGTH = THUMB_SHORT_INSTR_SIZE,
SVC_ARM_LENGTH = ARM_INSTR_SIZE,
#elif defined(RISCV64)
CBR_LONG_LENGTH = 4,
JMP_LONG_LENGTH = 4,
JMP_SHORT_LENGTH = 4,
CBR_SHORT_REWRITE_LENGTH = 4,
#endif
#if defined(AARCH64) || defined(RISCV64)
INT_LENGTH = 4,
SYSCALL_LENGTH = 4,
SYSENTER_LENGTH = 4,
#else
INT_LENGTH = 2,
SYSCALL_LENGTH = 2,
SYSENTER_LENGTH = 2,
#endif
};
#define REL32_REACHABLE_OFFS(offs) ((offs) <= INT_MAX && (offs) >= INT_MIN)
#define REL32_REACHABLE(source, target) REL32_REACHABLE_OFFS((target) - (source))
* the jmp in jmp_target as if was located at app_loc. */
bool
is_jmp_rel32(byte *code_buf, app_pc app_loc, app_pc *jmp_target );
* the jmp in jmp_target as if was located at app_loc. */
bool
is_jmp_rel8(byte *code_buf, app_pc app_loc, app_pc *jmp_target );
* which is of course highest for the case of indirect branch mangling.
* Normal lea, jecxz, lea is 14, NATIVE_RETURN (now removed) could get above 20,
* but this should cover everything, fine to be well above, this is
* only used to keep below the maximum trace size for the next bb,
* we calculate the exact size in fixup_last_cti().
*
* For x64 we have to increase this (PR 333576 hit this):
* +19 L4 65 48 89 0c 25 10 00 mov %rcx -> %gs:0x10
* 00 00
* +28 L4 48 8b c8 mov %rax -> %rcx
* +31 L4 e9 1b e2 f6 ff jmp $0x00000000406536e0 <shared_bb_ibl_indjmp>
* (+36)
* =>
* +120 L0 65 48 89 0c 25 10 00 mov %rcx -> %gs:0x10
* 00 00
* +129 L0 48 8b c8 mov %rax -> %rcx
* +132 L3 65 48 a3 00 00 00 00 mov %rax -> %gs:0x00
* 00 00 00 00
* +143 L3 48 b8 23 24 93 28 00 mov $0x0000000028932423 -> %rax
* 00 00 00
* +153 L3 65 48 a3 08 00 00 00 mov %rax -> %gs:0x08
* 00 00 00 00
* +164 L3 9f lahf -> %ah
* +165 L3 0f 90 c0 seto -> %al
* +168 L3 65 48 3b 0c 25 08 00 cmp %rcx %gs:0x08
* 00 00
* +177 L4 0f 85 a9 d7 f6 ff jnz $0x000000004065312f <shared_trace_cmp_indjmp>
* +183 L3 65 48 8b 0c 25 10 00 mov %gs:0x10 -> %rcx
* 00 00
* +192 L3 04 7f add $0x7f %al -> %al
* +194 L3 9e sahf %ah
* +195 L3 65 48 a1 00 00 00 00 mov %gs:0x00 -> %rax
* 00 00 00 00
* +206
*
* (36-19)=17 vs (206-120)=86 => 69 bytes. was 65 bytes prior to PR 209709!
* usually 3 bytes smaller since don't need to restore eflags.
*/
#define TRACE_CTI_MANGLE_SIZE_UPPER_BOUND 72
fragment_t *
build_basic_block_fragment(dcontext_t *dcontext, app_pc start_pc, uint initial_flags,
bool linked, bool visible, bool for_trace,
instrlist_t **unmangled_ilist);
void
interp(dcontext_t *dcontext);
uint
extend_trace(dcontext_t *dcontext, fragment_t *f, linkstub_t *prev_l);
int
append_trace_speculate_last_ibl(dcontext_t *dcontext, instrlist_t *trace,
app_pc speculate_next_tag, bool record_translation);
* and this function would be removed from end_and_emit_trace and
* recreate_fragment_ilist, which are where it is currently called.
*/
#ifdef AARCH64
int
fixup_indirect_trace_exit(dcontext_t *dcontext, instrlist_t *trace);
#endif
uint
forward_eflags_analysis(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr);
* assuming that the instr_t flags correspond to the start of the fragment_t
*/
uint
instr_eflags_to_fragment_eflags(uint instr_eflags);
instrlist_t *
decode_fragment(dcontext_t *dcontext, fragment_t *f, byte *buf, uint *bufsz,
uint target_flags, uint *dir_exits, uint *indir_exits);
instrlist_t *
decode_fragment_exact(dcontext_t *dcontext, fragment_t *f, byte *buf,
uint *bufsz, uint target_flags,
uint *dir_exits, uint *indir_exits);
fragment_t *
copy_fragment(dcontext_t *dcontext, fragment_t *f, bool replace);
void
shift_ctis_in_fragment(dcontext_t *dcontext, fragment_t *f, ssize_t shift, cache_pc start,
cache_pc end, size_t old_size);
#ifdef PROFILE_RDTSC
void
add_profile_call(dcontext_t *dcontext);
#endif
app_pc
d_r_emulate(dcontext_t *dcontext, app_pc pc, priv_mcontext_t *mc);
bool
instr_is_trace_cmp(dcontext_t *dcontext, instr_t *inst);
typedef struct {
app_pc region_start;
app_pc region_end;
app_pc start_pc;
app_pc min_pc;
app_pc max_pc;
app_pc bb_end;
bool contiguous;
bool overlap;
} overlap_info_t;
instrlist_t *
build_app_bb_ilist(dcontext_t *dcontext, byte *start_pc, file_t outf);
void
bb_build_abort(dcontext_t *dcontext, bool clean_vmarea, bool unlock);
bool
expand_should_set_translation(dcontext_t *dcontext);
* Use recreate_fragment_ilist() for building an instrlist_t for a fragment.
* If check_vm_area is false, Does NOT call check_thread_vm_area()!
* Make sure you know it will terminate at the right spot. It does
* check selfmod and native_exec for elision, but otherwise will
* follow ubrs to the limit. Currently used for
* record_translation_info() (case 3559).
*/
instrlist_t *
recreate_bb_ilist(dcontext_t *dcontext, byte *pc, byte *pretend_pc,
app_pc stop_pc , uint flags,
uint *res_flags, uint *res_exit_type, bool check_vm_area, bool mangle,
void **vmlist, bool call_client, bool for_trace);
instrlist_t *
recreate_fragment_ilist(dcontext_t *dcontext, byte *pc,
fragment_t **f_res, bool *alloc, bool mangle,
bool call_client);
app_pc
find_app_bb_end(dcontext_t *dcontext, byte *start_pc, uint flags);
bool
app_bb_overlaps(dcontext_t *dcontext, byte *start_pc, uint flags, byte *region_start,
byte *region_end, overlap_info_t *info_res);
bool
reached_image_entry_yet(void);
void
set_reached_image_entry(void);
void
insert_clean_call_with_arg_jmp_if_ret_true(dcontext_t *dcontext, instrlist_t *ilist,
instr_t *instr, void *callee, int arg,
app_pc jmp_tag, instr_t *jmp_instr);
#ifdef UNIX
bool
mangle_syscall_code(dcontext_t *dcontext, fragment_t *f, byte *pc, bool skip);
#endif
void
finalize_selfmod_sandbox(dcontext_t *dcontext, fragment_t *f);
bool
instr_check_xsp_mangling(dcontext_t *dcontext, instr_t *inst, int *xsp_adjust);
bool
instr_supports_simple_mangling_epilogue(dcontext_t *dcontext, instr_t *inst);
void
float_pc_update(dcontext_t *dcontext);
void
mangle_finalize(dcontext_t *dcontext, instrlist_t *ilist, fragment_t *f);
#ifdef CHECK_RETURNS_SSE2
void
finalize_return_check(dcontext_t *dcontext, fragment_t *f);
#endif
#ifdef RETURN_AFTER_CALL
void
add_return_target(dcontext_t *dcontext, app_pc instr_pc, instr_t *instr);
int
ret_after_call_check(dcontext_t *dcontext, app_pc target_addr, app_pc src_addr);
bool
is_observed_call_site(dcontext_t *dcontext, app_pc retaddr);
#endif
void
optimize_trace(dcontext_t *dcontext, app_pc tag, instrlist_t *trace);
#ifdef DEBUG
void
print_optimization_stats(void);
#endif
#ifdef SIDELINE
# include "sideline.h"
#endif
#include "../link.h"
static inline ibl_branch_type_t
extract_branchtype(ushort linkstub_flags)
{
if (TEST(LINK_RETURN, linkstub_flags))
return IBL_RETURN;
if (EXIT_IS_CALL(linkstub_flags))
return IBL_INDCALL;
if (TEST(LINK_JMP, linkstub_flags))
return IBL_INDJMP;
ASSERT_NOT_REACHED();
return IBL_GENERIC;
}
static inline uint
ibltype_to_linktype(ibl_branch_type_t ibltype)
{
if (ibltype == IBL_RETURN)
return LINK_INDIRECT | LINK_RETURN;
if (ibltype == IBL_INDCALL)
return LINK_INDIRECT | LINK_CALL;
if (ibltype == IBL_INDJMP)
return LINK_INDIRECT | LINK_JMP;
ASSERT_NOT_REACHED();
return 0;
}
#ifdef DEBUG
bool
is_ibl_routine_type(dcontext_t *dcontext, cache_pc target, ibl_branch_type_t branch_type);
#endif
* allow a data pointer to be converted to a function pointer to allow
* better type-checking for the rest of our C code
*/
static inline generic_func_t
convert_data_to_function(void *data_ptr)
{
#ifdef WINDOWS
# pragma warning(push)
# pragma warning(disable : 4055)
#endif
return (generic_func_t)data_ptr;
#ifdef WINDOWS
# pragma warning(pop)
#endif
}
* exceptions and to implement an internal generic try-catch mechanism
* later on (case 1800).
* We could use a priv_mcontext_t here, but that has 4 extra fields that aren't
* used here. TODO: we could use it and live with the wastage?
* Espcially in light of the merging from PR 218131.
*
* edx & eax need not be saved because they are scratch registers in a
* call, i.e., caller-save; they are used to return values from functions.
* As longjmp is implemented as return from setjmp, eax & edx need not be saved.
*/
typedef struct dr_jmp_buf_t {
#ifdef X86
reg_t xbx;
reg_t xcx;
reg_t xdi;
reg_t xsi;
reg_t xbp;
reg_t xsp;
reg_t xip;
# ifdef X64
reg_t r8, r9, r10, r11, r12, r13, r14, r15;
# endif
#elif defined(ARM)
# define REGS_IN_JMP_BUF 26
reg_t regs[REGS_IN_JMP_BUF];
#elif defined(AARCH64)
# define REGS_IN_JMP_BUF 22
reg_t regs[REGS_IN_JMP_BUF];
#elif defined(RISCV64)
# define REGS_IN_JMP_BUF 26
reg_t regs[REGS_IN_JMP_BUF];
#endif
#if defined(UNIX) && defined(DEBUG)
* mask in the fault's signal frame, but we do record it in debug
* build to verify our assumptions
*/
kernel_sigset_t sigmask;
#endif
} dr_jmp_buf_t;
int
dr_longjmp(dr_jmp_buf_t *buf, int val);
int
dr_setjmp(dr_jmp_buf_t *buf);
bool
safe_read_fast(const void *base, size_t size, void *out_buf, size_t *bytes_read);
bool
is_safe_read_pc(app_pc pc);
app_pc
safe_read_resume_pc(void);
#ifdef UNIX
# ifdef memcpy
# undef memcpy
# endif
# ifdef memset
# undef memset
# endif
void *
memcpy(void *dst, const void *src, size_t n);
void *
memset(void *dst, int val, size_t n);
void *
memmove(void *dst, const void *src, size_t n);
#endif
#ifdef UNIX
void *
_dynamorio_runtime_resolve(void);
#endif
#define DR_SETJMP(buf) (dr_setjmp(buf))
#define DR_LONGJMP(buf, val) \
do { \
ASSERT(val != 0); \
dr_longjmp(buf, val); \
} while (0)
* These assume that we're at function entry, (i.e., mc->xsp points at the
* return address on X86, or mc->sp points at the first on-stack arg on ARM).
* Compare the SYS_PARAM* macros and REGPARM* enum: some duplication there.
*
* Note that, in X64, if a param is 32 bits we must ignore the top 32 bits
* of its stack slot (Since passed via "mov dword" instead of "push", top
* bits are garbage.)
*/
#ifdef X86
# ifdef X64
# ifdef WINDOWS
# define APP_PARAM_0(mc) (mc)->xcx
# define APP_PARAM_1(mc) (mc)->xdx
# define APP_PARAM_2(mc) (mc)->r8
# define APP_PARAM_3(mc) (mc)->r9
# define APP_PARAM_4(mc) (*(((reg_t *)((mc)->xsp)) + 5))
# define APP_PARAM_5(mc) (*(((reg_t *)((mc)->xsp)) + 6))
# define APP_PARAM_6(mc) (*(((reg_t *)((mc)->xsp)) + 7))
# define APP_PARAM_7(mc) (*(((reg_t *)((mc)->xsp)) + 8))
# define APP_PARAM_8(mc) (*(((reg_t *)((mc)->xsp)) + 9))
# define APP_PARAM_9(mc) (*(((reg_t *)((mc)->xsp)) + 10))
# define APP_PARAM_10(mc) (*(((reg_t *)((mc)->xsp)) + 11))
# else
# define APP_PARAM_0(mc) (mc)->xdi
# define APP_PARAM_1(mc) (mc)->xsi
# define APP_PARAM_2(mc) (mc)->rdx
# define APP_PARAM_3(mc) (mc)->rcx
# define APP_PARAM_4(mc) (mc)->r8
# define APP_PARAM_5(mc) (mc)->r9
# define APP_PARAM_6(mc) (*(((reg_t *)((mc)->xsp)) + 1))
# define APP_PARAM_7(mc) (*(((reg_t *)((mc)->xsp)) + 2))
# define APP_PARAM_8(mc) (*(((reg_t *)((mc)->xsp)) + 3))
# define APP_PARAM_9(mc) (*(((reg_t *)((mc)->xsp)) + 4))
# define APP_PARAM_10(mc) (*(((reg_t *)((mc)->xsp)) + 5))
# endif
# define APP_PARAM(mc, offs) APP_PARAM_##offs(mc)
# else
# define APP_PARAM(mc, offs) (*(((reg_t *)((mc)->xsp)) + (offs) + 1))
# endif
#elif defined(ARM)
# ifdef UNIX
# define APP_PARAM_0(mc) (mc)->r0
# define APP_PARAM_1(mc) (mc)->r1
# define APP_PARAM_2(mc) (mc)->r2
# define APP_PARAM_3(mc) (mc)->r3
# ifdef X64
# define APP_PARAM_4(mc) (mc)->r4
# define APP_PARAM_5(mc) (mc)->r5
# define APP_PARAM_6(mc) (mc)->r6
# define APP_PARAM_7(mc) (mc)->r7
# define APP_PARAM_8(mc) (*(((reg_t *)((mc)->xsp)) + 0))
# define APP_PARAM_9(mc) (*(((reg_t *)((mc)->xsp)) + 1))
# define APP_PARAM_10(mc) (*(((reg_t *)((mc)->xsp)) + 2))
# else
# define APP_PARAM_4(mc) (*(((reg_t *)((mc)->xsp)) + 0))
# define APP_PARAM_5(mc) (*(((reg_t *)((mc)->xsp)) + 1))
# define APP_PARAM_6(mc) (*(((reg_t *)((mc)->xsp)) + 2))
# define APP_PARAM_7(mc) (*(((reg_t *)((mc)->xsp)) + 3))
# define APP_PARAM_8(mc) (*(((reg_t *)((mc)->xsp)) + 4))
# define APP_PARAM_9(mc) (*(((reg_t *)((mc)->xsp)) + 5))
# define APP_PARAM_10(mc) (*(((reg_t *)((mc)->xsp)) + 6))
# endif
# else
# error Windows is not supported
# endif
# define APP_PARAM(mc, offs) APP_PARAM_##offs(mc)
#endif
#define MCXT_SYSNUM_REG(mc) ((mc)->MCXT_FLD_SYSNUM_REG)
#define MCXT_FIRST_REG_FIELD(mc) ((mc)->MCXT_FLD_FIRST_REG)
static inline reg_t
get_mcontext_frame_ptr(dcontext_t *dcontext, priv_mcontext_t *mc)
{
reg_t reg;
switch (dr_get_isa_mode(dcontext)) {
#ifdef X86
case DR_ISA_IA32:
case DR_ISA_AMD64: reg = mc->xbp; break;
#elif defined(ARM)
case DR_ISA_ARM_THUMB: reg = mc->r7; break;
case DR_ISA_ARM_A32: reg = mc->r11; break;
#elif defined(AARCH64)
case DR_ISA_ARM_A64: reg = mc->r29; break;
#elif defined(RISCV64)
case DR_ISA_RV64IMAFDC: reg = mc->x8; break;
#endif
default: ASSERT_NOT_REACHED(); reg = 0;
}
return reg;
}
* Keep in synch w/ defines in x86.asm
*/
#define CS32_SELECTOR 0x23
#define CS64_SELECTOR 0x33
#ifdef ARM
void
encode_reset_it_block(dcontext_t *dcontext);
* being-freed instr is involved.
*/
void
encode_instr_freed_event(dcontext_t *dcontext, instr_t *instr);
#endif
#ifdef LINUX
typedef struct _rseq_entry_state_t {
reg_t gpr[DR_NUM_GPR_REGS];
} rseq_entry_state_t;
#endif
#endif