/* *******************************************************************************
* Copyright (c) 2010-2023 Google, Inc. All rights reserved.
* Copyright (c) 2011 Massachusetts Institute of Technology All rights reserved.
* Copyright (c) 2000-2010 VMware, Inc. All rights reserved.
* *******************************************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/*
* os.c - Linux specific routines
*/
/* We use the 64-bit stat interface (_LARGEFILE64_SOURCE with SYS_stat64).
* This limits us to 2.4+ kernel but that's ok.
* I don't really want to get into requiring kernel headers to build
* general release packages, though that would be fine for targeted builds.
* There are 3 different stat syscalls (SYS_oldstat, SYS_stat, and SYS_stat64)
* and using _LARGEFILE64_SOURCE with SYS_stat64 is the best match.
*/
#ifndef _LARGEFILE64_SOURCE
# define _LARGEFILE64_SOURCE
#endif
#include <sys/types.h>
#include <sys/mman.h>
#ifndef MAP_32BIT
# define MAP_32BIT 0x40
#endif
#ifndef MAP_ANONYMOUS
# define MAP_ANONYMOUS MAP_ANON
#endif
#include <sys/stat.h>
#include <fcntl.h>
#include "../globals.h"
#include "../hashtable.h"
#include "../native_exec.h"
#include <unistd.h>
#include <limits.h>
#ifdef MACOS
# include <sys/sysctl.h>
# ifndef SYS___sysctl
# define SYS___sysctl SYS_sysctl
# endif
# include <mach/mach_traps.h>
# include "include/syscall_mach.h"
#endif
#ifdef LINUX
# include <sys/vfs.h>
#elif defined(MACOS)
# include <sys/mount.h>
# include <mach/mach.h>
# include <mach/task.h>
# include <mach/semaphore.h>
# include <mach/sync_policy.h>
#endif
#include <dirent.h>
#include <sys/time.h>
#include <sys/resource.h>
#ifndef X64
/* Resource-limit pair whose two fields are of type uint rather than the
 * native rlim_t.  Only declared when X64 is not defined.
 * NOTE(review): presumably mirrors the kernel's compat rlimit layout -- confirm.
 */
struct compat_rlimit {
/* Current limit value. */
uint rlim_cur;
/* Maximum limit value. */
uint rlim_max;
};
#endif
#ifdef MACOS
typedef struct rlimit rlimit64_t;
#else
typedef struct rlimit64 rlimit64_t;
#endif
#ifdef LINUX
/* The declarations we need are normally exposed by <sched.h> only with _GNU_SOURCE
* defined. _GNU_SOURCE brings in unwanted extensions and causes name
* conflicts. Instead, we include linux/sched.h which comes from the Linux
* kernel headers.
*/
# include <linux/sched.h>
#endif
#include "module.h"
#include "tls.h"
#if defined(X86) && defined(DEBUG)
# include "os_asm_defines.asm"
#endif
#ifndef F_DUPFD_CLOEXEC
# define F_DUPFD_CLOEXEC 1030
#endif
/* Reading a single register is not always sufficient to obtain a syscall result.
* For example, MacOS has some 32-bit syscalls that return 64-bit
* values in xdx:xax.
*/
#define MCXT_SYSCALL_RES(mc) ((mc)->IF_X86_ELSE(xax, IF_RISCV64_ELSE(a0, r0)))
#if defined(DR_HOST_AARCH64)
# if defined(MACOS)
# define READ_TP_TO_R3_DISP_IN_R2 \
"mrs " ASM_R3 ", tpidrro_el0\n\t" \
"ldr " ASM_R3 ", [" ASM_R3 ", " ASM_R2 "] \n\t"
# else
# define READ_TP_TO_R3_DISP_IN_R2 \
"mrs " ASM_R3 ", tpidr_el0\n\t" \
"ldr " ASM_R3 ", [" ASM_R3 ", " ASM_R2 "] \n\t"
# endif
#elif defined(DR_HOST_ARM)
# define READ_TP_TO_R3_DISP_IN_R2 \
"mrc p15, 0, " ASM_R3 \
", c13, c0, " STRINGIFY(USR_TLS_REG_OPCODE) " \n\t" \
"ldr " ASM_R3 ", [" ASM_R3 \
", " ASM_R2 "] \n\t"
#endif
typedef int (*init_fn_t)(int argc, char **argv, char **envp);
#ifndef STATIC_LIBRARY
/* Private environ pointer. Points at the environment variable array
* on the stack, which is different from what libc __environ may point at. We
* use the environment for following children and setting options, so its OK
* that we don't see what libc says.
*/
char **our_environ;
#endif
#include <errno.h>
#if !defined(STANDALONE_UNIT_TEST) && !defined(MACOS)
# undef errno
#endif
#include "instr.h"
#include "decode_fast.h"
#include <dlfcn.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <syslog.h>
#include "../vmareas.h"
#ifdef RCT_IND_BRANCH
# include "../rct.h"
#endif
#ifdef LINUX
# include "include/syscall.h"
# include "include/clone3.h"
# include "include/close_range.h"
#else
# include <sys/syscall.h>
#endif
#include "../module_shared.h"
#include "os_private.h"
#include "../synch.h"
#include "memquery.h"
#include "ksynch.h"
#include "dr_tools.h"
#ifndef HAVE_MEMINFO_QUERY
# include "memcache.h"
#endif
#include "instrument.h"
#ifdef LINUX
# include "rseq_linux.h"
#endif
#ifdef MACOS
# define SYSNUM_EXIT_PROCESS SYS_exit
# define SYSNUM_EXIT_THREAD SYS_bsdthread_terminate
#else
# define SYSNUM_EXIT_PROCESS SYS_exit_group
# define SYSNUM_EXIT_THREAD SYS_exit
#endif
#ifdef ANDROID
# define PR_SET_VMA 0x53564d41
# define PR_SET_VMA_ANON_NAME 0
#endif
DECLARE_CXTSWPROT_VAR(static mutex_t set_thread_area_lock,
INIT_LOCK_FREE(set_thread_area_lock));
static bool first_thread_tls_initialized;
static bool last_thread_tls_exited;
tls_type_t tls_global_type;
#ifndef HAVE_TLS
/* Fallback when HAVE_TLS is not defined: a fixed-size table. We do not expect
* tons of threads anyway
*/
# define MAX_THREADS 512
/* One entry of the table used when HAVE_TLS is not defined: pairs a thread id
 * with that thread's dcontext.
 */
typedef struct _tls_slot_t {
/* Thread id that owns this slot. */
thread_id_t tid;
/* The dcontext associated with tid. */
dcontext_t *dcontext;
} tls_slot_t;
static tls_slot_t *tls_table;
DECLARE_CXTSWPROT_VAR(mutex_t tls_lock, INIT_LOCK_FREE(tls_lock));
#endif
/* See the dr_raw_tls_calloc() docs.
*/
static bool client_tls_allocated[MAX_NUM_CLIENT_TLS];
DECLARE_CXTSWPROT_VAR(static mutex_t client_tls_lock, INIT_LOCK_FREE(client_tls_lock));
#include <stddef.h>
#include <sys/utsname.h>
static void
handle_execve_post(dcontext_t *dcontext);
static bool
os_switch_lib_tls(dcontext_t *dcontext, bool to_app);
static bool
os_switch_seg_to_context(dcontext_t *dcontext, reg_id_t seg, bool to_app);
#ifdef X86
static bool
os_set_dr_tls_base(dcontext_t *dcontext, os_local_state_t *tls, byte *base);
#endif
#ifdef LINUX
static bool
handle_app_mremap(dcontext_t *dcontext, byte *base, size_t size, byte *old_base,
size_t old_size, uint old_prot, uint old_type);
static void
handle_app_brk(dcontext_t *dcontext, byte *lowest_brk , byte *old_brk,
byte *new_brk);
#endif
static char dynamorio_library_path[MAXIMUM_PATH];
static char dynamorio_library_filepath[MAXIMUM_PATH];
static char dynamorio_libname_buf[MAXIMUM_PATH];
static const char *dynamorio_libname = dynamorio_libname_buf;
static char dynamorio_alt_arch_path[MAXIMUM_PATH];
static char dynamorio_alt_arch_filepath[MAXIMUM_PATH];
#define DR_LIBDIR_X86 STRINGIFY(LIBDIR_X86)
#define DR_LIBDIR_X64 STRINGIFY(LIBDIR_X64)
static void
get_dynamo_library_bounds(void);
static void
get_alt_dynamo_library_bounds(void);
static app_pc dynamo_dll_start = NULL;
static app_pc dynamo_dll_end = NULL;
static app_pc executable_start = NULL;
static app_pc executable_end = NULL;
static char executable_path[MAXIMUM_PATH];
static char *executable_basename;
/* Pointers to the app's argc and argv.
* These are only written once during process init and we can live with
* the non-guaranteed-delay until they are visible to other cores.
*/
static int *app_argc = NULL;
static char **app_argv = NULL;
static bool kernel_thread_groups;
static bool kernel_64bit;
pid_t pid_cached;
static bool fault_handling_initialized;
#ifdef PROFILE_RDTSC
uint kilo_hertz;
#endif
DR_API file_t our_stdout = STDOUT_FILENO;
DR_API file_t our_stderr = STDERR_FILENO;
DR_API file_t our_stdin = STDIN_FILENO;
static rlimit64_t app_rlimit_nofile;
static int min_dr_fd;
/* We track the files DR opens so we can keep the app from changing them,
* and so we can close them in a child of fork.
* the table key is the fd and the payload is the set of DR_FILE_* flags.
*/
static generic_table_t *fd_table;
#define INIT_HTABLE_SIZE_FD 6
/* A few fds may be opened before fd_table can be created.
* This is due to constraints on the order of invoking various init routines in
* the dynamorio_app_init_part_* routines.
* - dynamorio_app_init_part_one_options opens the global log file when logging
* is enabled in the debug build.
* - vmm_heap_unit_init opens the dual_map_file when -satisfy_w_xor_x is set.
* For these files, fd_table_add would not be able to really add the FD.
* Therefore, we have to remember them so that we can add it to fd_table later
* when we create it.
*/
#define MAX_FD_ADD_PRE_HEAP 2
static int fd_add_pre_heap[MAX_FD_ADD_PRE_HEAP];
static int fd_add_pre_heap_flags[MAX_FD_ADD_PRE_HEAP];
static int num_fd_add_pre_heap;
#ifdef LINUX
static byte *app_brk_map;
static byte *app_brk_cur;
static byte *app_brk_end;
#endif
#ifdef MACOS
static int macos_version;
#endif
static bool
is_readable_without_exception_internal(const byte *pc, size_t size, bool query_os);
static bool
mmap_check_for_module_overlap(app_pc base, size_t size, bool readable, uint64 inode,
bool at_map);
#ifdef LINUX
static char *
read_proc_self_exe(bool ignore_cache);
#endif
/* Iterator over the entries of a directory. If we need
* this on Windows we should generalize it and export it to clients.
*/
/* State for walking a directory via os_dir_iterator_start() and
 * os_dir_iterator_next().
 */
typedef struct _dir_iterator_t {
/* File descriptor of the directory being iterated. */
file_t fd;
/* NOTE(review): presumably the current offset into buf -- confirm. */
int off;
/* NOTE(review): presumably the end of the valid data in buf -- confirm. */
int end;
/* NOTE(review): presumably the name of the current entry -- confirm. */
const char *name;
/* Scratch buffer of 4 * MAXIMUM_PATH bytes. */
char buf[4 * MAXIMUM_PATH];
} dir_iterator_t;
static void
os_dir_iterator_start(dir_iterator_t *iter, file_t fd);
static bool
os_dir_iterator_next(dir_iterator_t *iter);
/* vsyscall page: hardcoded at 0xffffe000 in earlier kernels, but
* randomly placed since fedora2.
* marked rx then: FIXME: should disallow this guy when that's the case!
* random vsyscall page is identified in maps files as "[vdso]"
* (kernel-provided fake shared library or Virt Dyn Shared Object).
*/
app_pc vsyscall_page_start = NULL;
app_pc vsyscall_syscall_end_pc = NULL;
app_pc vsyscall_sysenter_return_pc = NULL;
app_pc vsyscall_sysenter_displaced_pc = NULL;
#define VSYSCALL_PAGE_START_HARDCODED ((app_pc)(ptr_uint_t)0xffffe000)
#ifdef X64
/* On some systems the vsyscall region is listed in the maps file as
* not executable
* ffffffffff600000-ffffffffffe00000 ---p 00000000 00:00 0 [vsyscall]
*/
# define VSYSCALL_REGION_MAPS_NAME "[vsyscall]"
#endif
app_pc vdso_page_start = NULL;
size_t vdso_size = 0;
#if !defined(STANDALONE_UNIT_TEST) && !defined(STATIC_LIBRARY)
/* The threading library keeps errno in a per-thread location
 * which it looks up by dispatching on the stack pointer.  This doesn't work
 * when within dynamo.  Thus, we define our own __errno_location() for use both
 * by us and the app, to prevent pthreads looking at the stack pointer when
 * out of the code cache.
 */

/* FIXME: a thread without a dcontext uses the shared init_errno slot: are there
 * any problems with init_errno being set and then dcontext->errno being read?
 * FIXME: if a thread issues a dr_app_stop, then we don't want to use
 * this errno slot?  But it may later do a start...probably ok to keep using
 * the slot.  But, when threads die, they'll all use the same init_errno!
 */
static int init_errno;

/* Returns the address of the errno slot for the calling thread: the slot
 * inside the thread's dcontext when one exists, else the shared init_errno.
 */
int *
__errno_location(void)
{
    dcontext_t *dcontext = get_thread_private_dcontext();
    /* No dcontext for this thread: fall back on the shared slot. */
    if (dcontext == NULL)
        return &init_errno;
    return &(dcontext->upcontext_ptr->dr_errno);
}
#endif
#ifdef HAVE_TLS
* (gdb) x/20i (*(errno_loc_t)0xf721e413)
* 0xf721e413 <__errno_location>: push %ebp
* 0xf721e414 <__errno_location+1>: mov %esp,%ebp
* 0xf721e416 <__errno_location+3>: call <__x86.get_pc_thunk.cx>
* 0xf721e41b <__errno_location+8>: add $0x166bd9,%ecx
* 0xf721e421 <__errno_location+14>: mov -0x1c(%ecx),%eax
* 0xf721e427 <__errno_location+20>: add %gs:0x0,%eax
* 0xf721e42e <__errno_location+27>: pop %ebp
* 0xf721e42f <__errno_location+28>: ret
*
* __errno_location calcuates the errno location by adding
* TLS's base with errno's offset in TLS.
* However, because the TLS has been switched in os_tls_init,
* the calculated address is wrong.
* We first get the errno offset in TLS at init time and
* calculate correct address by adding the app's tls base.
*/
* 0xb6f0b290 <__errno_location>: ldr r3, [pc, #12]
* 0xb6f0b292 <__errno_location+2>: mrc 15, 0, r0, cr13, cr0, {3}
* 0xb6f0b296 <__errno_location+6>: add r3, pc
* 0xb6f0b298 <__errno_location+8>: ldr r3, [r3, #0]
* 0xb6f0b29a <__errno_location+10>: adds r0, r0, r3
* 0xb6f0b29c <__errno_location+12>: bx lr
* It uses the predefined offset to get errno location in TLS,
* and we should be able to reuse the code here.
*/
static int libc_errno_tls_offs;
static int *
our_libc_errno_loc(void)
{
void *app_tls = os_get_app_tls_base(NULL, TLS_REG_LIB);
if (app_tls == NULL)
return NULL;
return (int *)(app_tls + libc_errno_tls_offs);
}
#endif
/* libc errno preservation.
*
* Errno location is per-thread so we store the
* function globally and call it each time. Note that pthreads seems
* to be the one who provides per-thread errno: using raw syscalls to
* create threads, we end up with a global errno:
*
* > for i in linux.thread.*0/log.*; do grep 'libc errno' $i | head -1; done
* libc errno loc: 0x00007f153de26698
* libc errno loc: 0x00007f153de26698
* > for i in pthreads.pthreads.*0/log.*; do grep 'libc errno' $i | head -1; done
* libc errno loc: 0x00007fc24d1ce698
* libc errno loc: 0x00007fc24d1cd8b8
* libc errno loc: 0x00007fc24c7cc8b8
*/
typedef int *(*errno_loc_t)(void);
#ifdef LINUX
static bool is_clone3_enosys = false;
static bool is_sigqueueinfo_enosys = false;
#endif
int suspend_signum;
/* Returns the cached function that yields libc's errno location, or NULL if
 * none has been found.  When do_init is true, first walks the loaded modules
 * looking for a module whose name starts with "libc.so" and caches its
 * __errno_location export.
 */
static errno_loc_t
get_libc_errno_location(bool do_init)
{
    static errno_loc_t libc_errno_loc;

    if (do_init) {
        module_iterator_t *mi = module_iterator_start();
        while (module_iterator_hasnext(mi)) {
            module_area_t *area = module_iterator_next(mi);
            const char *modname = GET_MODULE_NAME(&area->names);
            /* We match on the start of the module name.
             * GET_MODULE_NAME never includes the path: i#138 will add path.
             */
            if (modname != NULL && strstr(modname, "libc.so") == modname) {
                bool found = true;
                libc_errno_loc =
                    (errno_loc_t)d_r_get_proc_address(area->start, "__errno_location");
                ASSERT(libc_errno_loc != NULL);
                LOG(GLOBAL, LOG_THREADS, 2, "libc errno loc func: " PFX "\n",
                    libc_errno_loc);
                /* A libc that the private loader knows about is not accepted
                 * as the final match: we keep iterating.  DR may still be hooked up
                 * to app's libc.  So right now, we still need this routine.
                 * we can remove this after libc independency and/or
                 * early injection
                 */
                if (INTERNAL_OPTION(private_loader)) {
                    acquire_recursive_lock(&privload_lock);
                    if (privload_lookup_by_base(area->start) != NULL)
                        found = false;
                    release_recursive_lock(&privload_lock);
                }
                if (found)
                    break;
            }
        }
        module_iterator_stop(mi);
#ifdef HAVE_TLS
        /* Record errno's offset from the private lib TLS base and switch to
         * our_libc_errno_loc(), which applies that offset to the app's TLS base.
         * NOTE(review): presumably unnecessary once DR is independent of libc,
         * then we don't need to do this.
         */
        if (INTERNAL_OPTION(private_loader) && libc_errno_loc != NULL) {
            void *priv_lib_tls_base = os_get_priv_tls_base(NULL, TLS_REG_LIB);
            ASSERT(priv_lib_tls_base != NULL);
            libc_errno_tls_offs = (void *)libc_errno_loc() - priv_lib_tls_base;
            libc_errno_loc = &our_libc_errno_loc;
        }
#endif
    }
    return libc_errno_loc;
}
* we have libc independence or our own private isolated libc we need
* to preserve the app's libc's errno
*/
int
get_libc_errno(void)
{
#if defined(STANDALONE_UNIT_TEST) && (defined(MACOS) || defined(ANDROID))
return errno;
#else
# ifdef STANDALONE_UNIT_TEST
errno_loc_t func = __errno_location;
# else
errno_loc_t func = get_libc_errno_location(false);
# endif
if (func == NULL) {
return 0;
} else {
int *loc = (*func)();
ASSERT(loc != NULL);
LOG(THREAD_GET, LOG_THREADS, 5, "libc errno loc: " PFX "\n", loc);
if (loc != NULL)
return *loc;
}
return 0;
#endif
}
/* Besides errno, pthreads keeps other per-thread locations such as
* h_errno and res_state. See glibc-2.2.4/linuxthreads/errno.c.
* If dynamo ever modifies those we'll need to do to them what we now do to
* errno.
*/
* does an execve of /bin/sh -- not sure what's going on, but using our
* own implementation of unsetenv fixes all our problems. If we use
* libc's, unsetenv either does nothing or ends up having getenv return
* NULL for other vars that are obviously set (by iterating through environ).
* FIXME: find out the real story here.
*/
int
our_unsetenv(const char *name)
{
size_t name_len;
char **env = our_environ;
if (name == NULL || *name == '\0' || strchr(name, '=') != NULL) {
return -1;
}
ASSERT(our_environ != NULL);
if (our_environ == NULL)
return -1;
name_len = strlen(name);
while (*env != NULL) {
if (strncmp(*env, name, name_len) == 0 && (*env)[name_len] == '=') {
* handle later matches.
*/
char **e;
for (e = env; *e != NULL; e++)
*e = *(e + 1);
} else {
env++;
}
}
return 0;
}
/* Disables every entry named name in our_environ by overwriting its name in
 * place, leaving the positions of all entries unchanged.  Returns false if
 * name is NULL, empty, or contains '=', or if our_environ is not set;
 * returns true otherwise.
 */
bool
disable_env(const char *name)
{
    size_t name_len;
    char **env = our_environ;

    if (name == NULL || *name == '\0' || strchr(name, '=') != NULL) {
        return false;
    }
    ASSERT(our_environ != NULL);
    if (our_environ == NULL)
        return false;

    name_len = strlen(name);
    while (*env != NULL) {
        if (strncmp(*env, name, name_len) == 0 && (*env)[name_len] == '=') {
            /* We have a match.  If we shift subsequent entries we'll mess
             * up access to auxv, which is after the env block, so we instead
             * disable the env var by changing its name.
             * We keep going to handle later matches.
             */
            snprintf(*env, name_len, "__disabled__");
        }
        env++;
    }
    return true;
}
*/
char *
our_getenv(const char *name)
{
char **env = our_environ;
size_t i;
size_t name_len;
if (name == NULL || name[0] == '\0' || strchr(name, '=') != NULL) {
return NULL;
}
ASSERT_MESSAGE(CHKLVL_ASSERTS,
"our_environ is missing. _init() or "
"dynamorio_set_envp() were not called",
our_environ != NULL);
if (our_environ == NULL)
return NULL;
name_len = strlen(name);
for (i = 0; env[i] != NULL; i++) {
if (strncmp(env[i], name, name_len) == 0 && env[i][name_len] == '=') {
return env[i] + name_len + 1;
}
}
return NULL;
}
/* Returns whether the auxiliary vector can be assumed to follow the
 * environment block we use: false for STATIC_LIBRARY builds, true otherwise.
 */
bool
is_our_environ_followed_by_auxv(void)
{
#ifdef STATIC_LIBRARY
    /* For the static library the environment we use is likely no longer on
     * the stack (i#2122).
     */
    return false;
#else
    return true;
#endif
}
* routine down below, but drpreload.so comes first and calls
* dynamorio_app_init before our own _init routine gets called. Apps using the
* app API are unaffected because our _init routine will have run by then. For
* STATIC_LIBRARY, we used to set our_environ in our_init(), but to support
* the app setting DYNAMORIO_OPTIONS after our_init() runs, we now just use environ.
*/
DYNAMORIO_EXPORT
void
dynamorio_set_envp(char **envp)
{
our_environ = envp;
}
/* Library initializer.  Records envp in our_environ (for non-STATIC_LIBRARY
 * builds) and, when takeover is requested, initializes DR and takes over the
 * app.  argc and argv are not used here.  Always returns 0.
 */
static int
our_init(int argc, char **argv, char **envp)
{
    /* We can take over here.  Note that when using
     * drpreload, this is called *after* we have already taken over.
     */
    extern void dynamorio_app_take_over(void);
    bool takeover = false;
#ifdef INIT_TAKE_OVER
    takeover = true;
#endif
#ifdef VMX86_SERVER
    /* Overrides any value set above. */
    takeover = os_in_vmkernel_classic();
#endif
#ifndef STATIC_LIBRARY
    if (our_environ != NULL) {
        /* Already set, e.g. via dynamorio_set_envp(): it must agree with envp. */
        ASSERT(our_environ == envp);
    } else {
        our_environ = envp;
    }
#endif
#ifdef STATIC_LIBRARY
    if (!takeover) {
        const char *takeover_env = getenv("DYNAMORIO_TAKEOVER_IN_INIT");
        if (takeover_env != NULL && strcmp(takeover_env, "1") == 0) {
            takeover = true;
        }
    }
#endif
    if (takeover) {
        /* Only take over when init returned 0. */
        if (dynamorio_app_init() == 0) {
            dynamorio_app_take_over();
        }
    }
    return 0;
}
#if defined(STATIC_LIBRARY) || defined(STANDALONE_UNIT_TEST) || defined(RISCV64)
* like the app's exe or unit_tests, we add a pointer to our_init() to the
* .init_array section. We can't use the constructor attribute because not all
* toolchains pass the args and environment to the constructor.
*
* RISC-V, as a new ISA, does not support obsolete .init section, so we always use
* .init_array section for RISC-V.
*/
static init_fn_t
# ifdef MACOS
__attribute__((section("__DATA,__mod_init_func"), aligned(sizeof(void *)), used))
# else
__attribute__((section(".init_array"), aligned(sizeof(void *)), used))
# endif
init_array[] = { our_init };
#else
/* In the remaining configurations we define _init ourselves and forward to
 * our_init().
 */
int
_init(int argc, char **argv, char **envp)
{
#    ifdef ANDROID
    /* On Android we do not use the passed-in args: we
     * rely on DR being listed before libc so we can read the TLS slot the
     * kernel set up.
     */
    if (!get_kernel_args(&argc, &argv, &envp)) {
        /* XXX: scan the stack and look for known auxv patterns or something. */
        argc = 0;
        argv = NULL;
        envp = NULL;
    }
    ASSERT_MESSAGE(CHKLVL_ASSERTS, "failed to find envp", envp != NULL);
#    endif
    return our_init(argc, argv, envp);
}
#endif
/* Reports the cached kernel_64bit flag, which get_uname() sets when the
 * machine string is "x86_64".
 */
bool
kernel_is_64bit(void)
{
    const bool is_64bit_kernel = kernel_64bit;
    return is_64bit_kernel;
}
#ifdef MACOS
/* Issues a two-level SYS___sysctl query named by level0 and level1, storing up
 * to bufsz bytes of the result in buf.  Returns true if the syscall result is
 * non-negative.
 */
static bool
sysctl_query(int level0, int level1, void *buf, size_t bufsz)
{
    int name[2] = { level0, level1 };
    size_t len = bufsz;
    int res = dynamorio_syscall(SYS___sysctl, 6, &name, 2, buf, &len, NULL, 0);
    return res >= 0;
}
/* Returns the cached macos_version value, which get_uname() fills in from the
 * kernel release string.
 */
int
os_get_version(void)
{
    const int version = macos_version;
    return version;
}
#endif
/* Queries and logs the system identification strings, sets kernel_64bit when
 * the machine is "x86_64", and on MacOS records the kernel major version in
 * macos_version (warning if it is outside the supported range).
 */
static void
get_uname(void)
{
    /* NOTE(review): presumably only called at init, so we assume no synch
     * or .data unprot is needed -- confirm.
     */
    static struct utsname uinfo; /* can be large, avoid stack overflow */
#ifdef MACOS
    if (!sysctl_query(CTL_KERN, KERN_OSTYPE, &uinfo.sysname, sizeof(uinfo.sysname)) ||
        !sysctl_query(CTL_KERN, KERN_HOSTNAME, &uinfo.nodename, sizeof(uinfo.nodename)) ||
        !sysctl_query(CTL_KERN, KERN_OSRELEASE, &uinfo.release, sizeof(uinfo.release)) ||
        !sysctl_query(CTL_KERN, KERN_VERSION, &uinfo.version, sizeof(uinfo.version)) ||
        !sysctl_query(CTL_HW, HW_MACHINE, &uinfo.machine, sizeof(uinfo.machine))) {
        ASSERT(false && "sysctl queries failed");
        return;
    }
#else
    DEBUG_DECLARE(int res =)
    dynamorio_syscall(SYS_uname, 1, (ptr_uint_t)&uinfo);
    ASSERT(res >= 0);
#endif
    LOG(GLOBAL, LOG_TOP, 1, "uname:\n\tsysname: %s\n", uinfo.sysname);
    LOG(GLOBAL, LOG_TOP, 1, "\tnodename: %s\n", uinfo.nodename);
    LOG(GLOBAL, LOG_TOP, 1, "\trelease: %s\n", uinfo.release);
    LOG(GLOBAL, LOG_TOP, 1, "\tversion: %s\n", uinfo.version);
    LOG(GLOBAL, LOG_TOP, 1, "\tmachine: %s\n", uinfo.machine);
    /* sizeof includes the terminating NUL, so this is an exact match. */
    if (strncmp(uinfo.machine, "x86_64", sizeof("x86_64")) == 0)
        kernel_64bit = true;
#ifdef MACOS
    /* XXX: it would be nice to skip this check in some cases so we don't have to set env
     * vars for frontends to see the options but I'm still afraid of some syscall
     * crash with no output: I'd rather have two messages than silent crashing.
     */
    if (DYNAMO_OPTION(max_supported_os_version) != 0) { /* 0 disables */
#    define MIN_DARWIN_VERSION_SUPPORTED 11
        /* Initialized so that a failed parse does not store an indeterminate
         * value into macos_version below.
         */
        int kernel_major = 0;
        if (sscanf(uinfo.release, "%d", &kernel_major) != 1 ||
            kernel_major > DYNAMO_OPTION(max_supported_os_version) ||
            kernel_major < MIN_DARWIN_VERSION_SUPPORTED) {
            /* We make this non-fatal as it's likely DR will work */
            SYSLOG(SYSLOG_WARNING, UNSUPPORTED_OS_VERSION, 3, get_application_name(),
                   get_application_pid(), uinfo.release);
        }
        macos_version = kernel_major;
    }
#endif
}
#if defined(LINUX)
* we're running on. Particularly, we are interested in detecting missing
* support early-on for syscalls that require complex pre-syscall handling
* by DR. We use this information to fail early for those syscalls.
*
* XXX: Move other logic for detecting unsupported syscalls from their
* respective locations to here at init time, like that for
* SYS_memfd_create in os_create_memory_file.
*
*/
static void
detect_unsupported_syscalls()
{
* these args.
*/
int clone3_errno =
dynamorio_syscall(SYS_clone3, 2, NULL , 0 );
ASSERT(clone3_errno == -ENOSYS || clone3_errno == -EINVAL);
is_clone3_enosys = clone3_errno == -ENOSYS;
* on the signal number to support kernel variation.
*/
int sigqueue_errno = dynamorio_syscall(SYS_rt_tgsigqueueinfo, 4, get_process_id(),
get_sys_thread_id(), -1, NULL);
ASSERT(sigqueue_errno == -ENOSYS || sigqueue_errno == -EINVAL ||
sigqueue_errno == -EFAULT);
is_sigqueueinfo_enosys = sigqueue_errno == -ENOSYS;
if (!IS_STRING_OPTION_EMPTY(xarch_root)) {
* For now we pretend this syscall doesn't work, to get basic apps
* working under QEMU.
*/
is_sigqueueinfo_enosys = true;
}
}
#endif
/* Returns whether the rt_tgsigqueueinfo syscall was detected as available.
 * On non-Linux builds this is always false.
 */
bool
is_sigqueue_supported(void)
{
    const bool supported = IF_LINUX_ELSE(!is_sigqueueinfo_enosys, false);
    return supported;
}
void
d_r_os_init(void)
{
ksynch_init();
get_uname();
get_application_name();
get_application_base();
get_dynamo_library_bounds();
get_alt_dynamo_library_bounds();
* or whether getpid suffices. even 2.4 kernels have gettid
* (maps to getpid), don't have an old enough target to test this.
*/
#ifdef MACOS
kernel_thread_groups = (dynamorio_syscall(SYS_thread_selfid, 0) >= 0);
#else
kernel_thread_groups = (dynamorio_syscall(SYS_gettid, 0) >= 0);
#endif
LOG(GLOBAL, LOG_TOP | LOG_STATS, 1, "thread id is from %s\n",
kernel_thread_groups ? "gettid" : "getpid");
#ifdef MACOS
* thread id on 10.5, so we don't support it.
*/
if (!kernel_thread_groups) {
SYSLOG(SYSLOG_WARNING, UNSUPPORTED_OS_VERSION, 3, get_application_name(),
get_application_pid(), "Mac OSX 10.5 or earlier");
}
#else
ASSERT_CURIOSITY(kernel_thread_groups);
#endif
pid_cached = get_process_id();
#ifdef VMX86_SERVER
vmk_init();
#endif
#if defined(LINUX)
detect_unsupported_syscalls();
#endif
* We choose a normally-synchronous signal for a lower chance that the app has
* blocked it when we attach to an already-running app.
* On Linux where we have SYS_rt_sigqueueinfo we use
* SIGILL and share with nudges, distinguishing via the NUDGE_IS_SUSPEND flag.
* (For pre-2.6.31 Linux kernels without SYS_rt_sigqueueinfo nudges are not
* supported so there are no collisions there).
* (We initially used SIGSTKFLT but gdb has poor support for it.)
* Unfortunately, QEMU crashes when we send SIGILL or SIGFPE to its thread trying
* to take it over, so we are forced to dynamically vary the number and we switch
* to SIGSTKFLT for QEMU where we live with the lack of gdb support.
*/
suspend_signum = IF_MACOS_ELSE(SIGFPE, NUDGESIG_SIGNUM);
#ifdef LINUX
if (!IS_STRING_OPTION_EMPTY(xarch_root)) {
LOG(GLOBAL, LOG_TOP | LOG_ASYNCH, 1, "switching suspend signal to SIGSTKFLT\n");
suspend_signum = SIGSTKFLT;
}
#endif
d_r_signal_init();
fault_handling_initialized = true;
memquery_init();
#ifdef PROFILE_RDTSC
if (dynamo_options.profile_times) {
ASSERT_NOT_TESTED();
kilo_hertz = get_timer_frequency();
LOG(GLOBAL, LOG_TOP | LOG_STATS, 1, "CPU MHz is %d\n", kilo_hertz / 1000);
}
#endif
IF_NO_MEMQUERY(memcache_init());
fd_table = generic_hash_create(
GLOBAL_DCONTEXT, INIT_HTABLE_SIZE_FD, 80 ,
HASHTABLE_SHARED | HASHTABLE_PERSISTENT, NULL _IF_DEBUG("fd table"));
* not initialized.
*/
while (num_fd_add_pre_heap > 0) {
num_fd_add_pre_heap--;
fd_table_add(fd_add_pre_heap[num_fd_add_pre_heap],
fd_add_pre_heap_flags[num_fd_add_pre_heap]);
}
get_dynamorio_dll_start();
#ifdef LINUX
if (DYNAMO_OPTION(emulate_brk))
init_emulated_brk(NULL);
#endif
#ifdef ANDROID
* for non-client-interface as well, as this initializes DR_TLS_BASE_OFFSET
* (i#1931).
*/
init_android_version();
#endif
#ifdef LINUX
if (!standalone_library)
d_r_rseq_init();
#endif
#ifdef MACOS64
tls_process_init();
#endif
}
void
os_file_init(void)
{
* descriptor limit as viewed by the app, and block SYS_dup{2,3} and
* SYS_fcntl(F_DUPFD*) from creating a file explicitly in our space. We do
* not try to stop incremental file opening from extending into our space:
* if the app really is running out of fds, we'll give it some of ours:
* after all we probably don't need all -steal_fds, and if we really need fds
* we typically open them at startup. We also don't bother watching all
* syscalls that take in fds from affecting our fds.
*/
if (DYNAMO_OPTION(steal_fds) > 0) {
struct rlimit rlimit_nofile;
if (dynamorio_syscall(
IF_MACOS_ELSE(SYS_getrlimit, IF_X64_ELSE(SYS_getrlimit, SYS_ugetrlimit)),
2, RLIMIT_NOFILE, &rlimit_nofile) != 0) {
SYSLOG_INTERNAL_WARNING("getrlimit RLIMIT_NOFILE failed");
rlimit_nofile.rlim_cur = 1024;
rlimit_nofile.rlim_max = 1024;
}
* for simplicity and to give as much room as possible to app,
* raise soft limit to equal hard limit.
* if an app really depends on a low soft limit, they can run
* with -steal_fds 0.
*/
if (rlimit_nofile.rlim_max > DYNAMO_OPTION(steal_fds)) {
int res;
min_dr_fd = rlimit_nofile.rlim_max - DYNAMO_OPTION(steal_fds);
app_rlimit_nofile.rlim_max = min_dr_fd;
app_rlimit_nofile.rlim_cur = app_rlimit_nofile.rlim_max;
rlimit_nofile.rlim_cur = rlimit_nofile.rlim_max;
res = dynamorio_syscall(SYS_setrlimit, 2, RLIMIT_NOFILE, &rlimit_nofile);
if (res != 0) {
SYSLOG_INTERNAL_WARNING("unable to raise RLIMIT_NOFILE soft limit: %d",
res);
}
} else
SYSLOG_INTERNAL_WARNING("unable to reserve fds");
}
}
/* Returns the process id formatted as a decimal string held in a static
 * buffer.  The string is (re)computed when the buffer is still empty or when
 * ignore_cache is true.
 */
static char *
get_application_pid_helper(bool ignore_cache)
{
    static char pidstr[16];
    bool need_refresh = ignore_cache || pidstr[0] == '\0';

    if (need_refresh) {
        int cur_pid = get_process_id();
        /* Leave the final byte untouched so the buffer stays terminated. */
        snprintf(pidstr, sizeof(pidstr) - 1, "%d", cur_pid);
    }
    return pidstr;
}
/* Returns the cached decimal string form of the process id. */
char *
get_application_pid()
{
    char *cached_pid_str = get_application_pid_helper(false);
    return cached_pid_str;
}
* On recent XNU versions, the kernel now prefixes the executable path
* with the string executable_path= so it can be parsed getenv style.
*/
#ifdef MACOS
# define EXECUTABLE_KEY "executable_path="
#endif
static char *
get_application_name_helper(bool ignore_cache, bool full_path)
{
if (!executable_path[0] || ignore_cache) {
#ifdef VMX86_SERVER
if (os_in_vmkernel_userworld()) {
vmk_getnamefrompid(pid, executable_path, sizeof(executable_path));
} else
#endif
if (DYNAMO_OPTION(early_inject)) {
ASSERT(executable_path[0] != '\0' &&
"i#907: Can't read /proc/self/exe for early injection");
} else {
#ifdef LINUX
strncpy(executable_path, read_proc_self_exe(ignore_cache),
BUFFER_SIZE_ELEMENTS(executable_path));
#else
char *c, **env = our_environ;
do {
env++;
} while (*env != NULL);
env++;
c = *env;
if (strncmp(EXECUTABLE_KEY, c, strlen(EXECUTABLE_KEY)) == 0) {
c += strlen(EXECUTABLE_KEY);
}
* this should usually be absolute -- but we go ahead and
* handle relative just in case (and to handle child processes).
* We add the cur dir, but note that the resulting path can
* still contain . or .. so it's not normalized (but it is a
* correct absolute path). Xref i#1402, i#1406, i#1407.
*/
if (*c != '/') {
int len;
if (!os_get_current_dir(executable_path,
BUFFER_SIZE_ELEMENTS(executable_path)))
len = 0;
else
len = strlen(executable_path);
snprintf(executable_path + len,
BUFFER_SIZE_ELEMENTS(executable_path) - len, "%s%s",
len > 0 ? "/" : "", c);
} else
strncpy(executable_path, c, BUFFER_SIZE_ELEMENTS(executable_path));
#endif
NULL_TERMINATE_BUFFER(executable_path);
* _init().
*/
ASSERT(strlen(executable_path) > 0 && "readlink /proc/self/exe failed");
}
}
if (executable_basename == NULL || ignore_cache) {
executable_basename = strrchr(executable_path, '/');
executable_basename =
(executable_basename == NULL ? executable_path : executable_basename + 1);
}
return (full_path ? executable_path : executable_basename);
}
/* Returns the cached full path of the application's executable. */
char *
get_application_name(void)
{
    char *full_exe_path =
        get_application_name_helper(false /* use cache */, true /* full path */);
    return full_exe_path;
}
* issues with /proc/self/exe.
*/
void
set_executable_path(const char *exe_path)
{
strncpy(executable_path, exe_path, BUFFER_SIZE_ELEMENTS(executable_path));
NULL_TERMINATE_BUFFER(executable_path);
get_application_name_helper(true , false );
}
* get process names to do selective process following (PR 212034). The
* alternative is to duplicate or compile in this code into libdrpreload.so,
* which is messy. Besides, libdynamorio.so is already loaded into the process
* and avaiable, so cleaner to just use functions from it.
*/
DYNAMORIO_EXPORT const char *
get_application_short_name(void)
{
return get_application_name_helper(false, false );
}
* by get_app_args().
*/
void
set_app_args(IN int *app_argc_in, IN char **app_argv_in)
{
app_argc = app_argc_in;
app_argv = app_argv_in;
}
/* Returns the number of app arguments recorded via set_app_args().  Only
 * available with -early_inject: otherwise sets the client error code to
 * DR_ERROR_NOT_IMPLEMENTED and returns -1.
 */
int
num_app_args()
{
    if (DYNAMO_OPTION(early_inject))
        return *app_argc;

    set_client_error_code(NULL, DR_ERROR_NOT_IMPLEMENTED);
    return -1;
}
/* Fills args_array with up to args_count of the app's arguments and returns
 * the number of entries written.  Each entry points at the original argv
 * string and its size includes the terminating NUL.  Returns -1 and sets the
 * client error code on a NULL array, a negative count, or when -early_inject
 * is not in use.
 */
int
get_app_args(OUT dr_app_arg_t *args_array, int args_count)
{
    if (args_array == NULL || args_count < 0) {
        set_client_error_code(NULL, DR_ERROR_INVALID_PARAMETER);
        return -1;
    }

    if (!DYNAMO_OPTION(early_inject)) {
        set_client_error_code(NULL, DR_ERROR_NOT_IMPLEMENTED);
        return -1;
    }

    int available = num_app_args();
    int to_copy = args_count;
    if (available <= args_count)
        to_copy = available;

    int idx = 0;
    while (idx < to_copy) {
        dr_app_arg_t *dst = &args_array[idx];
        dst->start = (void *)app_argv[idx];
        dst->size = strlen(app_argv[idx]) + 1 /* null char */;
        dst->encoding = DR_APP_ARG_CSTR_COMPAT;
        idx++;
    }
    return to_copy;
}
#define PROC_CPUINFO "/proc/cpuinfo"
#define CPUMHZ_LINE_LENGTH 64
#define CPUMHZ_LINE_FORMAT "cpu MHz\t\t: %lu.%03lu\n"
/* Parses the first "cpu MHz" line found in the first page of /proc/cpuinfo and
 * returns the frequency in kHz.  Returns 1000 * 1000 if the file cannot be
 * opened, and the same default value if no line can be parsed.
 */
static timestamp_t
get_timer_frequency_cpuinfo(void)
{
    file_t cpuinfo;
    ssize_t nread;
    char *buf;
    char *mhz_line;
    /* Defaults, used when parsing fails. */
    ulong cpu_mhz = 1000;
    ulong cpu_khz = 0;

    cpuinfo = os_open(PROC_CPUINFO, OS_OPEN_READ);
    if (cpuinfo == INVALID_FILE)
        return 1000 * 1000; /* 1 GHz */

    /* We only read a single page of /proc/cpuinfo.  In the event of a short
     * or interrupted read, our timer frequency estimate will be off, but it's
     * not the end of the world.
     * FIXME: Factor a buffered file reader out of our maps iterator if we want
     * to do this the right way.
     */
    buf = global_heap_alloc(PAGE_SIZE HEAPACCT(ACCT_OTHER));
    /* Read one byte less than the buffer so there is room for the NUL. */
    nread = os_read(cpuinfo, buf, PAGE_SIZE - 1);
    if (nread > 0) {
        buf[nread] = '\0';
        mhz_line = strstr(buf, "cpu MHz\t\t:");
        if (mhz_line != NULL &&
            sscanf(mhz_line, CPUMHZ_LINE_FORMAT, &cpu_mhz, &cpu_khz) == 2) {
            LOG(GLOBAL, LOG_ALL, 2, "Processor speed exactly %lu.%03luMHz\n", cpu_mhz,
                cpu_khz);
        }
    }
    global_heap_free(buf, PAGE_SIZE HEAPACCT(ACCT_OTHER));
    os_close(cpuinfo);

    return cpu_mhz * 1000 + cpu_khz;
}
/* Returns the timer frequency: from the vmkernel when running in a vmkernel
 * userworld (VMX86_SERVER builds), otherwise as parsed from /proc/cpuinfo.
 */
timestamp_t
get_timer_frequency()
{
    timestamp_t freq;
#ifdef VMX86_SERVER
    if (os_in_vmkernel_userworld()) {
        freq = vmk_get_timer_frequency();
        return freq;
    }
#endif
    freq = get_timer_frequency_cpuinfo();
    return freq;
}
/* DR reports time as UTC counted from Jan 1, 1601.
 * That's the Windows standard.  But Linux uses the Epoch of Jan 1, 1970.
 */
/* Number of seconds from Jan 1, 1601 to Jan 1, 1970.  The query_time_*()
 * routines add this (scaled to their unit) to the gettimeofday result.
 */
#define UTC_TO_EPOCH_SECONDS 11644473600
/* Returns the current time in seconds, shifted by UTC_TO_EPOCH_SECONDS.
 * Returns 0 if the time query fails.
 *
 * NOTE(review): the sum is converted to uint on return; if uint is 32 bits the
 * shifted value does not fit -- confirm callers expect the truncated value.
 */
uint
query_time_seconds(void)
{
    struct timeval current_time;
#if defined(MACOS) && defined(AARCH64)
    /* XXX: this uses the library call rather than a raw system call, which is
     * only safe if the library call
     * is re-entrant, just a simple load from commpage).
     */
# undef gettimeofday
    uint64 val = gettimeofday(&current_time, NULL);
#else
    uint64 val = dynamorio_syscall(SYS_gettimeofday, 2, &current_time, NULL);
# ifdef MACOS
    if (macos_version < MACOS_VERSION_SIERRA) {
        /* On pre-Sierra macOS the syscall result itself is used as the
         * seconds value.
         */
        if ((int)val < 0)
            return 0;
        return (uint)val + UTC_TO_EPOCH_SECONDS;
    }
# endif
#endif
    if ((int)val >= 0) {
        return current_time.tv_sec + UTC_TO_EPOCH_SECONDS;
    } else {
        ASSERT_NOT_REACHED();
        return 0;
    }
}
#if defined(MACOS) && defined(AARCH64)
# include <sys/time.h>
#endif
/* Returns the current time in milliseconds, shifted by UTC_TO_EPOCH_SECONDS
 * (scaled to milliseconds).  Returns 0 if the time query fails.
 */
uint64
query_time_millis()
{
    struct timeval current_time;
#if !(defined(MACOS) && defined(AARCH64))
    uint64 val = dynamorio_syscall(SYS_gettimeofday, 2, &current_time, NULL);
#else
    /* XXX: this uses the library call rather than a raw system call, which is
     * only safe if the library call
     * is re-entrant, just a simple load from commpage).
     */
# undef gettimeofday
    uint64 val = gettimeofday(&current_time, NULL);
#endif
#ifdef MACOS
    if (macos_version < MACOS_VERSION_SIERRA) {
        /* On pre-Sierra macOS a positive result carries the time itself:
         * seconds in the low 32 bits, microseconds in the high 32 bits.
         */
        if ((int)val > 0) {
            current_time.tv_sec = (uint)val;
            current_time.tv_usec = (uint)(val >> 32);
        }
    }
#endif
    if ((int)val >= 0) {
        uint64 res =
            (((uint64)current_time.tv_sec) * 1000) + (current_time.tv_usec / 1000);
        res += UTC_TO_EPOCH_SECONDS * 1000;
        return res;
    } else {
        ASSERT_NOT_REACHED();
        return 0;
    }
}
/* Returns the current time in microseconds, shifted by UTC_TO_EPOCH_SECONDS
 * (scaled to microseconds).  Returns 0 if the time query fails.
 *
 * NOTE(review): unlike query_time_seconds() and query_time_millis(), this
 * routine has no special case for MACOS && AARCH64 -- confirm that is intended.
 */
uint64
query_time_micros()
{
    struct timeval current_time;
    uint64 val = dynamorio_syscall(SYS_gettimeofday, 2, &current_time, NULL);
#ifdef MACOS
    if (macos_version < MACOS_VERSION_SIERRA) {
        /* On pre-Sierra macOS a positive result carries the time itself:
         * seconds in the low 32 bits, microseconds in the high 32 bits.
         */
        if ((int)val > 0) {
            current_time.tv_sec = (uint)val;
            current_time.tv_usec = (uint)(val >> 32);
        }
    }
#endif
    if ((int)val >= 0) {
        uint64 res = (((uint64)current_time.tv_sec) * 1000000) + current_time.tv_usec;
        res += UTC_TO_EPOCH_SECONDS * 1000000;
        return res;
    } else {
        ASSERT_NOT_REACHED();
        return 0;
    }
}
#ifdef RETURN_AFTER_CALL
/* Walks the frame-pointer chain to find the outermost return address.  Makes
 * assumptions about program state, i.e. that frame pointers are valid,
 * and should be used only at well-known points for release build.
 */
/* Follows the chain of saved frame pointers, starting from the current frame
 * pointer register, and returns the return address found in the last frame
 * visited (0 if no frame was readable).
 */
static app_pc
find_stack_bottom()
{
app_pc retaddr = 0;
int depth = 0;
reg_t *fp;
/* Copy the current frame pointer register into fp. */
asm("mov %%" ASM_XBP ", %0" : "=m"(fp));
LOG(THREAD_GET, LOG_ALL, 3, "Find stack bottom:\n");
/* Each frame is read as two reg_t values: fp[0] is treated as the parent
 * frame pointer and fp[1] as the return address.
 */
while (fp != NULL && is_readable_without_exception((byte *)fp, sizeof(reg_t) * 2)) {
retaddr = (app_pc) * (fp + 1);
LOG(THREAD_GET, LOG_ALL, 3,
"\tframe ptr " PFX " => parent " PFX ", ret = " PFX "\n", fp, *fp, retaddr);
depth++;
/* Stop on a frame that points to itself or after more than 100 frames. */
if (fp == (reg_t *)*fp || depth > 100)
break;
fp = (reg_t *)*fp;
}
return retaddr;
}
#endif
/* Tears down the OS-level state used by this file: the signal, memquery and
 * ksynch subsystems, the fd table, and the locks deleted below.
 */
void
os_slow_exit(void)
{
#ifdef MACOS64
tls_process_exit();
#endif
#ifdef LINUX
/* rseq teardown is skipped when running as a standalone library. */
if (!standalone_library)
d_r_rseq_exit();
#endif
d_r_signal_exit();
memquery_exit();
ksynch_exit();
generic_hash_destroy(GLOBAL_DCONTEXT, fd_table);
/* Clear the pointer so no stale table is referenced after destruction. */
fd_table = NULL;
/* On detach, also reset the cached vsyscall page address. */
if (doing_detach) {
vsyscall_page_start = NULL;
ASSERT(num_fd_add_pre_heap == 0);
}
DELETE_LOCK(set_thread_area_lock);
DELETE_LOCK(client_tls_lock);
IF_NO_MEMQUERY(memcache_exit());
}
/* Helper that blocks most signals and then calls cleanup_and_terminate()
 * (i#2921).
 */
/* Blocks all non-crash signals other than SUSPEND_SIGNAL (and, for SYS_kill,
 * other than dcontext->sys_param0 as well) and then forwards every argument
 * unchanged to cleanup_and_terminate().
 */
void
block_cleanup_and_terminate(dcontext_t *dcontext, int sysnum, ptr_uint_t sys_arg1,
                            ptr_uint_t sys_arg2, bool exitproc,
                            ptr_uint_t sys_arg3, ptr_uint_t sys_arg4)
{
    /* This thread is about to exit, so we block signals here: any
     * signal that reaches us now can be delayed until after the exit is complete.
     * We may still receive a suspend signal for synchronization that we may need
     * to reply to (i#2921).
     */
    if (sysnum == SYS_kill)
        block_all_noncrash_signals_except(NULL, 2, dcontext->sys_param0, SUSPEND_SIGNAL);
    else
        block_all_noncrash_signals_except(NULL, 1, SUSPEND_SIGNAL);
    cleanup_and_terminate(dcontext, sysnum, sys_arg1, sys_arg2, exitproc, sys_arg3,
                          sys_arg4);
}
/* Intentionally empty: no OS-specific work is done here. */
void
os_fast_exit(void)
{
}
/* Terminates the process with exit_code.
 *
 * An exit_code above 0x00ff means "terminate via signal": the signal number is
 * taken from the second byte of exit_code.  Otherwise, if TERMINATE_CLEANUP is
 * set in flags, the exit goes through block_cleanup_and_terminate(); if not,
 * the process exits directly after d_r_config_exit().
 * Only TERMINATE_PROCESS is implemented.
 */
void
os_terminate_with_code(dcontext_t *dcontext, terminate_flags_t flags, int exit_code)
{
    ASSERT_NOT_IMPLEMENTED(TEST(TERMINATE_PROCESS, flags));

    if (exit_code > 0x00ff) {
        /* The signal number lives in bits 8..15 of the exit code. */
        int sig = (exit_code & 0xff00) >> 8;
        os_terminate_via_signal(dcontext, flags, sig);
        ASSERT_NOT_REACHED();
    }

    if (!TEST(TERMINATE_CLEANUP, flags)) {
        /* Quick exit without the full cleanup path. */
        d_r_config_exit();
        exit_process_syscall(exit_code);
        return;
    }

    KSTOP_REWIND_UNTIL(thread_measured);
    block_cleanup_and_terminate(dcontext, SYSNUM_EXIT_PROCESS, exit_code, 0,
                                true, 0, 0);
}
/* Terminates via os_terminate_with_code() using an exit code of -1. */
void
os_terminate(dcontext_t *dcontext, terminate_flags_t flags)
{
    const int default_exit_code = -1;
    os_terminate_with_code(dcontext, flags, default_exit_code);
}
/* Not implemented: asserts (via ASSERT_NOT_IMPLEMENTED) and returns 0.
 * time_in_milliseconds is ignored.
 */
int
os_timeout(int time_in_milliseconds)
{
ASSERT_NOT_IMPLEMENTED(false);
return 0;
}
/****************************************************************************
 * SEGMENT STEALING
 *
 * Not easy to make truly transparent -- but the alternative of dispatch
 * by thread id on global memory has performance implications.
 * Pull the non-STEAL_SEGMENT code out of the cvs attic for a base if
 * transparency becomes more of a problem.
 */
/* Offset of the embedded state field within os_local_state_t. */
#define TLS_LOCAL_STATE_OFFSET (offsetof(os_local_state_t, state))
/* Offset of the os_local_state_t itself; the offsets below are relative to it. */
#define TLS_OS_LOCAL_STATE 0x00
/* Offsets of individual os_local_state_t fields. */
#define TLS_SELF_OFFSET (TLS_OS_LOCAL_STATE + offsetof(os_local_state_t, self))
#define TLS_THREAD_ID_OFFSET (TLS_OS_LOCAL_STATE + offsetof(os_local_state_t, tid))
#define TLS_DCONTEXT_OFFSET (TLS_OS_LOCAL_STATE + TLS_DCONTEXT_SLOT)
#ifdef X86
# define TLS_MAGIC_OFFSET (TLS_OS_LOCAL_STATE + offsetof(os_local_state_t, magic))
#endif
/* The offsets below are plain os_local_state_t field offsets: unlike the ones
 * above they do not add TLS_OS_LOCAL_STATE.
 */
#define TLS_APP_LIB_TLS_BASE_OFFSET (offsetof(os_local_state_t, app_lib_tls_base))
#define TLS_APP_ALT_TLS_BASE_OFFSET (offsetof(os_local_state_t, app_alt_tls_base))
#define TLS_APP_LIB_TLS_REG_OFFSET (offsetof(os_local_state_t, app_lib_tls_reg))
#define TLS_APP_ALT_TLS_REG_OFFSET (offsetof(os_local_state_t, app_alt_tls_reg))
/* TLS slot accessors.  N.B.: offs must be a 2-byte value (the x86 versions
 * assert this).
 * We use %c[0-9] to get gcc to emit an integer constant without a leading $ for
 * the segment offset.  See the documentation here:
 * http://gcc.gnu.org/onlinedocs/gccint/Output-Template.html#Output-Template
 * Also, var needs to match the pointer size, or else we'll get stack corruption.
 * XXX: This is marked volatile to prevent gcc from speculating this code before
 * checks for is_thread_tls_initialized(), but if we could find a more
 * precise constraint, then the compiler would be able to optimize better.  See
 * glibc comments on THREAD_SELF.
 */
#ifdef DR_HOST_NOT_TARGET
/* DR_HOST_NOT_TARGET builds: no real TLS access is performed.  Each accessor
 * just assigns a dummy value to its operand and asserts that it is never
 * reached.
 */
# define WRITE_TLS_SLOT_IMM(imm, var) var = 0, ASSERT_NOT_REACHED()
# define READ_TLS_SLOT_IMM(imm, var) var = 0, ASSERT_NOT_REACHED()
# define WRITE_TLS_INT_SLOT_IMM(imm, var) var = 0, ASSERT_NOT_REACHED()
# define READ_TLS_INT_SLOT_IMM(imm, var) var = 0, ASSERT_NOT_REACHED()
# define WRITE_TLS_SLOT(offs, var) offs = var ? 0 : 1, ASSERT_NOT_REACHED()
# define READ_TLS_SLOT(offs, var) var = (void *)(ptr_uint_t)offs, ASSERT_NOT_REACHED()
#elif defined(MACOS64) && !defined(AARCH64)
/* For now these macros reach the os_local_state_t through a pointer, and we keep
 * it in slot 6.  If we settle on always doing the full os_local_state_t in slots,
 * we would probably get rid of the indirection here and directly access slot fields.
 *
 * Each macro first loads that pointer from %gs:(DR_TLS_BASE_SLOT * sizeof(void *))
 * into XAX and then accesses the requested offset relative to it.
 */
/* Stores pointer-sized var at immediate offset imm. */
# define WRITE_TLS_SLOT_IMM(imm, var)                                             \
     IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED());                                       \
     ASSERT(sizeof(var) == sizeof(void *));                                       \
     __asm__ __volatile__(                                                        \
         "mov %%gs:%1, %%" ASM_XAX " \n\t"                                        \
         "movq %0, %c2(%%" ASM_XAX ") \n\t"                                       \
         :                                                                        \
         : "r"(var), "m"(*(void **)(DR_TLS_BASE_SLOT * sizeof(void *))), "i"(imm) \
         : "memory", ASM_XAX);
/* Loads pointer-sized var from immediate offset imm. */
# define READ_TLS_SLOT_IMM(imm, var)                                            \
     IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED());                                     \
     ASSERT(sizeof(var) == sizeof(void *));                                     \
     __asm__ __volatile__("mov %%gs:%1, %%" ASM_XAX " \n\t"                     \
                          "movq %c2(%%" ASM_XAX "), %0 \n\t"                    \
                          : "=r"(var)                                           \
                          : "m"(*(void **)(DR_TLS_BASE_SLOT * sizeof(void *))), \
                            "i"(imm)                                            \
                          : ASM_XAX);
/* Stores var at the offset held in the 16-bit variable offs (zero-extended
 * into XDX).
 */
# define WRITE_TLS_SLOT(offs, var)                                              \
     IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED());                                     \
     __asm__ __volatile__("mov %%gs:%0, %%" ASM_XAX " \n\t"                     \
                          "movzwq %1, %%" ASM_XDX " \n\t"                       \
                          "movq %2, (%%" ASM_XAX ", %%" ASM_XDX ") \n\t"        \
                          :                                                     \
                          : "m"(*(void **)(DR_TLS_BASE_SLOT * sizeof(void *))), \
                            "m"(offs), "r"(var)                                 \
                          : "memory", ASM_XAX, ASM_XDX);
/* Loads pointer-sized var from the offset held in the 16-bit variable offs. */
# define READ_TLS_SLOT(offs, var)                                               \
     IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED());                                     \
     ASSERT(sizeof(var) == sizeof(void *));                                     \
     __asm__ __volatile__("mov %%gs:%1, %%" ASM_XAX " \n\t"                     \
                          "movzwq %2, %%" ASM_XDX " \n\t"                       \
                          "movq (%%" ASM_XAX ", %%" ASM_XDX "), %0 \n\t"        \
                          : "=r"(var)                                           \
                          : "m"(*(void **)(DR_TLS_BASE_SLOT * sizeof(void *))), \
                            "m"(offs)                                           \
                          : "memory", ASM_XAX, ASM_XDX);
#elif defined(X86)
/* x86 accessors: slots are addressed directly through the ASM_SEG segment
 * register.  The *_IMM forms take a compile-time constant offset; the other
 * forms take the offset in a 2-byte variable (asserted below).
 */
/* Stores pointer-sized var at immediate offset imm. */
# define WRITE_TLS_SLOT_IMM(imm, var) \
IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED()); \
ASSERT(sizeof(var) == sizeof(void *)); \
asm volatile("mov %0, %" ASM_SEG ":%c1" : : "r"(var), "i"(imm));
/* Loads pointer-sized var from immediate offset imm. */
# define READ_TLS_SLOT_IMM(imm, var) \
IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED()); \
ASSERT(sizeof(var) == sizeof(void *)); \
asm volatile("mov %" ASM_SEG ":%c1, %0" : "=r"(var) : "i"(imm));
/* Stores int-sized var at immediate offset imm. */
# define WRITE_TLS_INT_SLOT_IMM(imm, var) \
IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED()); \
ASSERT(sizeof(var) == sizeof(int)); \
asm volatile("movl %0, %" ASM_SEG ":%c1" : : "r"(var), "i"(imm));
/* Loads int-sized var from immediate offset imm. */
# define READ_TLS_INT_SLOT_IMM(imm, var) \
IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED()); \
ASSERT(sizeof(var) == sizeof(int)); \
asm volatile("movl %" ASM_SEG ":%c1, %0" : "=r"(var) : "i"(imm));
/* Stores var at the offset in offs.  Both operands are passed as memory
 * operands, so they must be lvalues.  The value goes through XAX and the
 * zero-extended offset through XDX, using three separate asm statements.
 */
# define WRITE_TLS_SLOT(offs, var) \
IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED()); \
ASSERT(sizeof(var) == sizeof(void *)); \
ASSERT(sizeof(offs) == 2); \
asm("mov %0, %%" ASM_XAX : : "m"((var)) : ASM_XAX); \
asm("movzw" IF_X64_ELSE("q", "l") " %0, %%" ASM_XDX : : "m"((offs)) : ASM_XDX); \
asm("mov %%" ASM_XAX ", %" ASM_SEG ":(%%" ASM_XDX ")" : : : ASM_XAX, ASM_XDX);
/* Loads var from the offset in offs, going through XAX.
 * NOTE(review): unlike the other x86 accessors this one has no
 * IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED()) check -- confirm that is intended.
 */
# define READ_TLS_SLOT(offs, var) \
ASSERT(sizeof(var) == sizeof(void *)); \
ASSERT(sizeof(offs) == 2); \
asm("movzw" IF_X64_ELSE("q", "l") " %0, %%" ASM_XAX : : "m"((offs)) : ASM_XAX); \
asm("mov %" ASM_SEG ":(%%" ASM_XAX "), %%" ASM_XAX : : : ASM_XAX); \
asm("mov %%" ASM_XAX ", %0" : "=m"((var)) : : ASM_XAX);
#elif defined(AARCHXX) && !defined(MACOS)
/* DR_TLS_BASE_OFFSET is first copied into a local: per the original note there is
 * trouble with relocations if we use a global directly in asm, so we convert to
 * a local variable in these macros.  We pay the cost of the extra instructions
 * for Linux ARM to share the code.
 *
 * Each macro moves that offset into R2, runs READ_TP_TO_R3_DISP_IN_R2 to get
 * the base in R3, and then accesses the slot relative to R3.
 */
/* Stores var at immediate offset imm. */
# define WRITE_TLS_SLOT_IMM(imm, var)                                           \
     do {                                                                       \
         uint _base_offs = DR_TLS_BASE_OFFSET;                                  \
         __asm__ __volatile__("mov " ASM_R2 ", %0 \n\t" READ_TP_TO_R3_DISP_IN_R2 \
                              "str %1, [" ASM_R3 ", %2] \n\t"                   \
                              :                                                 \
                              : "r"(_base_offs), "r"(var), "i"(imm)             \
                              : "memory", ASM_R2, ASM_R3);                      \
     } while (0)
/* Loads var from immediate offset imm. */
# define READ_TLS_SLOT_IMM(imm, var)                                            \
     do {                                                                       \
         uint _base_offs = DR_TLS_BASE_OFFSET;                                  \
         __asm__ __volatile__("mov " ASM_R2 ", %1 \n\t" READ_TP_TO_R3_DISP_IN_R2 \
                              "ldr %0, [" ASM_R3 ", %2] \n\t"                   \
                              : "=r"(var)                                       \
                              : "r"(_base_offs), "i"(imm)                       \
                              : ASM_R2, ASM_R3);                                \
     } while (0)
/* The int-sized variants share the implementations above. */
# define WRITE_TLS_INT_SLOT_IMM WRITE_TLS_SLOT_IMM
# define READ_TLS_INT_SLOT_IMM READ_TLS_SLOT_IMM
/* Stores var at the run-time offset offs (added to the base in R3). */
# define WRITE_TLS_SLOT(offs, var)                                              \
     do {                                                                       \
         uint _base_offs = DR_TLS_BASE_OFFSET;                                  \
         __asm__ __volatile__("mov " ASM_R2 ", %0 \n\t" READ_TP_TO_R3_DISP_IN_R2 \
                              "add " ASM_R3 ", " ASM_R3 ", %2 \n\t"             \
                              "str %1, [" ASM_R3 "] \n\t"                       \
                              :                                                 \
                              : "r"(_base_offs), "r"(var), "r"(offs)            \
                              : "memory", ASM_R2, ASM_R3);                      \
     } while (0)
/* Loads var from the run-time offset offs (added to the base in R3). */
# define READ_TLS_SLOT(offs, var)                                               \
     do {                                                                       \
         uint _base_offs = DR_TLS_BASE_OFFSET;                                  \
         __asm__ __volatile__("mov " ASM_R2 ", %1 \n\t" READ_TP_TO_R3_DISP_IN_R2 \
                              "add " ASM_R3 ", " ASM_R3 ", %2 \n\t"             \
                              "ldr %0, [" ASM_R3 "] \n\t"                       \
                              : "=r"(var)                                       \
                              : "r"(_base_offs), "r"(offs)                      \
                              : ASM_R2, ASM_R3);                                \
     } while (0)
#elif defined(AARCH64) && defined(MACOS)
/* macOS AArch64 accessors: plain C loads and stores relative to the address
 * returned by tls_get_dr_addr().  The *_IMM and *_INT_* forms all forward to
 * the two generic macros below.
 */
# define WRITE_TLS_SLOT_IMM(imm, var) WRITE_TLS_SLOT(imm, var)
# define READ_TLS_SLOT_IMM(imm, var) READ_TLS_SLOT(imm, var)
# define WRITE_TLS_INT_SLOT_IMM(imm, var) WRITE_TLS_SLOT(imm, var)
# define READ_TLS_INT_SLOT_IMM(imm, var) READ_TLS_SLOT(imm, var)
/* Stores var, using var's own type, at tls_get_dr_addr() + offs. */
# define WRITE_TLS_SLOT(offs, var) \
*((__typeof__(var) *)(tls_get_dr_addr() + offs)) = var;
/* Loads var, using var's own type, from tls_get_dr_addr() + offs. */
# define READ_TLS_SLOT(offs, var) \
var = *((__typeof__(var) *)(tls_get_dr_addr() + offs));
#elif defined(RISCV64)
/* RISC-V accessors: the pointer to DR's TLS block is loaded from
 * DR_TLS_BASE_OFFSET(tp) and the slot is then accessed relative to it.
 * That base pointer is 64 bits wide, so it is always loaded with "ld", even
 * in the int-sized variants, which use "lw"/"sw" only for the slot itself.
 */
/* Stores pointer-sized var at immediate offset imm (t0 is scratch). */
# define WRITE_TLS_SLOT_IMM(imm, var)                                               \
     do {                                                                           \
         IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED());                                     \
         ASSERT(sizeof(var) == sizeof(void *));                                     \
         __asm__ __volatile__("ld t0, %0(tp) \n\t"                                  \
                              "sd %1, %2(t0) \n\t"                                  \
                              :                                                     \
                              : "i"(DR_TLS_BASE_OFFSET), "r"(var), "i"(imm)         \
                              : "memory", "t0");                                    \
     } while (0)
/* Loads pointer-sized var from immediate offset imm.  var is a pure output
 * ("=r"): its previous value is never read.
 */
# define READ_TLS_SLOT_IMM(imm, var)                                                \
     do {                                                                           \
         IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED());                                     \
         ASSERT(sizeof(var) == sizeof(void *));                                     \
         __asm__ __volatile__("ld %0, %1(tp) \n\t"                                  \
                              "ld %0, %2(%0) \n\t"                                  \
                              : "=r"(var)                                           \
                              : "i"(DR_TLS_BASE_OFFSET), "i"(imm));                 \
     } while (0)
/* Stores the low 32 bits of var at immediate offset imm.
 * NOTE(review): the size assert requires a pointer-sized var although only 32
 * bits are stored -- confirm which size callers are expected to pass.
 */
# define WRITE_TLS_INT_SLOT_IMM(imm, var)                                           \
     do {                                                                           \
         IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED());                                     \
         ASSERT(sizeof(var) == sizeof(void *));                                     \
         __asm__ __volatile__("ld t0, %0(tp) \n\t"                                  \
                              "sw %1, %2(t0) \n\t"                                  \
                              :                                                     \
                              : "i"(DR_TLS_BASE_OFFSET), "r"(var), "i"(imm)         \
                              : "memory", "t0");                                    \
     } while (0)
/* Loads a 32-bit value from immediate offset imm into var.
 * NOTE(review): see the size-assert note on WRITE_TLS_INT_SLOT_IMM.
 */
# define READ_TLS_INT_SLOT_IMM(imm, var)                                            \
     do {                                                                           \
         IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED());                                     \
         ASSERT(sizeof(var) == sizeof(void *));                                     \
         __asm__ __volatile__("ld %0, %1(tp) \n\t"                                  \
                              "lw %0, %2(%0) \n\t"                                  \
                              : "=r"(var)                                           \
                              : "i"(DR_TLS_BASE_OFFSET), "i"(imm));                 \
     } while (0)
/* Stores pointer-sized var at the run-time offset offs (t0 is scratch). */
# define WRITE_TLS_SLOT(offs, var)                                                  \
     do {                                                                           \
         IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED());                                     \
         ASSERT(sizeof(var) == sizeof(void *));                                     \
         __asm__ __volatile__("ld t0, %0(tp) \n\t"                                  \
                              "add t0, t0, %2\n\t"                                  \
                              "sd %1, 0(t0) \n\t"                                   \
                              :                                                     \
                              : "i"(DR_TLS_BASE_OFFSET), "r"(var), "r"(offs)        \
                              : "memory", "t0");                                    \
     } while (0)
/* Loads pointer-sized var from the run-time offset offs.  The output is
 * early-clobber ("=&r") because it is written by the first instruction, before
 * the register holding offs is read; without that the compiler could assign
 * both operands to the same register.
 */
# define READ_TLS_SLOT(offs, var)                                                   \
     do {                                                                           \
         IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED());                                     \
         ASSERT(sizeof(var) == sizeof(void *));                                     \
         __asm__ __volatile__("ld %0, %1(tp) \n\t"                                  \
                              "add %0, %0, %2\n\t"                                  \
                              "ld %0, 0(%0) \n\t"                                   \
                              : "=&r"(var)                                          \
                              : "i"(DR_TLS_BASE_OFFSET), "r"(offs));                \
     } while (0)
#endif
#ifdef X86
/* A placeholder os_local_state_t.  Per the original note, it is used at thread
 * init and exit to make it easy to identify
 * whether TLS is initialized (i#2089).
 * We assume alignment does not matter.
 */
static os_local_state_t uninit_tls;
#endif
/* Returns whether the calling thread currently has DR TLS set up.  How that
 * is determined is platform-specific; see each branch below.
 */
static bool
is_thread_tls_initialized(void)
{
#if defined(MACOS) && defined(AARCH64)
    os_local_state_t *v = (void *)tls_get_dr_addr();
    return v != NULL && v->tls_type == TLS_TYPE_SLOT;
#elif defined(MACOS64) && !defined(AARCH64)
    /* For now we reach the os_local_state_t through a pointer, and we keep
     * it in slot 6.  If we settle on always doing the full os_local_state_t in slots,
     * we would probably get rid of the indirection here and directly read the magic
     * field from its slot.
     */
    byte **tls_swap_slot;
    tls_swap_slot = (byte **)get_app_tls_swap_slot_addr();
    if (tls_swap_slot == NULL || *tls_swap_slot == NULL ||
        *tls_swap_slot == TLS_SLOT_VAL_EXITED)
        return false;
    return true;
#elif defined(X86)
    if (INTERNAL_OPTION(safe_read_tls_init)) {
        /* Skip the safe read before the first thread's TLS is set up or after
         * the last thread's TLS has exited.
         * It's not worth extending the handler as the faults are a perf hit anyway.
         * For standalone_library, first_thread_tls_initialized will always be false,
         * so we'll return false here and use our check in get_thread_private_dcontext().
         */
        if (!first_thread_tls_initialized || last_thread_tls_exited)
            return false;
        /* Avoid races during detach involving
         * detached threads being passed native signals.  The detaching thread is
         * the one doing all the real cleanup, so we simply avoid any safe reads
         * or TLS for detaching threads.  This var is not cleared until re-init,
         * so we have no race with the end of detach.
         */
        if (detacher_tid != INVALID_THREAD_ID && detacher_tid != get_sys_thread_id())
            return false;
        /* To handle cases where the segment registers start out with unexpected values,
         * and when the MSR is used having a zero selector, and other complexities,
         * we just do a blind safe read as the simplest solution once we're past
         * initial init and have a fault handler.
         *
         * i#2089: to avoid the perf cost of syscalls to verify the tid, and to
         * distinguish a fork child from a separate-group thread, we no longer read
         * the tid field and check that the TLS belongs to this particular thread:
         * instead we rely on clearing the .magic field for child threads and at
         * thread exit (to avoid a fault) and we simply check the field here.
         * A native app thread is very unlikely to match this.
         */
        return safe_read_tls_magic() == TLS_MAGIC_VALID;
    } else {
        /* XXX: this is the legacy path, which is kept until
         * we're confident that the safe read code above is safer, more
         * performant, and more robust.
         */
        os_local_state_t *os_tls = NULL;
        ptr_uint_t cur_seg = read_thread_register(SEG_TLS);
        /* Only read the self pointer when the selector looks usable. */
        if (cur_seg != 0 && cur_seg != read_thread_register(SEG_SS)) {
            READ_TLS_SLOT_IMM(TLS_SELF_OFFSET, os_tls);
        }
# if defined(X64) && defined(X86)
        if (os_tls == NULL && tls_dr_using_msr()) {
            /* With the MSR in use the selector read above can be zero, which
             * leaves os_tls NULL at this point.
             * We can't clear the MSR early in a new thread and then look for
             * a zero base here b/c if kernel decides to use GDT that zeroing
             * will set the selector, unless we want to assume we know when
             * the kernel uses the GDT.
             * Instead we make a syscall to get the tid.  This should be ok
             * perf-wise b/c the common case is the non-zero above.
             */
            byte *base = tls_get_fs_gs_segment_base(SEG_TLS);
            ASSERT(tls_global_type == TLS_TYPE_ARCH_PRCTL);
            if (base != (byte *)POINTER_MAX && base != NULL) {
                os_tls = (os_local_state_t *)base;
            }
        }
# endif
        if (os_tls != NULL) {
            /* Accept the TLS if it records this thread's id or if its dcontext's
             * owning_process equals get_parent_id().
             */
            return (os_tls->tid == get_sys_thread_id() ||
                    os_tls->state.spill_space.dcontext->owning_process ==
                        get_parent_id());
        } else
            return false;
    }
#elif defined(AARCHXX) || defined(RISCV64)
    byte **dr_tls_base_addr;
    if (tls_global_type == TLS_TYPE_NONE)
        return false;
    dr_tls_base_addr = (byte **)get_dr_tls_base_addr();
    if (dr_tls_base_addr == NULL || *dr_tls_base_addr == NULL ||
        *dr_tls_base_addr == TLS_SLOT_VAL_EXITED)
        return false;
    /* We do not further validate the slot value with an address lookup, as that leads
     * to infinite recursion for an address not in the vm_reserve area, as
     * dynamo_vm_areas_start_reading() ending up calling
     * deadlock_avoidance_unlock() which calls get_thread_private_dcontext()
     * which comes here.
     */
    return true;
#endif
    return true;
}
/* Returns whether pc is the entry point of one of DR's safe TLS readers
 * (safe_read_tls_magic or safe_read_tls_self).  Only x86 with the
 * safe_read_tls_init option can return true; everything else returns false.
 */
bool
is_DR_segment_reader_entry(app_pc pc)
{
    /* This check exists to avoid problems with building
     * bbs for DR code that reads DR segments when DR is a static library.
     * It's a little ugly but it's not clear there's a better solution.
     * See the discussion in i#2463 c#2.
     */
#ifdef X86
    if (INTERNAL_OPTION(safe_read_tls_init)) {
        return pc == (app_pc)safe_read_tls_magic || pc == (app_pc)safe_read_tls_self;
    }
#endif
    /* XXX: other TLS readers are not identified here.  Doing so is
     * more complicated as the PC may not be a function entry but the
     * start of a bb after a branch in our C code that uses inline asm
     * to read the TLS.
     */
    return false;
}
#if defined(X86) || defined(DEBUG)
/* Returns whether TLS has been allocated for the calling thread.  On x86
 * (non-MACOS64) with safe_read_tls_init this accepts both TLS_MAGIC_VALID and
 * TLS_MAGIC_INVALID; otherwise it is the same as is_thread_tls_initialized().
 */
static bool
is_thread_tls_allocated(void)
{
# if defined(X86) && !defined(MACOS64)
    if (INTERNAL_OPTION(safe_read_tls_init)) {
        /* TLS_MAGIC_INVALID is accepted here as well, a state for which
         * is_thread_tls_initialized() (and thus is_thread_initialized()) will
         * return false.
         * Caution: this will also return true on a fresh clone child.
         */
        uint magic;
        if (!first_thread_tls_initialized || last_thread_tls_exited)
            return false;
        magic = safe_read_tls_magic();
        return magic == TLS_MAGIC_VALID || magic == TLS_MAGIC_INVALID;
    }
# endif
    return is_thread_tls_initialized();
}
#endif
/* Converts an offset within the local state (tls_offs) into an offset usable
 * with the TLS segment.  On x86 MACOS64 builds tls_get_dr_offs() is added.
 */
ushort
os_tls_offset(ushort tls_offs)
{
    ushort seg_offs;
    IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED());
    /* The conversion relies on the local state sitting at offset 0. */
    ASSERT(TLS_LOCAL_STATE_OFFSET == 0);
    seg_offs =
        (ushort)(TLS_LOCAL_STATE_OFFSET + tls_offs IF_X86(IF_MACOS64(+tls_get_dr_offs())));
    return seg_offs;
}
/* Inverse of os_tls_offset(): converts a segment offset (seg_offs) back into
 * an offset within the local state.  On MACOS64 builds tls_get_dr_offs() is
 * subtracted.
 */
ushort
os_local_state_offset(ushort seg_offs)
{
    ushort state_offs;
    IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED());
    /* The conversion relies on the local state sitting at offset 0. */
    ASSERT(TLS_LOCAL_STATE_OFFSET == 0);
    state_offs =
        (ushort)(seg_offs - TLS_LOCAL_STATE_OFFSET IF_MACOS64(-tls_get_dr_offs()));
    return state_offs;
}
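/* Returns the private TLS base recorded for reg in the thread's os_thread_data_t.
 * XXX: Will return NULL if called before os_thread_init(), which sets
 * ostd->dr_fs/gs_base.
 */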
/* Returns the private TLS base stored in the thread's os_thread_data_t for
 * reg, which must be TLS_REG_LIB or TLS_REG_ALT.  If dcontext is NULL the
 * current thread's dcontext is used; if that is NULL too, returns NULL.
 */
void *
os_get_priv_tls_base(dcontext_t *dcontext, reg_id_t reg)
{
    void *base = NULL;
    os_thread_data_t *ostd;
    IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED());
    ASSERT(reg == TLS_REG_ALT || reg == TLS_REG_LIB);

    if (dcontext == NULL) {
        dcontext = get_thread_private_dcontext();
        if (dcontext == NULL)
            return NULL;
    }

    ostd = (os_thread_data_t *)dcontext->os_field;
    if (reg == TLS_REG_LIB) {
        base = ostd->priv_lib_tls_base;
    } else if (reg == TLS_REG_ALT) {
        base = ostd->priv_alt_tls_base;
    } else {
        /* Any other register was already rejected by the assert above. */
        ASSERT_NOT_REACHED();
    }
    return base;
}
/* Returns the calling thread's os_local_state_t, obtained by reading the self
 * pointer stored at TLS_SELF_OFFSET.  The thread's TLS must be initialized.
 */
os_local_state_t *
get_os_tls(void)
{
    os_local_state_t *self_ptr = NULL;
    ASSERT(is_thread_tls_initialized());
    READ_TLS_SLOT_IMM(TLS_SELF_OFFSET, self_ptr);
    return self_ptr;
}
/* Obtains the os_local_state_t from the dcontext directly, which works in
 * situations where get_os_tls() fails.
 */
/* Derives the os_local_state_t that contains dcontext->local_state by
 * subtracting the offset of the state field.  Returns NULL when
 * dcontext->local_state is NULL.  dcontext itself must not be NULL.
 */
static os_local_state_t *
get_os_tls_from_dc(dcontext_t *dcontext)
{
    ASSERT(dcontext != NULL);
    byte *state_addr = (byte *)dcontext->local_state;
    return (state_addr == NULL)
        ? NULL
        : (os_local_state_t *)(state_addr - offsetof(os_local_state_t, state));
}
#ifdef AARCHXX
/* Records base as the application's TLS base for reg (TLS_REG_LIB or
 * TLS_REG_ALT) in the thread's os_local_state_t.  If dcontext is NULL the
 * current thread's dcontext is used.  Returns true once the value has been
 * stored, false for any other register.
 */
bool
os_set_app_tls_base(dcontext_t *dcontext, reg_id_t reg, void *base)
{
    bool stored = true;
    os_local_state_t *os_tls;
    IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED());
    ASSERT(reg == TLS_REG_LIB || reg == TLS_REG_ALT);
    if (dcontext == NULL)
        dcontext = get_thread_private_dcontext();
    ASSERT(dcontext != NULL);

    os_tls = get_os_tls_from_dc(dcontext);
    if (reg == TLS_REG_LIB) {
        os_tls->app_lib_tls_base = base;
        LOG(THREAD, LOG_THREADS, 1, "TLS app lib base  =" PFX "\n", base);
    } else if (reg == TLS_REG_ALT) {
        os_tls->app_alt_tls_base = base;
        LOG(THREAD, LOG_THREADS, 1, "TLS app alt base  =" PFX "\n", base);
    } else {
        /* Any other register was already rejected by the assert above. */
        ASSERT_NOT_REACHED();
        stored = false;
    }
    return stored;
}
#endif
/* Returns the application's TLS base for reg (TLS_REG_LIB or TLS_REG_ALT) as
 * recorded in the thread's os_local_state_t.  If dcontext is NULL the current
 * thread's dcontext is used; if there is none, the base is read directly with
 * get_segment_base().
 */
void *
os_get_app_tls_base(dcontext_t *dcontext, reg_id_t reg)
{
    os_local_state_t *os_tls;
    IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED());
    ASSERT(reg == TLS_REG_LIB || reg == TLS_REG_ALT);
    if (dcontext == NULL)
        dcontext = get_thread_private_dcontext();
    if (dcontext == NULL) {
        /* Without a dcontext we presumably have not yet replaced
         * the app's segments.  get_segment_base is expensive, but this should
         * be rare.  Re-examine if it pops up in a profile.
         */
        return get_segment_base(reg);
    }
    os_tls = get_os_tls_from_dc(dcontext);
    if (reg == TLS_REG_LIB)
        return os_tls->app_lib_tls_base;
    else if (reg == TLS_REG_ALT)
        return os_tls->app_alt_tls_base;
    ASSERT_NOT_REACHED();
    return NULL;
}
/* Returns the os_local_state_t offset of the field holding the application's
 * TLS base for reg (TLS_REG_LIB or TLS_REG_ALT); 0 for any other register.
 */
ushort
os_get_app_tls_base_offset(reg_id_t reg)
{
    ushort offs = 0;
    IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED());
    ASSERT(TLS_LOCAL_STATE_OFFSET == 0);
    if (reg == TLS_REG_LIB) {
        offs = TLS_APP_LIB_TLS_BASE_OFFSET;
    } else if (reg == TLS_REG_ALT) {
        offs = TLS_APP_ALT_TLS_BASE_OFFSET;
    } else {
        ASSERT_NOT_REACHED();
    }
    return offs;
}
#ifdef X86
/* Returns the os_local_state_t offset of the field holding the application's
 * selector value for reg (TLS_REG_LIB or TLS_REG_ALT); 0 for any other
 * register.
 */
ushort
os_get_app_tls_reg_offset(reg_id_t reg)
{
    ushort offs = 0;
    IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED());
    ASSERT(TLS_LOCAL_STATE_OFFSET == 0);
    if (reg == TLS_REG_LIB) {
        offs = TLS_APP_LIB_TLS_REG_OFFSET;
    } else if (reg == TLS_REG_ALT) {
        offs = TLS_APP_ALT_TLS_REG_OFFSET;
    } else {
        ASSERT_NOT_REACHED();
    }
    return offs;
}
#endif
/* Returns the pointer-sized value stored in the calling thread's TLS at
 * offset tls_offs.
 */
void *
d_r_get_tls(ushort tls_offs)
{
void *val;
/* The macro writes val.  The x86 variant requires the offset operand to be
 * exactly 2 bytes wide, which is why tls_offs is passed directly.
 */
READ_TLS_SLOT(tls_offs, val);
return val;
}
/* Stores value in the calling thread's TLS at offset tls_offs. */
void
d_r_set_tls(ushort tls_offs, void *value)
{
/* The x86 variant of the macro passes both operands as memory operands, so
 * they must be lvalues; the parameters are therefore used directly.
 */
WRITE_TLS_SLOT(tls_offs, value);
}
/* Returns the base address of segment seg.
 * Assumes that cs, ss, ds, and es are flat.
 * Should we export this to clients?  For now they can get
 * this information via opnd_compute_address().
 */
/* Returns the current base address of segment seg.
 * x86: NULL for cs/ss/ds/es; for other segments the value comes from
 * tls_get_fs_gs_segment_base(), or is POINTER_MAX when built without HAVE_TLS.
 * x86 MACOS64: the address of slot SEG_TLS_BASE_SLOT relative to the value of
 * the thread register.
 * AArch32/AArch64/RISC-V: the thread register value itself.
 */
byte *
get_segment_base(uint seg)
{
#if defined(MACOS64) && defined(X86)
    ptr_uint_t *thread_ptr = (ptr_uint_t *)read_thread_register(seg);
    return (byte *)(thread_ptr + SEG_TLS_BASE_SLOT);
#elif defined(X86)
    const bool flat_seg =
        (seg == SEG_CS || seg == SEG_SS || seg == SEG_DS || seg == SEG_ES);
    if (flat_seg)
        return NULL;
# ifdef HAVE_TLS
    return tls_get_fs_gs_segment_base(seg);
# else
    return (byte *)POINTER_MAX;
# endif
#elif defined(AARCHXX) || defined(RISCV64)
    return (byte *)read_thread_register(seg);
#endif
}
/* Returns the application's (rather than DR's)
 * segment base value.
 */
/* Returns the application's base for segment seg.  On x86, cs/ss/ds/es yield
 * NULL.  When the private loader is in use and DR TLS is live (the first
 * thread's TLS is initialized and the last thread's TLS has not exited), the
 * value saved in DR's TLS is returned; otherwise the base is read with
 * get_segment_base().
 */
byte *
get_app_segment_base(uint seg)
{
#ifdef X86
    if (seg == SEG_CS || seg == SEG_SS || seg == SEG_DS || seg == SEG_ES)
        return NULL;
#endif
    if (!INTERNAL_OPTION(private_loader) || !first_thread_tls_initialized ||
        last_thread_tls_exited) {
        return get_segment_base(seg);
    }
    return d_r_get_tls(os_get_app_tls_base_offset(seg));
}
/* Returns the address of the state field inside the calling thread's
 * os_local_state_t, which is located via the self pointer stored at
 * TLS_SELF_OFFSET.  The thread's TLS must be initialized.
 */
local_state_extended_t *
get_local_state_extended()
{
    os_local_state_t *self_ptr = NULL;
    ASSERT(is_thread_tls_initialized());
    READ_TLS_SLOT_IMM(TLS_SELF_OFFSET, self_ptr);
    return &self_ptr->state;
}
/* Returns the calling thread's local state (the extended state viewed as a
 * local_state_t), or NULL when built without HAVE_TLS.
 */
local_state_t *
get_local_state()
{
    local_state_t *state = NULL;
#ifdef HAVE_TLS
    state = (local_state_t *)get_local_state_extended();
#endif
    return state;
}
#ifdef DEBUG
/* Debug-only sanity check.  On non-macOS AArch32/AArch64/RISC-V builds it
 * asserts that the TLS base slot does not hold TLS_SLOT_VAL_EXITED; on other
 * platforms it does nothing.
 */
void
os_enter_dynamorio(void)
{
# if (defined(AARCHXX) || defined(RISCV64)) && !defined(MACOS)
ASSERT(*(byte **)get_dr_tls_base_addr() != TLS_SLOT_VAL_EXITED);
# endif
}
#endif
/* Handles an application mov to a segment register:
 * os_handle_mov_seg updates the app's tls selector maintained by DR.
 * It is called before entering code cache in dispatch_enter_fcache.
 */
/* Decodes the mov-to-segment-register instruction at pc and updates DR's
 * record of the application's selector and TLS base for the destination
 * segment (the lib fields for TLS_REG_LIB, the alt fields otherwise).
 *
 * The new base is looked up in the thread's saved copy of the app's TLS
 * descriptors.  If the selector does not index one of those
 * GDT_NUM_TLS_SLOTS entries (or no copy exists), the base is recorded as NULL
 * rather than read from outside the array.
 *
 * Does nothing on non-x86 builds (and must not be reached on ARM).
 */
void
os_handle_mov_seg(dcontext_t *dcontext, byte *pc)
{
#ifdef X86
    instr_t instr;
    opnd_t opnd;
    reg_id_t seg;
    ushort sel = 0;
    our_modify_ldt_t *desc;
    int desc_idx;
    void *new_base = NULL;
    os_local_state_t *os_tls;
    os_thread_data_t *ostd;

    instr_init(dcontext, &instr);
    decode_cti(dcontext, pc, &instr);
    ASSERT(instr_get_opcode(&instr) == OP_mov_seg);

    /* The destination operand names the segment register being written. */
    opnd = instr_get_dst(&instr, 0);
    ASSERT(opnd_is_reg(opnd));
    seg = opnd_get_reg(opnd);
    ASSERT(reg_is_segment(seg));

    ostd = (os_thread_data_t *)dcontext->os_field;
    desc = (our_modify_ldt_t *)ostd->app_thread_areas;
    os_tls = get_os_tls();

    /* The source operand supplies the new selector, from either a register or
     * memory.
     */
    opnd = instr_get_src(&instr, 0);
    if (opnd_is_reg(opnd)) {
        sel = (ushort)reg_get_value_priv(opnd_get_reg(opnd), get_mcontext(dcontext));
    } else {
        void *ptr;
        ptr = (ushort *)opnd_compute_address_priv(opnd, get_mcontext(dcontext));
        ASSERT(ptr != NULL);
        if (!d_r_safe_read(ptr, sizeof(sel), &sel)) {
            /* An unreadable selector source is not handled. */
            ASSERT_NOT_IMPLEMENTED(false);
        }
    }

    /* Only index the saved descriptors when the selector maps inside them. */
    desc_idx = SELECTOR_INDEX(sel) - tls_min_index();
    if (desc != NULL && desc_idx >= 0 && desc_idx < GDT_NUM_TLS_SLOTS)
        new_base = (void *)(ptr_uint_t)desc[desc_idx].base_addr;

    if (seg == TLS_REG_LIB) {
        os_tls->app_lib_tls_reg = sel;
        os_tls->app_lib_tls_base = new_base;
    } else {
        os_tls->app_alt_tls_reg = sel;
        os_tls->app_alt_tls_base = new_base;
    }
    instr_free(dcontext, &instr);
    LOG(THREAD_GET, LOG_THREADS, 2,
        "thread " TIDFMT " segment change %s to selector 0x%x => "
        "app lib tls base: " PFX ", alt tls base: " PFX "\n",
        d_r_get_thread_id(), reg_names[seg], sel, os_tls->app_lib_tls_base,
        os_tls->app_alt_tls_base);
#elif defined(ARM)
    ASSERT_NOT_REACHED();
#endif
}
/* Records the app's TLS segment state in os_tls.
 * Must be called before DR sets up its own segment.
 */
/* Captures the application's TLS segment state into os_tls: the selector
 * values (x86, non-MACOS64), the lib and alt segment bases, and (x86,
 * non-MACOS64) a copy of the app's TLS descriptors.  Also records segment as
 * DR's TLS base and, when the private loader is enabled, sets up the private
 * library TLS base.
 */
static void
os_tls_app_seg_init(os_local_state_t *os_tls, void *segment)
{
    app_pc app_lib_tls_base, app_alt_tls_base;
#if defined(X86) && !defined(MACOS64)
    int i, index;
    our_modify_ldt_t *desc;

    os_tls->app_lib_tls_reg = read_thread_register(TLS_REG_LIB);
    os_tls->app_alt_tls_reg = read_thread_register(TLS_REG_ALT);
#endif
    app_lib_tls_base = get_segment_base(TLS_REG_LIB);
    app_alt_tls_base = get_segment_base(TLS_REG_ALT);

    /* The current base may already point at DR-owned memory (e.g. DR's own TLS)
     * or to &uninit_tls (i#2089), both of which will be is_dynamo_address().
     * Such a base is not the app's, so it is recorded as NULL.
     */
    os_tls->app_lib_tls_base =
        is_dynamo_address(app_lib_tls_base) ? NULL : app_lib_tls_base;
    os_tls->app_alt_tls_base =
        is_dynamo_address(app_alt_tls_base) ? NULL : app_alt_tls_base;

#if defined(X86) && !defined(MACOS64)
    /* Snapshot the app's TLS descriptors.
     * XXX: is the underlying descriptor query supported on a 64-bit kernel?
     * It has syscall number 211.
     * It works for a 32-bit application running in a 64-bit kernel.
     * It returns error value -38 for a 64-bit app in a 64-bit kernel.
     */
    desc = &os_tls->os_seg_info.app_thread_areas[0];
    tls_initialize_indices(os_tls);
    index = tls_min_index();
    for (i = 0; i < GDT_NUM_TLS_SLOTS; i++) {
        tls_get_descriptor(i + index, &desc[i]);
    }
#endif

    os_tls->os_seg_info.dr_tls_base = segment;
    os_tls->os_seg_info.priv_alt_tls_base = IF_X86_ELSE(segment, NULL);

    if (INTERNAL_OPTION(private_loader)) {
        os_tls->os_seg_info.priv_lib_tls_base = IF_UNIT_TEST_ELSE(
            os_tls->app_lib_tls_base, privload_tls_init(os_tls->app_lib_tls_base));
    }

    /* The selector fields are only filled in above for X86 && !MACOS64, so log
     * them under the same condition.
     */
#if defined(X86) && !defined(MACOS64)
    LOG(THREAD_GET, LOG_THREADS, 1,
        "thread " TIDFMT " app lib tls reg: 0x%x, alt tls reg: 0x%x\n",
        d_r_get_thread_id(), os_tls->app_lib_tls_reg, os_tls->app_alt_tls_reg);
#endif
    LOG(THREAD_GET, LOG_THREADS, 1,
        "thread " TIDFMT " app lib tls base: " PFX ", alt tls base: " PFX "\n",
        d_r_get_thread_id(), os_tls->app_lib_tls_base, os_tls->app_alt_tls_base);
    LOG(THREAD_GET, LOG_THREADS, 1,
        "thread " TIDFMT " priv lib tls base: " PFX ", alt tls base: " PFX ", "
        "DR's tls base: " PFX "\n",
        d_r_get_thread_id(), os_tls->os_seg_info.priv_lib_tls_base,
        os_tls->os_seg_info.priv_alt_tls_base, os_tls->os_seg_info.dr_tls_base);
}
/* Sets up DR's TLS for the calling thread.
 *
 * With HAVE_TLS: obtains the TLS segment memory, zeroes it, fills in the
 * os_local_state_t header (self pointer, tid, type, magic on x86), captures
 * the app's segment state when MACHINE_TLS_IS_DR_TLS, and installs the
 * segment via tls_thread_init().
 * Without HAVE_TLS: allocates and zeroes the global tls_table.
 *
 * In both cases the first call marks the first thread's TLS as initialized.
 */
void
os_tls_init(void)
{
#ifdef X86
    /* The offsets hard-coded for assembly code must match the C layout. */
    ASSERT(TLS_MAGIC_OFFSET_ASM == TLS_MAGIC_OFFSET);
    ASSERT(TLS_SELF_OFFSET_ASM == TLS_SELF_OFFSET);
#endif
#ifdef HAVE_TLS
    /* Each thread gets its own TLS segment, and on x86 DR loads its
     * selector into fs/gs.
     * FIXME PR 205276: this whole scheme currently does not check if app is using
     * segments need to watch modify_ldt syscall
     */
# ifdef MACOS64
    /* The segment memory comes from tls_get_dr_addr().
     * We also store a pointer to it in TLS slot 6.
     */
    byte *segment = tls_get_dr_addr();
# else
    byte *segment = heap_mmap(PAGE_SIZE, MEMPROT_READ | MEMPROT_WRITE,
                              VMM_SPECIAL_MMAP | VMM_PER_THREAD);
# endif
    os_local_state_t *os_tls = (os_local_state_t *)segment;

    LOG(GLOBAL, LOG_THREADS, 1, "os_tls_init for thread " TIDFMT "\n",
        d_r_get_thread_id());
    ASSERT(!is_thread_tls_initialized());

    /* Zero the segment so that every slot starts out as 0/NULL. */
    memset(segment, 0, IF_MACOSA64_ELSE(sizeof(os_local_state_t), PAGE_SIZE));
    os_tls->self = os_tls;
    os_tls->tid = get_sys_thread_id();
    os_tls->tls_type = TLS_TYPE_NONE;
# ifdef X86
    os_tls->magic = TLS_MAGIC_VALID;
# endif
    /* Save DR's TLS segment base here so that it is available
     * even when -no_mangle_app_seg is set.  If -mangle_app_seg is set, this
     * will be overwritten in os_tls_app_seg_init().
     */
    os_tls->os_seg_info.dr_tls_base = segment;
    ASSERT(proc_is_cache_aligned(os_tls->self + TLS_LOCAL_STATE_OFFSET));
    ASSERT(DYNAMO_OPTION(ibl_table_in_tls));

    if (MACHINE_TLS_IS_DR_TLS)
        os_tls_app_seg_init(os_tls, segment);

    /* tls_thread_init() is expected to set os_tls->tls_type. */
    tls_thread_init(os_tls, segment);
    ASSERT(os_tls->tls_type != TLS_TYPE_NONE);
    tls_global_type = os_tls->tls_type;
    /* XXX: should a TLS setup failure be a reported fatal error instead?
     * Should have create_ldt_entry() return failure instead of asserting, then.
     */
#else
    tls_table = (tls_slot_t *)global_heap_alloc(MAX_THREADS *
                                                sizeof(tls_slot_t) HEAPACCT(ACCT_OTHER));
    memset(tls_table, 0, MAX_THREADS * sizeof(tls_slot_t));
#endif
    if (!first_thread_tls_initialized) {
        first_thread_tls_initialized = true;
        /* Re-arm the exit flag in case this is a re-initialization. */
        if (last_thread_tls_exited)
            last_thread_tls_exited = false;
    }
    ASSERT(is_thread_tls_initialized());
}
/* Returns whether a thread's TLS should be zeroed when it exits.  On x86 this
 * is true only without safe_read_tls_init or during detach; on every other
 * architecture it is always true.
 */
static bool
should_zero_tls_at_thread_exit()
{
#ifdef X86
    /* With safe_read_tls_init we
     * leave &uninit_tls (which was put in place in os_thread_exit()) as long as
     * possible.  For non-detach, that means until the exit.
     */
    return !INTERNAL_OPTION(safe_read_tls_init) || doing_detach;
#else
    return true;
#endif
}
/* Releases the TLS resources of the thread that owns local_state.  If
 * should_zero_tls_at_thread_exit() says so, frees the thread's TLS via
 * tls_thread_free() and, for TLS_TYPE_ARCH_PRCTL on x86-64 non-macOS, zeroes
 * DR's segment register.  Once all other threads have exited, marks the last
 * thread's TLS as exited.  Does nothing without HAVE_TLS.
 */
void
os_tls_thread_exit(local_state_t *local_state)
{
#ifdef HAVE_TLS
    /* local_state is the state field embedded in an os_local_state_t. */
    os_local_state_t *os_tls =
        (os_local_state_t *)(((byte *)local_state) - offsetof(os_local_state_t, state));
    tls_type_t tls_type = os_tls->tls_type;
    int index = os_tls->ldt_index;
    ASSERT(offsetof(local_state_t, spill_space) ==
           offsetof(local_state_extended_t, spill_space));

    if (should_zero_tls_at_thread_exit()) {
        tls_thread_free(tls_type, index);

# if defined(X86) && defined(X64) && !defined(MACOS)
        if (tls_type == TLS_TYPE_ARCH_PRCTL) {
            if (read_thread_register(SEG_TLS) != 0) {
                /* WRITE_DR_SEG is given a named variable rather than a literal. */
                static const ptr_uint_t zero = 0;
                WRITE_DR_SEG(zero);
            }
        }
# endif
    }

    /* Only mark the last thread's TLS as exited once all other threads are gone.
     * If set, get_thread_private_dcontext() returns NULL, which may cause
     * another thread to fault on using its dcontext.
     */
    if (dynamo_exited_all_other_threads && !last_thread_tls_exited) {
        last_thread_tls_exited = true;
        first_thread_tls_initialized = false;
    }
#endif
}
/* Frees local_state's TLS storage.  If the calling thread is exiting (i.e.,
 * !other_thread) then also frees kernel resources for the calling
 * thread; if other_thread then that may not be possible.
 */
/* Frees the TLS storage that contains local_state.
 *
 * other_thread: true when called on behalf of a different thread; the
 * per-thread cleanup that must run on the owning thread (zeroing the segment
 * register, os_tls_thread_exit()) is then skipped.
 *
 * Without HAVE_TLS this instead frees the global tls_table and its lock.
 */
void
os_tls_exit(local_state_t *local_state, bool other_thread)
{
#ifdef HAVE_TLS
# if defined(X86) && !defined(MACOS64)
    /* WRITE_DR_SEG is given a named variable rather than a literal. */
    static const ptr_uint_t zero = 0;
# endif
# if defined(X86) && !defined(MACOS64)
    /* If the segment register reads as 0 we do not write it, and we rely on that
     * to indicate that.
     * NOTE(review): the opening of the original note is missing; presumably a
     * zero selector signals a mode in which the write is not wanted -- confirm.
     */
    if (!other_thread && read_thread_register(SEG_TLS) != 0 &&
        should_zero_tls_at_thread_exit()) {
        WRITE_DR_SEG(zero);
    }
# endif

    /* For another thread we cannot do the per-thread cleanup, so we have to
     * leave it un-cleaned-up.  That's fine if the other thread is exiting:
     * but for detach (i#95) we get the other thread to run this code.
     */
    if (!other_thread)
        os_tls_thread_exit(local_state);

# ifndef MACOS64
    /* Unmap the segment only after the per-thread cleanup above has run. */
    os_local_state_t *os_tls =
        (os_local_state_t *)(((byte *)local_state) - offsetof(os_local_state_t, state));
    heap_munmap(os_tls->self, PAGE_SIZE, VMM_SPECIAL_MMAP | VMM_PER_THREAD);
# endif
#else
    global_heap_free(tls_table, MAX_THREADS * sizeof(tls_slot_t) HEAPACCT(ACCT_OTHER));
    DELETE_LOCK(tls_lock);
#endif
}
/* Returns the ldt_index stored in the os_local_state_t that contains
 * dcontext->local_state when its TLS type is TLS_TYPE_GDT; -1 otherwise.
 */
static int
os_tls_get_gdt_index(dcontext_t *dcontext)
{
    byte *state_addr = (byte *)dcontext->local_state;
    os_local_state_t *os_tls =
        (os_local_state_t *)(state_addr - offsetof(os_local_state_t, state));
    return (os_tls->tls_type == TLS_TYPE_GDT) ? os_tls->ldt_index : -1;
}
/* Pre-initialization TLS cleanup.  On x86 (non-MACOS64), when gdt_index is
 * positive, zeroes DR's segment register and clears the TLS descriptor at
 * gdt_index.  Not implemented on ARM; a no-op elsewhere.
 */
void
os_tls_pre_init(int gdt_index)
{
#if defined(X86) && !defined(MACOS64)
    /* A value of 0 or below means there is no descriptor to clear. */
    if (gdt_index > 0) {
        DEBUG_DECLARE(bool ok;)
        /* WRITE_DR_SEG is given a named variable rather than a literal. */
        static const ptr_uint_t zero = 0;
        /* Zero the segment register first; presumably so that nothing tries to
         * call get_thread_private_dcontext()
         * through the descriptor that is about to be cleared.
         */
        WRITE_DR_SEG(zero);
        DEBUG_DECLARE(ok =)
        tls_clear_descriptor(gdt_index);
        ASSERT(ok);
    }
#elif defined(ARM)
    ASSERT_NOT_IMPLEMENTED(false);
#endif
}
/* Reserves num_slots consecutive client TLS slots.
 *
 * offset:    receives the os_local_state_t offset of the first reserved slot.
 * num_slots: number of consecutive pointer-sized slots wanted; must be between
 *            1 and MAX_NUM_CLIENT_TLS.
 * alignment: required alignment, in bytes, of the first slot's offset; 0 means
 *            no requirement.
 *
 * Returns true on success.  Returns false, leaving *offset untouched, when
 * num_slots is out of range or no suitable run of free slots exists.
 *
 * The alignment applies only to the first slot of the run: consecutive
 * pointer-sized slots cannot all satisfy an alignment larger than a pointer,
 * so requiring it of every slot would make such multi-slot requests always
 * fail.
 */
bool
os_tls_calloc(OUT uint *offset, uint num_slots, uint alignment)
{
    bool res = false;
    uint i, count = 0;
    int start = -1;
    uint offs = offsetof(os_local_state_t, client_tls);
    if (num_slots == 0 || num_slots > MAX_NUM_CLIENT_TLS)
        return false;
    d_r_mutex_lock(&client_tls_lock);
    for (i = 0; i < MAX_NUM_CLIENT_TLS; i++) {
        if (client_tls_allocated[i]) {
            /* A used slot breaks any run in progress. */
            start = -1;
            count = 0;
            continue;
        }
        if (start == -1) {
            /* A run may only begin on a suitably aligned slot.  ALIGNED is not
             * usable with an alignment of 0, hence the explicit check.
             */
            if (alignment != 0 && !ALIGNED(offs + i * sizeof(void *), alignment))
                continue;
            start = i;
        }
        count++;
        if (count >= num_slots)
            break;
    }
    if (count >= num_slots) {
        for (i = 0; i < num_slots; i++)
            client_tls_allocated[i + start] = true;
        *offset = offs + start * sizeof(void *);
        res = true;
    }
    d_r_mutex_unlock(&client_tls_lock);
    return res;
}
/* Releases num_slots client TLS slots starting at os_local_state_t offset
 * offset, as previously returned by os_tls_calloc().
 *
 * Returns true if every slot in the range was allocated.  Returns false if
 * any was not (the slots are still marked free), or if the range does not lie
 * within the client TLS area, in which case nothing is modified.
 */
bool
os_tls_cfree(uint offset, uint num_slots)
{
    const uint base = offsetof(os_local_state_t, client_tls);
    uint i;
    uint first;
    bool ok = true;
    /* Reject offsets below the client area: the subtraction would wrap. */
    if (offset < base)
        return false;
    first = (offset - base) / sizeof(void *);
    /* Reject ranges that would run past the end of client_tls_allocated[]. */
    if (first > MAX_NUM_CLIENT_TLS || num_slots > MAX_NUM_CLIENT_TLS - first)
        return false;
    d_r_mutex_lock(&client_tls_lock);
    for (i = 0; i < num_slots; i++) {
        if (!client_tls_allocated[i + first])
            ok = false;
        client_tls_allocated[i + first] = false;
    }
    d_r_mutex_unlock(&client_tls_lock);
    return ok;
}
/* Allocates and initializes the per-thread OS data (os_thread_data_t) hung off
 * dcontext->os_field: synchronization variables, the suspend lock, signal
 * state, and copies of the TLS segment bases recorded in this thread's
 * os_local_state_t.
 */
void
os_thread_init(dcontext_t *dcontext, void *os_data)
{
    os_local_state_t *os_tls = get_os_tls();
    os_thread_data_t *ostd = (os_thread_data_t *)heap_alloc(
        dcontext, sizeof(os_thread_data_t) HEAPACCT(ACCT_OTHER));
    dcontext->os_field = (void *)ostd;

    /* Zero all fields now so that anything not set below starts out as 0
     * (don't have app esp register handy here to init now)
     */
    memset(ostd, 0, sizeof(*ostd));

    ksynch_init_var(&ostd->suspended);
    ksynch_init_var(&ostd->wakeup);
    ksynch_init_var(&ostd->resumed);
    ksynch_init_var(&ostd->terminated);
    ksynch_init_var(&ostd->detached);

#ifdef RETURN_AFTER_CALL
    /* With thread
     * init now preceding vm_areas_init(), we initialize in find_executable_vm_areas()
     */
    ostd->stack_bottom_pc = NULL;
#endif

    ASSIGN_INIT_LOCK_FREE(ostd->suspend_lock, suspend_lock);

    signal_thread_init(dcontext, os_data);

    /* Initialize the thread area information:
     * the value was first get in os_tls_init and stored in os_tls
     */
    ostd->priv_lib_tls_base = os_tls->os_seg_info.priv_lib_tls_base;
    ostd->priv_alt_tls_base = os_tls->os_seg_info.priv_alt_tls_base;
    ostd->dr_tls_base = os_tls->os_seg_info.dr_tls_base;

    LOG(THREAD, LOG_THREADS, 1, "TLS app lib base  =" PFX "\n", os_tls->app_lib_tls_base);
    LOG(THREAD, LOG_THREADS, 1, "TLS app alt base  =" PFX "\n", os_tls->app_alt_tls_base);
    LOG(THREAD, LOG_THREADS, 1, "TLS priv lib base =" PFX "\n", ostd->priv_lib_tls_base);
    LOG(THREAD, LOG_THREADS, 1, "TLS priv alt base =" PFX "\n", ostd->priv_alt_tls_base);
    LOG(THREAD, LOG_THREADS, 1, "TLS DynamoRIO base=" PFX "\n", ostd->dr_tls_base);

#ifdef X86
    if (INTERNAL_OPTION(mangle_app_seg)) {
        /* Keep a per-thread copy of the app's thread areas. */
        ostd->app_thread_areas = heap_alloc(
            dcontext, sizeof(our_modify_ldt_t) * GDT_NUM_TLS_SLOTS HEAPACCT(ACCT_OTHER));
        memcpy(ostd->app_thread_areas, os_tls->os_seg_info.app_thread_areas,
               sizeof(our_modify_ldt_t) * GDT_NUM_TLS_SLOTS);
    }
#endif

    LOG(THREAD, LOG_THREADS, 1, "post-TLS-setup, cur %s base is " PFX "\n", STR_SEG,
        get_segment_base(SEG_TLS));
    LOG(THREAD, LOG_THREADS, 1, "post-TLS-setup, cur %s base is " PFX "\n", STR_LIB_SEG,
        get_segment_base(LIB_SEG_TLS));

#ifdef MACOS
    /* Record this thread's Mach port. */
    dcontext->thread_port = dynamorio_mach_syscall(MACH_thread_self_trap, 0);
    LOG(THREAD, LOG_ALL, 1, "Mach thread port: %d\n", dcontext->thread_port);
#endif
}
/* Final step of per-thread OS initialization: inherits signal state via
 * signal_thread_inherit().
 */
void
os_thread_init_finalize(dcontext_t *dcontext, void *os_data)
{
    /* Signal inheritance has to wait until the rest of thread initialization
     * is finished so we delay until here: but we need this inside the
     * thread_initexit_lock (i#2779).
     */
    signal_thread_inherit(dcontext, os_data);
}
/* Tears down the per-thread OS data created by os_thread_init().
 * other_thread is passed through to signal_thread_exit() and, when true,
 * suppresses repointing the DR TLS base (see below).
 */
void
os_thread_exit(dcontext_t *dcontext, bool other_thread)
{
    os_thread_data_t *ostd = (os_thread_data_t *)dcontext->os_field;

    /* If this thread's record is flagged execve, let handle_execve_post()
     * clean up
     * the env vars.
     */
    if (dcontext->thread_record->execve)
        handle_execve_post(dcontext);

    DELETE_LOCK(ostd->suspend_lock);

    signal_thread_exit(dcontext, other_thread);

    ksynch_free_var(&ostd->suspended);
    ksynch_free_var(&ostd->wakeup);
    ksynch_free_var(&ostd->resumed);
    ksynch_free_var(&ostd->terminated);
    ksynch_free_var(&ostd->detached);

#ifdef X86
    if (ostd->clone_tls != NULL) {
        /* Point the DR TLS base at uninit_tls before the copy is freed.  This
         * is skipped when other_thread is set.
         */
        if (!other_thread)
            os_set_dr_tls_base(dcontext, NULL, (byte *)&uninit_tls);
        HEAP_TYPE_FREE(dcontext, ostd->clone_tls, os_local_state_t, ACCT_THREAD_MGT,
                       UNPROTECTED);
    }
#endif

    if (INTERNAL_OPTION(private_loader))
        privload_tls_exit(IF_UNIT_TEST_ELSE(NULL, ostd->priv_lib_tls_base));

    /* The frees below are wrapped in DODEBUG. */
    DODEBUG({
        if (MACHINE_TLS_IS_DR_TLS) {
#ifdef X86
            heap_free(dcontext, ostd->app_thread_areas,
                      sizeof(our_modify_ldt_t) * GDT_NUM_TLS_SLOTS HEAPACCT(ACCT_OTHER));
#endif
        }
        heap_free(dcontext, ostd, sizeof(os_thread_data_t) HEAPACCT(ACCT_OTHER));
    });
}
/* Prepares for a fork: synchs with all other threads, runs the heap's pre-fork
 * hook, and releases the two synch locks.  The list of synched threads is
 * stashed in the thread's os_thread_data_t for os_fork_post().
 */
static void
os_fork_pre(dcontext_t *dcontext)
{
    os_thread_data_t *ostd = (os_thread_data_t *)dcontext->os_field;

    ASSERT_OWN_NO_LOCKS();
    ASSERT(ostd->fork_threads == NULL && ostd->fork_num_threads == 0);

    /* Synch with all other threads so that none of them is holding
     * locks across the fork.
     * FIXME i#26: Suspend signals received before initializing siginfo are
     * squelched, so we won't be able to suspend threads that are initializing.
     */
    /* LOG takes (file, mask, level, ...): the mask and level used to be
     * passed in the opposite order here.
     */
    LOG(GLOBAL, LOG_SYSCALLS | LOG_THREADS, 2,
        "fork: synching with other threads to prevent deadlock in child\n");
    if (!synch_with_all_threads(THREAD_SYNCH_SUSPENDED_VALID_MCONTEXT_OR_NO_XFER,
                                &ostd->fork_threads, &ostd->fork_num_threads,
                                THREAD_SYNCH_VALID_MCONTEXT,
                                /* If we fail to suspend a thread, there is a
                                 * risk of deadlock in the child, so it's worth
                                 * retrying on failure.
                                 */
                                THREAD_SYNCH_SUSPEND_FAILURE_RETRY)) {
        /* If the synch failed anyway, we live with the possibility
         * of deadlock and continue as normal.
         */
        LOG(GLOBAL, LOG_SYSCALLS | LOG_THREADS, 1,
            "fork: synch failed, possible deadlock in child\n");
        ASSERT_CURIOSITY(false);
    }

    vmm_heap_fork_pre(dcontext);

    /* The fork itself must not be executed while we hold these
     * locks.  If the synch succeeded, no one else is running, so it should be
     * safe to release these locks.  However, if there are any rogue threads,
     * then releasing these locks will allow them to synch and create threads.
     * Such threads could be running due to synch failure or presence of
     * non-suspendable client threads.  We keep our data in ostd to prevent some
     * conflicts, but there are some unhandled corner cases.
     */
    d_r_mutex_unlock(&thread_initexit_lock);
    d_r_mutex_unlock(&all_threads_synch_lock);
}
/* Undoes os_fork_pre(): re-acquires the two synch locks, ends the synch with
 * the threads recorded in os_thread_data_t, clears that record, and runs the
 * heap's post-fork hook.  "parent" says which side of the fork we are on.
 */
static void
os_fork_post(dcontext_t *dcontext, bool parent)
{
    os_thread_data_t *ostd = (os_thread_data_t *)dcontext->os_field;

    /* Re-acquire the locks that os_fork_pre() released. */
    d_r_mutex_lock(&all_threads_synch_lock);
    d_r_mutex_lock(&thread_initexit_lock);

    if (parent) {
        /* LOG takes (file, mask, level, ...): the mask and level used to be
         * passed in the opposite order here.
         */
        LOG(GLOBAL, LOG_SYSCALLS | LOG_THREADS, 2,
            "fork: resuming other threads after fork\n");
    }
    end_synch_with_all_threads(ostd->fork_threads, ostd->fork_num_threads, parent);
    ostd->fork_threads = NULL; /* Freed by end_synch_with_all_threads -- TODO confirm. */
    ostd->fork_num_threads = 0;

    vmm_heap_fork_post(dcontext, parent);
}
/* Child-side initialization after a fork: resets the synch locks, completes
 * the fork synch as the child, refreshes the cached pid and application
 * identity, and closes every tracked file flagged OS_OPEN_CLOSE_ON_FORK.
 */
void
os_fork_init(dcontext_t *dcontext)
{
    int iter = 0;
    ptr_uint_t fd;
    ptr_uint_t flags;

    /* fd_table payloads are read back through ptr_uint_t-sized variables. */
    ASSERT(sizeof(file_t) <= sizeof(ptr_uint_t));

    /* The child may inherit locked locks if it was
     * forked while another thread held locks.  We reset the locks and try to
     * cope with any intermediate state left behind from the parent.  If we
     * encounter more deadlocks after fork, we can add more lock and data resets
     * on a case by case basis.
     */
    d_r_mutex_fork_reset(&all_threads_synch_lock);
    d_r_mutex_fork_reset(&thread_initexit_lock);

    os_fork_post(dcontext, false /*parent*/);

    /* Re-populate the cached process identity for the new process. */
    pid_cached = get_process_id();
    get_application_pid_helper(true);
    get_application_name_helper(true, true);

    /* Walk the fd table and close-and-remove everything marked
     * OS_OPEN_CLOSE_ON_FORK.
     */
    TABLE_RWLOCK(fd_table, write, lock);
    for (;;) {
        iter = generic_hash_iterate_next(GLOBAL_DCONTEXT, fd_table, iter, &fd,
                                         (void **)&flags);
        if (iter < 0)
            break;
        if (!TEST(OS_OPEN_CLOSE_ON_FORK, flags))
            continue;
        close_syscall((file_t)fd);
        iter = generic_hash_iterate_remove(GLOBAL_DCONTEXT, fd_table, iter, fd);
    }
    TABLE_RWLOCK(fd_table, write, unlock);
}
/* Swaps DR's own TLS segment base around a transition.
 * to_app == true:  point the segment at a private copy of this thread's TLS
 *                  whose magic field is invalid.
 * to_app == false: point the segment back at the thread's real TLS.
 * Only x86 does any work here, and only when safe_read_tls_init is on.
 */
static void
os_swap_dr_tls(dcontext_t *dcontext, bool to_app)
{
#ifdef X86
    /* With the option off we arguably should still swap (xref i#107/i#2088
     * in os_swap_context()) but there are few consequences of not doing it, and we
     * have no code set up separate from the i#2089 scheme here.
     */
    if (!INTERNAL_OPTION(safe_read_tls_init))
        return;

    if (!to_app) {
        /* Restore the real TLS, unless it is already installed. */
        os_local_state_t *real_tls = get_os_tls_from_dc(dcontext);
        if (get_segment_base(SEG_TLS) != (byte *)real_tls) {
            DEBUG_DECLARE(os_thread_data_t *ostd =
                              (os_thread_data_t *)dcontext->os_field);
            ASSERT(get_segment_base(SEG_TLS) == (byte *)ostd->clone_tls);
            os_set_dr_tls_base(dcontext, real_tls, (byte *)real_tls);
        }
        return;
    }

    /* We want a child to inherit a TLS with an invalid magic, but we
     * need our own syscall execution and post-syscall code to have valid scratch
     * and dcontext values.  We can't clear our own magic b/c we don't know when
     * the child will be scheduled, so we use a copy of our TLS.  We carefully
     * never have a valid magic there in case a prior child is still unscheduled.
     *
     * We assume the child will not modify this TLS copy in any way.
     * CLONE_SETTLS touches the other segment (we'll have to watch for
     * addition of CLONE_SETTLS_AUX).  The parent will use the scratch space
     * returning from the syscall to d_r_dispatch, but we restore via os_clone_post()
     * immediately before anybody calls get_thread_private_dcontext() or
     * anything.
     */
    /* XXX: an alternative to the copy would pass the needed state
     * and the seg reg value via the clone record (like we do for ARM today).
     */
    os_thread_data_t *ostd = (os_thread_data_t *)dcontext->os_field;
    os_local_state_t *cur_tls = get_os_tls_from_dc(dcontext);
    if (ostd->clone_tls == NULL) {
        /* Lazily allocate the copy the first time it is needed. */
        ostd->clone_tls = (os_local_state_t *)HEAP_TYPE_ALLOC(
            dcontext, os_local_state_t, ACCT_THREAD_MGT, UNPROTECTED);
        LOG(THREAD, LOG_THREADS, 2, "TLS copy is " PFX "\n", ostd->clone_tls);
    }
    /* Never let the copy hold a valid magic, not even briefly, by
     * invalidating prior to copying.
     */
    cur_tls->magic = TLS_MAGIC_INVALID;
    memcpy(ostd->clone_tls, cur_tls, sizeof(*ostd->clone_tls));
    cur_tls->magic = TLS_MAGIC_VALID;
    ostd->clone_tls->self = ostd->clone_tls;
    os_set_dr_tls_base(dcontext, NULL, (byte *)ostd->clone_tls);
#elif defined(AARCHXX)
    /* On AArchXX we
     * always leave the DR pointer in the slot inside the app's or privlib's TLS.
     * That means we have nothing to do here.
     * For SYS_clone, we are ok with the parent's TLS being inherited until
     * new_thread_setup() calls set_thread_register_from_clone_record().
     */
#endif
}
static void
os_new_thread_pre(void)
{
* attaching to an app that is continually making threads.
* XXX i#1305: if we fully suspend all threads during attach we can
* get rid of this barrier.
*/
wait_for_event(dr_attach_finished, 0);
ATOMIC_INC(int, uninit_thread_count);
}
/* This runs before a clone, including before cloning a client thread in
 * dr_create_client_thread.  Hence os_clone_pre is used for app threads as well
 * as client threads.  Do not add anything that we do not want to happen while
 * in DR mode.
 */
static void
os_clone_pre(dcontext_t *dcontext)
{
    /* With the private loader in use, switch the library TLS segment to the
     * app's before the clone.
     * Please refer to comment on os_switch_lib_tls.
     */
    if (INTERNAL_OPTION(private_loader))
        os_switch_lib_tls(dcontext, true);
    /* Swap DR's own TLS in the to-app direction (see os_swap_dr_tls()). */
    os_swap_dr_tls(dcontext, true /*to_app*/);
}
/* This runs after a clone, including after
 * cloning a client thread in dr_create_client_thread.  Hence os_clone_post is
 * used for app threads as well as client threads.  Do not add anything that
 * we do not want to happen while in DR mode.
 */
void
os_clone_post(dcontext_t *dcontext)
{
    /* Undo the DR TLS swap performed by os_clone_pre(). */
    os_swap_dr_tls(dcontext, false /*to_app*/);
}
/* Returns the dr_tls_base value recorded in this thread's os_thread_data_t
 * (filled in by os_thread_init()).
 */
byte *
os_get_dr_tls_base(dcontext_t *dcontext)
{
    return ((os_thread_data_t *)dcontext->os_field)->dr_tls_base;
}
/* We only need to swap the library segment when we are using the private
 * loader.
 */
bool
os_should_swap_state(void)
{
#ifdef X86
    /* On x86 both mangle_app_seg and the private loader must be enabled. */
    return INTERNAL_OPTION(mangle_app_seg) && INTERNAL_OPTION(private_loader);
#elif defined(AARCHXX) || defined(RISCV64)
    /* Here only the private loader option matters. */
    return INTERNAL_OPTION(private_loader);
#endif
}
/* Reports whether the library TLS segment currently holds the app's base.
 * Always true unless this is x86 with mangle_app_seg enabled.
 */
bool
os_using_app_state(dcontext_t *dcontext)
{
#ifdef X86
    /* Compare the live segment base against the app's base recorded in
     * the dcontext.
     */
    if (INTERNAL_OPTION(mangle_app_seg)) {
        byte *cur_base = get_segment_base(TLS_REG_LIB);
        return cur_base == os_get_app_tls_base(dcontext, TLS_REG_LIB);
    }
#endif
    return true;
}
/* Switches the library TLS segment between the
 * private lib segment base and the app's segment base.
 * i#107/i#2088: If the app wants to use SEG_TLS, we should also switch that back at
 * this boundary, but there are many places where we simply assume it is always
 * installed.
 */
void
os_swap_context(dcontext_t *dcontext, bool to_app, dr_state_flags_t flags)
{
    /* Library segment: swapped whenever os_should_swap_state() says so. */
    if (os_should_swap_state()) {
        os_switch_seg_to_context(dcontext, LIB_SEG_TLS, to_app);
    }
    /* DR's own TLS: swapped only when the caller asks via DR_STATE_DR_TLS. */
    if (TEST(DR_STATE_DR_TLS, flags)) {
        os_swap_dr_tls(dcontext, to_app);
    }
}
/* Brings the current thread under DR control: swaps context and signal mask
 * in the to-DR direction, then starts the itimer.  This is the mirror image
 * of os_thread_not_under_dynamo().
 */
void
os_thread_under_dynamo(dcontext_t *dcontext)
{
    os_swap_context(dcontext, false /*to_app*/, DR_STATE_GO_NATIVE);
    signal_swap_mask(dcontext, false);
    start_itimer(dcontext);
}
/* Releases the current thread from DR control: stops the itimer, then swaps
 * signal mask and context in the to-app direction.  The steps are the reverse
 * of os_thread_under_dynamo().
 */
void
os_thread_not_under_dynamo(dcontext_t *dcontext)
{
    stop_itimer(dcontext);
    signal_swap_mask(dcontext, true);
    os_swap_context(dcontext, true /*to_app*/, DR_STATE_GO_NATIVE);
}
/* First phase of taking the whole process under DR: reinstates the signal
 * handlers and sets up the vsyscall hook.
 */
void
os_process_under_dynamorio_initiate(dcontext_t *dcontext)
{
    LOG(GLOBAL, LOG_THREADS, 1, "process now under DR\n");
    signal_reinstate_handlers(dcontext, true);
    /* XXX: there is a tradeoff around the vsyscall hook.  Leaving it in place
     * has appeal
     * because dr_app_stop() has no barrier and a thread sent native might
     * resume from vsyscall after we remove the hook.  However, if we leave the
     * hook, then the next takeover signal might hit a native thread that's
     * inside DR just to go back native after having hit the hook.  For now we
     * remove the hook and rely on translate_from_synchall_to_dispatch() moving
     * threads from vsyscall to our gencode and not relying on the hook being
     * present to finish up their go-native code.
     */
    hook_vsyscall(dcontext, false);
}
/* Second phase of taking the whole process under DR: reinstates the alarm
 * signal handlers and, in IF_NO_MEMQUERY builds, refreshes the memory cache.
 */
void
os_process_under_dynamorio_complete(dcontext_t *dcontext)
{
    signal_reinstate_alarm_handlers(dcontext);
    IF_NO_MEMQUERY({
        /* Refresh the memory cache from the OS now that we have all the
         * threads, if there may have been a gap between setup and start.
         */
        if (dr_api_entry) {
            memcache_update_all_from_os();
        }
    });
}
/* Releases the whole process from DR control: removes DR's signal handlers
 * and the vsyscall hook.
 */
void
os_process_not_under_dynamorio(dcontext_t *dcontext)
{
    /* Handlers first, then the hook, then the log line. */
    signal_remove_handlers(dcontext);
    unhook_vsyscall();
    LOG(GLOBAL, LOG_THREADS, 1, "process no longer under DR\n");
}
/* Detach hook: in this implementation it unconditionally returns false for
 * every thread record.
 */
bool
detach_do_not_translate(thread_record_t *tr)
{
    return false;
}
/* Detach hook: intentionally a no-op in this implementation. */
void
detach_finalize_translation(thread_record_t *tr, priv_mcontext_t *mc)
{
}
/* Detach hook: intentionally a no-op in this implementation. */
void
detach_finalize_cleanup(void)
{
}
/* Returns the result of the raw SYS_getpgid system call (issued with no
 * arguments).
 */
static pid_t
get_process_group_id(void)
{
    return dynamorio_syscall(SYS_getpgid, 0);
}
/* Returns the parent process id, obtained via a raw SYS_getppid system call. */
process_id_t
get_parent_id(void)
{
    process_id_t ppid = dynamorio_syscall(SYS_getppid, 0);
    return ppid;
}
/* Asks the kernel for the current thread's id.  With kernel_thread_groups set
 * this is SYS_thread_selfid on MACOS and SYS_gettid elsewhere; otherwise it
 * falls back to SYS_getpid.
 */
thread_id_t
get_sys_thread_id(void)
{
    if (kernel_thread_groups) {
#ifdef MACOS
        return dynamorio_syscall(SYS_thread_selfid, 0);
#else
        return dynamorio_syscall(SYS_gettid, 0);
#endif
    }
    return dynamorio_syscall(SYS_getpid, 0);
}
/* Returns the current thread's id, preferring the value cached in TLS and
 * falling back to a system call when TLS has no valid id.
 */
thread_id_t
d_r_get_thread_id(void)
{
    /* We avoid a syscall on every invocation: we call
     * this routine in read and recursive locks so use the TLS value instead
     */
    thread_id_t id = get_tls_thread_id();
    return (id != INVALID_THREAD_ID) ? id : get_sys_thread_id();
}
thread_id_t
get_tls_thread_id(void)
{
ptr_int_t tid = 0;
if (!is_thread_tls_initialized())
return INVALID_THREAD_ID;
READ_TLS_SLOT_IMM(TLS_THREAD_ID_OFFSET, tid);
* 0x000000007127357b <get_tls_thread_id+37>: mov %gs:(%rax),%rax
* 0x000000007127357f <get_tls_thread_id+41>: mov %rax,-0x8(%rbp)
* so we remove the TRUNCATE check and trucate it on return.
*/
return (thread_id_t)tid;
}
dcontext_t *
get_thread_private_dcontext(void)
{
#ifdef HAVE_TLS
dcontext_t *dcontext = NULL;
* to os_tls_init, as well as after os_tls_exit, and early in a new
* thread's initialization (see comments below on that).
*/
if (!is_thread_tls_initialized())
return standalone_library ? GLOBAL_DCONTEXT : NULL;
* that was affecting performance (xref PR 207366: but I'm leaving the assert in
* for now so debug build will still incur it). So we fixed the cases that
* needed that:
*
* - dynamo_thread_init() calling is_thread_initialized() for a new thread
* created via clone or the start/stop interface: so we have
* is_thread_initialized() pay the d_r_get_thread_id() cost.
* - new_thread_setup()'s ENTER_DR_HOOK kstats, or a crash and the signal
* handler asking about dcontext: we have new_thread_dynamo_start()
* clear the segment register for us early on.
* - child of fork (ASSERT_OWN_NO_LOCKS, etc. on re-entering DR):
* here we just suppress the assert: we'll use this same dcontext.
* xref PR 209518 where w/o this fix we used to need an extra KSTOP.
*
* An alternative would be to have the parent thread clear the segment
* register, or even set up the child's TLS ahead of time ourselves
* (and special-case so that we know if at clone syscall the app state is not
* quite correct: but we're already stealing a register there: PR 286194).
* We could also have the kernel set up TLS for us (PR 285898).
*
* For hotp_only or non-full-control (native_exec, e.g.) (PR 212012), this
* routine is not the only issue: we have to catch all new threads since
* hotp_only gateways assume tls is set up.
* Xref PR 192231.
*/
DOCHECK(CHKLVL_DEFAULT + 1, {
ASSERT(get_tls_thread_id() == get_sys_thread_id() ||
pid_cached != get_process_id());
});
READ_TLS_SLOT_IMM(TLS_DCONTEXT_OFFSET, dcontext);
return dcontext;
#else
* reading and writing same tid! Since both get and set are only for
* the current thread, they cannot both execute simultaneously for the
* same tid, right?
*/
thread_id_t tid = d_r_get_thread_id();
int i;
if (tls_table != NULL) {
for (i = 0; i < MAX_THREADS; i++) {
if (tls_table[i].tid == tid) {
return tls_table[i].dcontext;
}
}
}
return NULL;
#endif
}
/* Records dcontext as the current thread's dcontext.
 * With HAVE_TLS it is written to the TLS slot at TLS_DCONTEXT_OFFSET.
 * Without HAVE_TLS it is stored in tls_table under tls_lock, keyed by thread
 * id; storing NULL into an existing entry also releases that entry.
 */
void
set_thread_private_dcontext(dcontext_t *dcontext)
{
#ifdef HAVE_TLS
    ASSERT(is_thread_tls_allocated());
    WRITE_TLS_SLOT_IMM(TLS_DCONTEXT_OFFSET, dcontext);
#else
    thread_id_t tid = d_r_get_thread_id();
    int i;
    bool found = false;
    ASSERT(tls_table != NULL);
    d_r_mutex_lock(&tls_lock);
    /* First pass: update this thread's existing entry, if any. */
    for (i = 0; i < MAX_THREADS; i++) {
        if (tls_table[i].tid != tid)
            continue;
        /* Setting to NULL clears the tid so the entry can be reused. */
        if (dcontext == NULL)
            tls_table[i].tid = 0;
        tls_table[i].dcontext = dcontext;
        found = true;
        break;
    }
    /* Second pass: claim the first empty entry.  Nothing is done when asked
     * to store NULL for a thread that has no entry.
     */
    if (!found && dcontext != NULL) {
        for (i = 0; i < MAX_THREADS; i++) {
            if (tls_table[i].tid != 0)
                continue;
            tls_table[i].tid = tid;
            tls_table[i].dcontext = dcontext;
            found = true;
            break;
        }
    }
    d_r_mutex_unlock(&tls_lock);
    ASSERT(found);
#endif
}
/* Replaces the recorded thread id "old" with "new":
 * use for forking: child should replace parent's id with its own
 */
static void
replace_thread_id(thread_id_t old, thread_id_t new)
{
#ifdef HAVE_TLS
    /* The write macro below is handed a local copy of the new id. */
    thread_id_t new_tid = new;
    ASSERT(is_thread_tls_initialized());
    /* Debug check: the id currently stored in TLS must be the one being
     * replaced.
     */
    DOCHECK(1, {
        thread_id_t old_tid = 0;
        IF_LINUX_ELSE(READ_TLS_INT_SLOT_IMM(TLS_THREAD_ID_OFFSET, old_tid),
                      READ_TLS_SLOT_IMM(TLS_THREAD_ID_OFFSET, old_tid));
        ASSERT(old_tid == old);
    });
    IF_LINUX_ELSE(WRITE_TLS_INT_SLOT_IMM(TLS_THREAD_ID_OFFSET, new_tid),
                  WRITE_TLS_SLOT_IMM(TLS_THREAD_ID_OFFSET, new_tid));
#else
    int i;
    d_r_mutex_lock(&tls_lock);
    /* Rewrite the first table entry that carries the old id, if any. */
    for (i = 0; i < MAX_THREADS; i++) {
        if (tls_table[i].tid != old)
            continue;
        tls_table[i].tid = new;
        break;
    }
    d_r_mutex_unlock(&tls_lock);
#endif
}
/* Translates OS protection bits (PROT_EXEC/PROT_READ/PROT_WRITE) into the
 * corresponding MEMPROT_* bits.  Any other bits in prot are ignored.
 */
static inline uint
osprot_to_memprot(uint prot)
{
    uint result = 0;
    result |= TEST(PROT_EXEC, prot) ? MEMPROT_EXEC : 0;
    result |= TEST(PROT_READ, prot) ? MEMPROT_READ : 0;
    result |= TEST(PROT_WRITE, prot) ? MEMPROT_WRITE : 0;
    return result;
}
/* Returns osprot flags, preserving all native protection flags except
 * for RWX, which are replaced according to memprot */
uint
osprot_replace_memprot(uint old_osprot, uint memprot)
{
    /* old_osprot is not consulted.  Only the PROT_ protection flags matter here
     * and they are separate from any other MAP_ flags passed to mmap()
     */
    return memprot_to_osprot(memprot);
}
/* Issues a raw SYS_mprotect on [p, p+size) with OS protection bits prot and
 * returns the raw system call result.
 */
static inline long
mprotect_syscall(byte *p, size_t size, uint prot)
{
    long rc = dynamorio_syscall(SYS_mprotect, 3, p, size, prot);
    return rc;
}
/* Unmaps [p, p+size) directly via munmap.  size must be a non-zero multiple of
 * PAGE_SIZE.  The flags argument is not used here.
 * On return *error_code is HEAP_ERROR_SUCCESS or the negated munmap result.
 * Returns true on success.
 */
bool
os_raw_mem_free(void *p, size_t size, uint flags, heap_error_code_t *error_code)
{
    long rc;
    ASSERT(error_code != NULL);
    ASSERT(size > 0 && ALIGNED(size, PAGE_SIZE));
    rc = munmap_syscall(p, size);
    if (rc == 0) {
        *error_code = HEAP_ERROR_SUCCESS;
        return true;
    }
    *error_code = -rc;
    return false;
}
/* Tries to allocate memory at "preferred" directly from the OS;
 * caller is required to handle thread synchronization and to update
 * any bookkeeping of its own.
 */
/* Maps "size" bytes of private anonymous memory directly from the OS.
 *
 * preferred:  requested address, or NULL for no preference.  If non-NULL and
 *             the kernel places the mapping elsewhere, the mapping is released
 *             and the call fails with HEAP_ERROR_NOT_AT_PREFERRED.
 * size:       non-zero multiple of PAGE_SIZE.
 * prot:       MEMPROT_* bits, converted with memprot_to_osprot().
 * flags:      RAW_ALLOC_32BIT adds MAP_32BIT.
 * error_code: always written: HEAP_ERROR_SUCCESS on success, otherwise the
 *             negated mmap result or HEAP_ERROR_NOT_AT_PREFERRED.
 *
 * Returns the mapped address, or NULL on failure.
 */
void *
os_raw_mem_alloc(void *preferred, size_t size, uint prot, uint flags,
                 heap_error_code_t *error_code)
{
    byte *p;
    uint os_prot = memprot_to_osprot(prot);
    uint os_flags =
        MAP_PRIVATE | MAP_ANONYMOUS | (TEST(RAW_ALLOC_32BIT, flags) ? MAP_32BIT : 0);
#if defined(MACOS) && defined(AARCH64)
    if (TEST(MEMPROT_EXEC, prot))
        os_flags |= MAP_JIT;
#endif

    ASSERT(error_code != NULL);
    ASSERT(size > 0 && ALIGNED(size, PAGE_SIZE));

    p = mmap_syscall(preferred, size, os_prot, os_flags, -1, 0);
    if (!mmap_syscall_succeeded(p)) {
        *error_code = -(heap_error_code_t)(ptr_int_t)p;
        LOG(GLOBAL, LOG_HEAP, 3, "os_raw_mem_alloc " SZFMT " bytes failed" PFX "\n", size,
            p);
        return NULL;
    }
    if (preferred != NULL && p != preferred) {
        /* Use a scratch code for the cleanup unmap so that its result cannot
         * overwrite the error we report to the caller.
         */
        heap_error_code_t free_error;
        os_raw_mem_free(p, size, flags, &free_error);
        *error_code = HEAP_ERROR_NOT_AT_PREFERRED;
        LOG(GLOBAL, LOG_HEAP, 3, "os_raw_mem_alloc " SZFMT " bytes failed" PFX "\n", size,
            p);
        return NULL;
    }
    /* Match os_heap_reserve(): report success explicitly. */
    *error_code = HEAP_ERROR_SUCCESS;
    LOG(GLOBAL, LOG_HEAP, 2, "os_raw_mem_alloc: " SZFMT " bytes @ " PFX "\n", size, p);
    return p;
}
#ifdef LINUX
/* Sets up the emulated program break the first time it is called: maps
 * BRK_INITIAL_SIZE bytes of private anonymous read-write memory, using exe_end
 * as the mmap address hint, and points app_brk_map/app_brk_cur/app_brk_end at
 * it.  Later calls return immediately.  Requires the emulate_brk option.
 */
void
init_emulated_brk(app_pc exe_end)
{
    ASSERT(DYNAMO_OPTION(emulate_brk));
    if (app_brk_map != NULL) {
        /* Already initialized. */
        return;
    }
    /* The emulated brk starts out logically empty (cur == map), but
     * we need at least a page to have an mmap placeholder.  We also want to reserve
     * enough memory to avoid a client lib or other mmap truncating the brk at a
     * too-small size, which can crash the app (i#3982).
     */
    /* Parenthesized so the expansion is safe inside any larger expression. */
#    define BRK_INITIAL_SIZE (4 * 1024 * 1024)
    app_brk_map = mmap_syscall(exe_end, BRK_INITIAL_SIZE, PROT_READ | PROT_WRITE,
                               MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    ASSERT(mmap_syscall_succeeded(app_brk_map));
    app_brk_cur = app_brk_map;
    app_brk_end = app_brk_map + BRK_INITIAL_SIZE;
    LOG(GLOBAL, LOG_HEAP, 1, "%s: initial brk is " PFX "-" PFX "\n", __FUNCTION__,
        app_brk_cur, app_brk_end);
}
/* Emulates the brk system call on top of the mapping created by
 * init_emulated_brk().  new_val is the requested break; it is rounded up to a
 * page boundary.  Returns the resulting current break (app_brk_cur), which is
 * unchanged if the request was rejected or the underlying syscall failed.
 * Calls handle_app_brk() whenever the break actually moved.
 */
static byte *
emulate_app_brk(dcontext_t *dcontext, byte *new_val)
{
    byte *old_brk = app_brk_cur;
    ASSERT(DYNAMO_OPTION(emulate_brk));
    LOG(THREAD, LOG_HEAP, 2, "%s: cur=" PFX ", requested=" PFX "\n", __FUNCTION__,
        app_brk_cur, new_val);
    new_val = (byte *)ALIGN_FORWARD(new_val, PAGE_SIZE);
    if (new_val == NULL || new_val == app_brk_cur ||
        /* Requests below the base of the emulated brk are not honored. */
        new_val < app_brk_map) {
        /* Nothing to do: just report the current value. */
    } else if (new_val < app_brk_cur) {
        /* Shrink: unmap the tail between the new and the current break. */
        if (munmap_syscall(new_val, app_brk_cur - new_val) == 0) {
            app_brk_cur = new_val;
            app_brk_end = new_val;
        }
    } else if (new_val < app_brk_end) {
        /* Grow within space that is already mapped. */
        app_brk_cur = new_val;
    } else {
        /* Grow past the mapped end: extend the mapping in place (flags 0). */
        byte *remap = (byte *)dynamorio_syscall(SYS_mremap, 4, app_brk_map,
                                                app_brk_end - app_brk_map,
                                                new_val - app_brk_map, 0);
        if (mmap_syscall_succeeded(remap)) {
            ASSERT(remap == app_brk_map);
            app_brk_cur = new_val;
            app_brk_end = new_val;
        } else {
            LOG(THREAD, LOG_HEAP, 1, "%s: mremap to " PFX " failed\n", __FUNCTION__,
                new_val);
        }
    }
    /* Notify the rest of DR only if the break moved. */
    if (app_brk_cur != old_brk)
        handle_app_brk(dcontext, app_brk_map, old_brk, app_brk_cur);
    return app_brk_cur;
}
#endif
#ifdef LINUX
DR_API
* is no realloc for Windows: supposed to reserve yourself and then commit in
* pieces.
*/
void *
dr_raw_mremap(void *old_address, size_t old_size, size_t new_size, int flags,
void *new_address)
{
byte *res;
dr_mem_info_t info;
dcontext_t *dcontext = get_thread_private_dcontext();
DEBUG_DECLARE(bool ok =)
query_memory_ex(old_address, &info);
* inside. For now we assume our handling of it doesn't care.
*/
ASSERT(ok);
if (is_pretend_or_executable_writable(old_address))
info.prot |= DR_MEMPROT_WRITE;
res = (byte *)dynamorio_syscall(SYS_mremap, 5, old_address, old_size, new_size, flags,
new_address);
handle_app_mremap(dcontext, res, new_size, old_address, old_size, info.prot,
info.size);
return res;
}
DR_API
void *
dr_raw_brk(void *new_address)
{
dcontext_t *dcontext = get_thread_private_dcontext();
if (DYNAMO_OPTION(emulate_brk)) {
return (void *)emulate_app_brk(dcontext, (byte *)new_address);
} else {
* it doesn't mater.
*/
if (new_address == NULL) {
return (void *)dynamorio_syscall(SYS_brk, 1, new_address);
} else {
byte *old_brk = (byte *)dynamorio_syscall(SYS_brk, 1, 0);
byte *res = (byte *)dynamorio_syscall(SYS_brk, 1, new_address);
handle_app_brk(dcontext, NULL, old_brk, res);
return res;
}
}
}
#endif
/* Unmaps [p, p+size) via munmap.  *error_code receives HEAP_ERROR_SUCCESS or
 * the negated munmap result; a failure also trips an assert.
 */
void
os_heap_free(void *p, size_t size, heap_error_code_t *error_code)
{
    long rc;
    ASSERT(error_code != NULL);
    /* Logging is skipped once DR has exited. */
    if (!dynamo_exited)
        LOG(GLOBAL, LOG_HEAP, 4, "os_heap_free: %d bytes @ " PFX "\n", size, p);
    rc = munmap_syscall(p, size);
    if (rc == 0)
        *error_code = HEAP_ERROR_SUCCESS;
    else
        *error_code = -rc;
    ASSERT(rc == 0);
}
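/* Reserves virtual address space without committing swap space for it,
   and of course no physical pages since it will never be touched */
/* To be transparent to the app, we avoid the program break and are
 * instead using mmap, and asserting that all os_heap requests are for
 * reasonably large pieces of memory */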
/* Reserves "size" bytes of private anonymous address space with mmap.
 *
 * preferred:  requested address, or NULL for no preference.  If non-NULL and
 *             the kernel places the mapping elsewhere, the mapping is released
 *             and the call fails with HEAP_ERROR_NOT_AT_PREFERRED.
 * size:       non-zero multiple of PAGE_SIZE.
 * error_code: always written: HEAP_ERROR_SUCCESS, the negated mmap result, or
 *             HEAP_ERROR_NOT_AT_PREFERRED.
 * executable: only affects the mapping on VMX86_SERVER (PROT_EXEC) and on
 *             MACOS AARCH64 (MAP_JIT).
 *
 * The memory is mapped PROT_NONE (except as noted); use os_heap_commit() to
 * make it accessible.  Returns the reserved address, or NULL on failure.
 */
void *
os_heap_reserve(void *preferred, size_t size, heap_error_code_t *error_code,
                bool executable)
{
    void *p;
    uint prot = PROT_NONE;
#ifdef VMX86_SERVER
    /* Executable reservations must lie in the mmap_text region: it is used for
     * gencode (PROT_EXEC).
     */
    ASSERT(!os_in_vmkernel_userworld() || !executable || preferred == NULL ||
           ((byte *)preferred >= os_vmk_mmap_text_start() &&
            ((byte *)preferred) + size <= os_vmk_mmap_text_end()));
    /* NOTE(review): as we read it, a preferred
     * address will be honored, even though any execution there will fault.
     */
    /* FIXME: with PROT_EXEC set here, our guard pages and other
     * non-committed memory, while not writable, is readable.
     * Plus, we can't later clear all prot bits for userworld mmap due to PR 107872
     * (PR 365748 covers fixing this for us).
     * But in most uses we should get our preferred vmheap and shouldn't run
     * out of vmheap, so this should be a corner-case issue.
     */
    if (executable)
        prot = PROT_EXEC;
#endif
    ASSERT(size > 0 && ALIGNED(size, PAGE_SIZE));
    ASSERT(error_code != NULL);
    uint os_flags = MAP_PRIVATE |
        MAP_ANONYMOUS IF_X64(| (DYNAMO_OPTION(heap_in_lower_4GB) ? MAP_32BIT : 0));
#if defined(MACOS) && defined(AARCH64)
    if (executable)
        os_flags |= MAP_JIT;
#endif

    p = mmap_syscall(preferred, size, prot, os_flags, -1, 0);
    if (!mmap_syscall_succeeded(p)) {
        *error_code = -(heap_error_code_t)(ptr_int_t)p;
        LOG(GLOBAL, LOG_HEAP, 4, "os_heap_reserve %d bytes failed " PFX "\n", size, p);
        return NULL;
    } else if (preferred != NULL && p != preferred) {
        /* We didn't get the preferred address.  To
         * give greater control we fail the reservation. */
        heap_error_code_t dummy;
        *error_code = HEAP_ERROR_NOT_AT_PREFERRED;
        os_heap_free(p, size, &dummy);
        ASSERT(dummy == HEAP_ERROR_SUCCESS);
        /* Arguments follow the message: "at <where we landed> not preferred
         * <what was asked for>".  They used to be passed the other way round.
         */
        LOG(GLOBAL, LOG_HEAP, 4,
            "os_heap_reserve %d bytes at " PFX " not preferred " PFX "\n", size, p,
            preferred);
        return NULL;
    } else {
        *error_code = HEAP_ERROR_SUCCESS;
    }
    LOG(GLOBAL, LOG_HEAP, 2, "os_heap_reserve: %d bytes @ " PFX "\n", size, p);
#ifdef VMX86_SERVER
    /* Same region requirement as above, now checked on the actual result. */
    ASSERT(!os_in_vmkernel_userworld() || !executable ||
           ((byte *)p >= os_vmk_mmap_text_start() &&
            ((byte *)p) + size <= os_vmk_mmap_text_end()));
#endif
#if defined(ANDROID) && defined(DEBUG)
    /* Label the mapping so that it can be identified. */
    dynamorio_syscall(SYS_prctl, 5, PR_SET_VMA, PR_SET_VMA_ANON_NAME, p, size,
                      "DynamoRIO-internal");
#endif
    return p;
}
/* Walks the memory map in iteration order looking for the first gap between
 * consecutive mappings that, clipped to [start, end), is at least "size" bytes.
 * On success stores the clipped gap bounds through found_start/found_end (each
 * may be NULL) and returns true; otherwise returns false.
 */
static bool
find_free_memory_in_region(byte *start, byte *end, size_t size, byte **found_start OUT,
                           byte **found_end OUT)
{
    memquery_iter_t iter;
    /* The search does not begin at address 0: in practice mmap won't
     * give me that address if I use it as a hint.
     */
    app_pc prev_end = (app_pc)(PAGE_SIZE * 16);
    bool found = false;
    memquery_iterator_start(&iter, NULL, false);
    while (memquery_iterator_next(&iter)) {
        /* The gap under consideration is [prev_end, iter.vm_start), clipped
         * to the requested region.
         */
        if (iter.vm_start >= start &&
            MIN(iter.vm_start, end) - MAX(prev_end, start) >= size) {
            if (found_start != NULL)
                *found_start = MAX(prev_end, start);
            if (found_end != NULL)
                *found_end = MIN(iter.vm_start, end);
            found = true;
            break;
        }
        /* Stop once the mappings reach the end of the region. */
        if (iter.vm_end >= end)
            break;
        prev_end = iter.vm_end;
    }
    memquery_iterator_stop(&iter);
    return found;
}
/* Reserves "size" bytes somewhere inside [start, end).  start, end and size
 * must be page-aligned.  The full range (0 .. POINTER_MAX) is treated as "no
 * constraint" and forwarded to os_heap_reserve() with no preferred address.
 * Returns the reserved address or NULL; *error_code is HEAP_ERROR_SUCCESS or
 * HEAP_ERROR_CANT_RESERVE_IN_REGION for the constrained case.
 */
void *
os_heap_reserve_in_region(void *start, void *end, size_t size,
                          heap_error_code_t *error_code, bool executable)
{
    byte *p = NULL;
    void *search_from = start;
    byte *gap_start = NULL, *gap_end = NULL;
    uint iters = 0;

    ASSERT(ALIGNED(start, PAGE_SIZE) && ALIGNED(end, PAGE_SIZE));
    ASSERT(ALIGNED(size, PAGE_SIZE));

    LOG(GLOBAL, LOG_HEAP, 3,
        "os_heap_reserve_in_region: " SZFMT " bytes in " PFX "-" PFX "\n", size, start,
        end);

    /* Unconstrained request: let the kernel choose. */
    if (start == (void *)PTR_UINT_0 && end == (void *)POINTER_MAX)
        return os_heap_reserve(NULL, size, error_code, executable);

    /* Bound on how many gaps we are willing to try. */
#define RESERVE_IN_REGION_MAX_ITERS 128
    while (find_free_memory_in_region(search_from, end, size, &gap_start, &gap_end)) {
        /* We reserve at the end of the gap.  This avoids the common case of
         * a large binary + heap at attach where we're likely to reserve
         * right at the start of the brk: we'd prefer to leave more brk space.
         */
        p = os_heap_reserve(gap_end - size, size, error_code, executable);
        if (p != NULL) {
            ASSERT(*error_code == HEAP_ERROR_SUCCESS);
            ASSERT(p >= (byte *)start && p + size <= (byte *)end);
            break;
        }
        if (++iters > RESERVE_IN_REGION_MAX_ITERS) {
            ASSERT_NOT_REACHED();
            break;
        }
        /* That gap did not work out: continue searching after it. */
        search_from = gap_end;
    }
    *error_code = (p == NULL) ? HEAP_ERROR_CANT_RESERVE_IN_REGION : HEAP_ERROR_SUCCESS;

    LOG(GLOBAL, LOG_HEAP, 2,
        "os_heap_reserve_in_region: reserved " SZFMT " bytes @ " PFX " in " PFX "-" PFX
        "\n",
        size, p, start, end);
    return p;
}
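/* Commits pages previously reserved with os_heap_reserve().
   An allocation can be built by combining os_heap_reserve()
   and os_heap_commit on a subset of the reserved pages. */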
bool
os_heap_commit(void *p, size_t size, uint prot, heap_error_code_t *error_code)
{
    uint os_prot = memprot_to_osprot(prot);
    long res;
    ASSERT(size > 0 && ALIGNED(size, PAGE_SIZE));
    ASSERT(p);
    ASSERT(error_code != NULL);

    /* Committing amounts to giving the pages their requested protection. */
    res = mprotect_syscall(p, size, os_prot);
    if (res != 0) {
        /* Report the negated mprotect result. */
        *error_code = -res;
        return false;
    }
    *error_code = HEAP_ERROR_SUCCESS;

    LOG(GLOBAL, LOG_HEAP, 2, "os_heap_commit: %d bytes @ " PFX "\n", size, p);
    return true;
}
/* Decommits [p, p+size).  Currently this is a no-op apart from logging: the
 * pages are left as they are and *error_code is always set to
 * HEAP_ERROR_SUCCESS.
 */
void
os_heap_decommit(void *p, size_t size, heap_error_code_t *error_code)
{
    ASSERT(error_code != NULL);

    if (!dynamo_exited)
        LOG(GLOBAL, LOG_HEAP, 4, "os_heap_decommit: %d bytes @ " PFX "\n", size, p);

    *error_code = HEAP_ERROR_SUCCESS;
    /* For now we do nothing here.  The mmap call below is a sketch inside this
     * TODO, not live code.
     */
    /* TODO:
           p = mmap_syscall(p, size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
       we should either do a mremap()
       or we can do a munmap() followed 'quickly' by a mmap() -
       also see above the comment that os_heap_reserve() in fact is not so lightweight
    */
}
/* In this implementation, unconditionally returns true; last_error_code is
 * not consulted.
 */
bool
os_heap_systemwide_overcommit(heap_error_code_t last_error_code)
{
    return true;
}
/* In this implementation, unconditionally returns false and leaves both
 * output parameters untouched.
 */
bool
os_heap_get_commit_limit(size_t *commit_used, size_t *commit_limit)
{
    return false;
}
/* Yields the processor: swtch_pri(0) on MACOS, a raw SYS_sched_yield
 * elsewhere.
 */
void
os_thread_yield(void)
{
#ifdef MACOS
    swtch_pri(0);
#else
    dynamorio_syscall(SYS_sched_yield, 0);
#endif
}
/* Sends signal signum to thread tid of process pid via SYS_tgkill.
 * Returns true if the system call reported success.  Not implemented on MACOS.
 */
bool
thread_signal(process_id_t pid, thread_id_t tid, int signum)
{
#ifdef MACOS
    /* FIXME: the Mach-level interface works on thread ports.
     * Need to figure out whether we support raw Mach threads w/o pthread on top.
     */
    ASSERT_NOT_IMPLEMENTED(false);
    return false;
#else
    /* tgkill wants the thread group id of the target.
     * However, we can have threads sharing address space but not pid
     * (if created via CLONE_VM but not CLONE_THREAD), so make sure to
     * use the pid of the target thread, not our pid.
     */
    ptr_int_t rc = dynamorio_syscall(SYS_tgkill, 3, pid, tid, signum);
    return rc == 0;
#endif
}
/* Queues signal signum, carrying "value" as its payload, to thread tid of
 * process pid via SYS_rt_tgsigqueueinfo.  Returns true if the system call
 * returned 0.  Always returns false on MACOS.
 */
bool
thread_signal_queue(process_id_t pid, thread_id_t tid, int signum, void *value)
{
#ifdef MACOS
    return false;
#else
    kernel_siginfo_t info;
    ptr_int_t rc;
    memset(&info, 0, sizeof(info));
    info.si_signo = signum;
    info.si_code = SI_QUEUE;
    info.si_value.sival_ptr = value;
    /* XXX: the system call used here may not be available everywhere.
     * The caller can use is_sigqueue_supported() to check.
     */
    rc = dynamorio_syscall(SYS_rt_tgsigqueueinfo, 4, pid, tid, signum, &info);
    return rc == 0;
#endif
}
/* Sends signum to the thread described by tr.  On MACOS this goes through
 * SYS___pthread_kill on the thread's port and fails if tr has no dcontext;
 * elsewhere it defers to thread_signal().
 */
static bool
known_thread_signal(thread_record_t *tr, int signum)
{
#ifdef MACOS
    ptr_int_t res;
    /* The thread port is stored in the dcontext. */
    if (tr->dcontext == NULL)
        return FALSE;
    res = dynamorio_syscall(SYS___pthread_kill, 2, tr->dcontext->thread_port, signum);
    LOG(THREAD_GET, LOG_ALL, 3, "%s: signal %d to port %d => %ld\n", __FUNCTION__, signum,
        tr->dcontext->thread_port, res);
    return res == 0;
#else
    return thread_signal(tr->pid, tr->id, signum);
#endif
}
/* Puts the calling thread to sleep for (about) the given number of
 * milliseconds.  On MACOS this waits on a private semaphore with a timeout;
 * elsewhere it uses SYS_nanosleep and restarts with the remaining time when
 * interrupted.
 */
void
os_thread_sleep(uint64 milliseconds)
{
#ifdef MACOS
    semaphore_t sem = MACH_PORT_NULL;
    int res;
#else
    struct timespec remain;
    int count = 0;
#endif
    struct timespec req;
    /* Split the request into whole seconds plus leftover nanoseconds. */
    req.tv_sec = (milliseconds / 1000);
    req.tv_nsec = (milliseconds % 1000) * 1000000;
#ifdef MACOS
    if (sem == MACH_PORT_NULL) {
        DEBUG_DECLARE(kern_return_t res =)
        semaphore_create(mach_task_self(), &sem, SYNC_POLICY_FIFO, 0);
        ASSERT(res == KERN_SUCCESS);
    }
    res =
        dynamorio_syscall(SYSNUM_NO_CANCEL(SYS___semwait_signal), 6, sem, MACH_PORT_NULL,
                          1, 1, (int64_t)req.tv_sec, (int32_t)req.tv_nsec);
    if (res == -EINTR) {
        /* An interrupted wait is currently not resumed. */
    }
#else
    /* XXX: for accurate sleeps in the presence of interruptions we should
     * be using SYS_clock_nanosleep w/ an absolute time instead of relative
     */
    while (dynamorio_syscall(SYS_nanosleep, 2, &req, &remain) == -EINTR) {
        /* NOTE(review): the last two clauses are joined by ||, so together
         * they are always true -- confirm whether && was intended.
         */
        ASSERT_CURIOSITY_ONCE(remain.tv_sec <= req.tv_sec &&
                              (remain.tv_sec < req.tv_sec ||
                               /* there seems to be some rounding, and sometimes
                                * remain nsec > req nsec (I've seen 40K diff)
                                */
                               req.tv_nsec - remain.tv_nsec < 100000 ||
                               req.tv_nsec - remain.tv_nsec > -100000));
        /* Repeated interruption is only tolerated for client threads: they may
         * well have a
         * routine sleep forever
         */
        if (count++ > 3 && !IS_CLIENT_THREAD(get_thread_private_dcontext())) {
            ASSERT_NOT_REACHED();
            break;
        }
        /* Sleep again for whatever time is left. */
        req = remain;
    }
#endif
}
/* Delivers SUSPEND_SIGNAL to the target thread.  tr may be NULL, in which
 * case pid/tid identify the target.  Returns whether the send succeeded.
 */
static bool
send_suspend_signal(thread_record_t *tr, pid_t pid, thread_id_t tid)
{
#ifdef MACOS
    return (tr != NULL) ? known_thread_signal(tr, SUSPEND_SIGNAL)
                        : thread_signal(pid, tid, SUSPEND_SIGNAL);
#else
    if (is_sigqueueinfo_enosys) {
        /* We'd prefer to use sigqueueinfo to tell our suspend signals apart from app
         * signals, and to support SUSPEND_SIGNAL == NUDGESIG_SIGNUM.  If sigqueueinfo
         * is not available and SUSPEND_SIGNAL == NUDGESIG_SIGNUM, we won't be able to
         * distinguish a suspend from a nudge, but we don't support nudges on old Linux
         * kernels anyway (<2.6.31).
         */
        return (tr != NULL) ? known_thread_signal(tr, SUSPEND_SIGNAL)
                            : thread_signal(pid, tid, SUSPEND_SIGNAL);
    }
    /* Queue the signal with a payload that marks it as a suspend request. */
    kernel_siginfo_t info;
    if (!create_nudge_signal_payload(&info, 0, NUDGE_IS_SUSPEND, 0, 0))
        return false;
    ptr_int_t res =
        dynamorio_syscall(SYS_rt_tgsigqueueinfo, 4, pid, tid, SUSPEND_SIGNAL, &info);
    return res >= 0;
#endif
}
/* Suspends the thread described by tr and waits until it has reached its
 * suspend point.  Suspends nest: only the first one sends a signal.
 * Returns false only if the suspend signal could not be sent.
 */
bool
os_thread_suspend(thread_record_t *tr)
{
    os_thread_data_t *ostd = (os_thread_data_t *)tr->dcontext->os_field;
    ASSERT(ostd != NULL);
    /* See the synch comments in os_thread_resume: the mutex held there
     * prevents prematurely sending a re-suspend signal.
     */
    d_r_mutex_lock(&ostd->suspend_lock);
    ostd->suspend_count++;
    ASSERT(ostd->suspend_count > 0);
    /* If already suspended, do not send another signal.  However, we do
     * need to ensure the target is suspended in case of a race, so we can't
     * just return.
     */
    if (ostd->suspend_count == 1) {
        /* The target suspends itself when it receives our signal.  We wait
         * here until the target reaches the suspend point, and leave it
         * up to the caller to check whether it is a safe suspend point,
         * to match Windows behavior.
         */
        ASSERT(ksynch_get_value(&ostd->suspended) == 0);
        if (!send_suspend_signal(tr, tr->pid, tr->id)) {
            /* Roll back our count: the target was never signaled. */
            ostd->suspend_count--;
            d_r_mutex_unlock(&ostd->suspend_lock);
            return false;
        }
    }
    /* We can unlock before the wait loop b/c we're using a separate "resumed"
     * int and os_thread_resume holds the lock across its wait.  this way a resume
     * can proceed as soon as the suspended thread is suspended, before the
     * suspending thread gets scheduled again.
     */
    d_r_mutex_unlock(&ostd->suspend_lock);
    while (ksynch_get_value(&ostd->suspended) == 0) {
        /* Wait only while the suspended flag is still 0.  The return value
         * doesn't matter because the flag will be re-checked.
         */
        /* We time out and assert in debug build, which is preferable to a
         * silent hang.  We can't safely return false b/c the synch model here
         * assumes there will not be a retry until the target reaches the suspend
         * point.  Xref i#2779.
         */
#define SUSPEND_DEBUG_TIMEOUT_MS 5000
        if (ksynch_wait(&ostd->suspended, 0, SUSPEND_DEBUG_TIMEOUT_MS) == -ETIMEDOUT) {
            ASSERT_CURIOSITY(false && "failed to suspend thread in 5s");
        }
        if (ksynch_get_value(&ostd->suspended) == 0) {
            /* Still not suspended: give up the cpu before trying again. */
            os_thread_yield();
        }
    }
    return true;
}
/* Resumes the thread described by tr.
 * Decrements the per-thread suspend count while holding suspend_lock.  If the
 * count was already 0, or is still positive after the decrement, nothing else
 * is done.  Otherwise sets and signals the "wakeup" flag, waits for the target
 * to set "resumed", and then clears both flags before releasing the lock.
 * Always returns true.
 */
bool
os_thread_resume(thread_record_t *tr)
{
os_thread_data_t *ostd = (os_thread_data_t *)tr->dcontext->os_field;
ASSERT(ostd != NULL);
* reaches a safe post-resume point from a first suspend signal.
* Given that race, we can't just use atomic_add_exchange_int +
* atomic_dec_becomes_zero on suspend_count.
*/
d_r_mutex_lock(&ostd->suspend_lock);
ASSERT(ostd->suspend_count > 0);
* to avoid possible deadlocks
*/
/* With asserts disabled, an unbalanced resume is treated as a no-op. */
if (ostd->suspend_count == 0) {
d_r_mutex_unlock(&ostd->suspend_lock);
return true;
}
ostd->suspend_count--;
/* Other suspenders remain: leave the target suspended. */
if (ostd->suspend_count > 0) {
d_r_mutex_unlock(&ostd->suspend_lock);
return true;
}
ksynch_set_value(&ostd->wakeup, 1);
ksynch_wake(&ostd->wakeup);
/* Wait (still holding suspend_lock) until the "resumed" flag reads non-zero. */
while (ksynch_get_value(&ostd->resumed) == 0) {
* doesn't matter because the flag will be re-checked.
*/
ksynch_wait(&ostd->resumed, 0, 0);
if (ksynch_get_value(&ostd->resumed) == 0) {
os_thread_yield();
}
}
/* Reset both flags for the next suspend/resume cycle. */
ksynch_set_value(&ostd->wakeup, 0);
ksynch_set_value(&ostd->resumed, 0);
d_r_mutex_unlock(&ostd->suspend_lock);
return true;
}
/* Requests termination of the thread described by tr: sets the terminate flag
 * in its os-specific thread data and then sends it SUSPEND_SIGNAL, so that the
 * flag tells the target thread to execute SYS_exit.
 * Returns the result of sending the signal.
 */
bool
os_thread_terminate(thread_record_t *tr)
{
    os_thread_data_t *thread_data = (os_thread_data_t *)tr->dcontext->os_field;
    bool signal_sent;

    ASSERT(thread_data != NULL);
    thread_data->terminate = true;
    /* We send another signal rather than resuming the thread. */
    signal_sent = send_suspend_signal(tr, tr->pid, tr->id);
    return signal_sent;
}
/* Returns true iff the "terminated" flag in the os-specific data of dcontext
 * currently has the value 1.
 */
bool
is_thread_terminated(dcontext_t *dcontext)
{
    os_thread_data_t *thread_data = (os_thread_data_t *)dcontext->os_field;

    ASSERT(thread_data != NULL);
    if (ksynch_get_value(&thread_data->terminated) == 1)
        return true;
    return false;
}
/* Blocks until the value of var is observed to be non-zero. */
static void
os_wait_thread_futex(KSYNCH_TYPE *var)
{
    for (;;) {
        if (ksynch_get_value(var) != 0)
            break;
        /* The return value of the wait does not matter because var is
         * re-checked afterwards.
         */
        ksynch_wait(var, 0, 0);
        if (ksynch_get_value(var) != 0)
            continue; /* The check at the top of the loop exits. */
        os_thread_yield();
    }
}
/* Waits until the "terminated" flag of the thread owning dcontext is set. */
void
os_wait_thread_terminated(dcontext_t *dcontext)
{
    os_thread_data_t *thread_data = (os_thread_data_t *)dcontext->os_field;

    ASSERT(thread_data != NULL);
    os_wait_thread_futex(&thread_data->terminated);
}
/* Waits until the "detached" flag of the thread owning dcontext is set. */
void
os_wait_thread_detached(dcontext_t *dcontext)
{
    os_thread_data_t *thread_data = (os_thread_data_t *)dcontext->os_field;

    ASSERT(thread_data != NULL);
    os_wait_thread_futex(&thread_data->detached);
}
/* Marks the thread owning dcontext as requested to detach by setting the
 * do_detach flag in its os-specific data.
 */
void
os_signal_thread_detach(dcontext_t *dcontext)
{
    os_thread_data_t *thread_data = (os_thread_data_t *)dcontext->os_field;

    ASSERT(thread_data != NULL);
    thread_data->do_detach = true;
}
/* Copies the machine context of the suspended thread tr into mc.
 * The context is taken from the signal context recorded while the thread is
 * suspended.  Returns false if the thread's suspend count is 0.
 */
bool
thread_get_mcontext(thread_record_t *tr, priv_mcontext_t *mc)
{
    os_thread_data_t *thread_data = (os_thread_data_t *)tr->dcontext->os_field;

    ASSERT(thread_data != NULL);
    ASSERT(thread_data->suspend_count > 0);
    if (thread_data->suspend_count != 0) {
        ASSERT(thread_data->suspended_sigcxt != NULL);
        sigcontext_to_mcontext(mc, thread_data->suspended_sigcxt, DR_MC_ALL);
        IF_ARM(dr_set_isa_mode(tr->dcontext,
                               get_sigcontext_isa_mode(thread_data->suspended_sigcxt),
                               NULL));
        return true;
    }
    return false;
}
/* Replaces the machine context of the suspended thread tr with mc.
 * The values are written into the signal context recorded while the thread is
 * suspended.  Returns false if the thread's suspend count is 0.
 */
bool
thread_set_mcontext(thread_record_t *tr, priv_mcontext_t *mc)
{
    os_thread_data_t *thread_data = (os_thread_data_t *)tr->dcontext->os_field;

    ASSERT(thread_data != NULL);
    ASSERT(thread_data->suspend_count > 0);
    if (thread_data->suspend_count != 0) {
        ASSERT(thread_data->suspended_sigcxt != NULL);
        mcontext_to_sigcontext(thread_data->suspended_sigcxt, mc, DR_MC_ALL);
        IF_ARM(set_sigcontext_isa_mode(thread_data->suspended_sigcxt,
                                       dr_get_isa_mode(tr->dcontext)));
        return true;
    }
    return false;
}
/* Converts the OS context osc into a machine context.
 * If dmc is non-NULL it is filled according to dmc->flags; otherwise, if mc is
 * non-NULL, it is filled with DR_MC_ALL.  Returns false when both are NULL.
 */
bool
os_context_to_mcontext(dr_mcontext_t *dmc, priv_mcontext_t *mc, os_cxt_ptr_t osc)
{
    if (dmc != NULL) {
        sigcontext_to_mcontext(dr_mcontext_as_priv_mcontext(dmc), &osc, dmc->flags);
        return true;
    }
    if (mc == NULL)
        return false;
    sigcontext_to_mcontext(mc, &osc, DR_MC_ALL);
    return true;
}
/* Writes a machine context into the OS context osc.
 * If dmc is non-NULL it is the source (honoring dmc->flags); otherwise, if mc
 * is non-NULL, it is the source with DR_MC_ALL.  Returns false when both are
 * NULL.
 */
bool
mcontext_to_os_context(os_cxt_ptr_t osc, dr_mcontext_t *dmc, priv_mcontext_t *mc)
{
    if (dmc != NULL) {
        mcontext_to_sigcontext(&osc, dr_mcontext_as_priv_mcontext(dmc), dmc->flags);
        return true;
    }
    if (mc == NULL)
        return false;
    mcontext_to_sigcontext(&osc, mc, DR_MC_ALL);
    return true;
}
/* Returns true if tr is not under DR control, or if it has a dcontext whose
 * currently_stopped flag is set.
 */
bool
is_thread_currently_native(thread_record_t *tr)
{
    if (!tr->under_dynamo_control)
        return true;
    return (tr->dcontext != NULL && tr->dcontext->currently_stopped);
}
#ifdef LINUX
/* Entry routine passed to dynamorio_clone() for a client thread.
 * Locates the clone record from the current stack pointer, waits for the
 * dr_app_started event, initializes the thread with dynamo_thread_init(),
 * invokes the client function stored in dcontext->next_tag with the argument
 * stored in the clone record, and finally terminates the thread through
 * block_cleanup_and_terminate() with SYS_exit.
 */
static void
client_thread_run(void)
{
void (*func)(void *param);
dcontext_t *dcontext;
byte *xsp;
GET_STACK_PTR(xsp);
# ifdef AARCH64
* stack. Align to page boundary, similar to that in get_clone_record().
*/
xsp = (app_pc)ALIGN_BACKWARD(xsp, PAGE_SIZE);
# endif
void *crec = get_clone_record((reg_t)xsp);
* to create a client thread during init, but we do not support that thread
* executing until the app has started (b/c we have no signal handlers in place).
*/
* app has started, if we even create the thread before
* dynamo_initialized is set, we will not copy tls blocks. By
* waiting for the app to be started before dynamo_thread_init is
* called, we ensure this race condition can never happen, since
* dynamo_initialized will always be set before the app is started.
*/
wait_for_event(dr_app_started, 0);
/* rc only exists in debug builds, where it is checked by the ASSERT below. */
IF_DEBUG(int rc =)
dynamo_thread_init(get_clone_record_dstack(crec), NULL, crec, true);
ASSERT(rc != -1);
dcontext = get_thread_private_dcontext();
ASSERT(dcontext != NULL);
LOG(THREAD, LOG_ALL, 1, "\n***** CLIENT THREAD %d *****\n\n", d_r_get_thread_id());
/* The function to run is read from next_tag. */
func = (void (*)(void *param))dcontext->next_tag;
signal_swap_mask(dcontext, false );
/* The argument for func is read from the clone record's app_xsp field. */
void *arg = (void *)get_clone_record_app_xsp(crec);
LOG(THREAD, LOG_ALL, 1, "func=" PFX ", arg=" PFX "\n", func, arg);
(*func)(arg);
LOG(THREAD, LOG_ALL, 1, "\n***** CLIENT THREAD %d EXITING *****\n\n",
d_r_get_thread_id());
block_cleanup_and_terminate(dcontext, SYS_exit, 0, 0, false ,
IF_MACOS_ELSE(dcontext->thread_port, 0), 0);
}
#endif
* * thread must have dcontext since many API routines require one and we
* don't expose GLOBAL_DCONTEXT (xref PR 243008, PR 216936, PR 536058)
* * reversed the old design of not using dstack (partly b/c want dcontext)
* and I'm using the same parent-creates-dstack and clone_record_t design
* to create linux threads: dstack should be big enough for client threads
* (xref PR 202669)
* * reversed the old design of explicit dr_terminate_client_thread(): now
* the thread is auto-terminated and stack cleaned up on return from run
* function
*/
/* Creates a new client thread that runs func(arg).
 * On LINUX a clone record is created on a new stack, func and arg are stored
 * in it, and dynamorio_clone() is invoked with client_thread_run as the entry
 * point.  Returns true if the clone call returned a positive id and false if
 * it returned a negative value.  On other platforms this is not implemented
 * and returns false.
 */
DR_API bool
dr_create_client_thread(void (*func)(void *param), void *arg)
{
#ifdef LINUX
dcontext_t *dcontext = get_thread_private_dcontext();
byte *xsp;
* waiting on child.
* We do not pass CLONE_THREAD so that the new thread is in its own thread
* group, allowing it to have private itimers and not receive any signals
* sent to the app's thread groups. It also makes the thread not show up in
* the thread list for the app, making it more invisible.
*/
uint flags = CLONE_VM | CLONE_FS | CLONE_FILES |
CLONE_SIGHAND
IF_VMX86(| (os_in_vmkernel_userworld() ? CLONE_THREAD : 0));
pre_second_thread();
handle_clone(dcontext, flags);
/* NOTE(review): this increment is not visibly undone on the failure path
 * below -- confirm whether that is handled elsewhere.
 */
ATOMIC_INC(int, uninit_thread_count);
void *crec = create_clone_record(dcontext, (reg_t *)&xsp, NULL, NULL);
* signal_thread_inherit gets the right syscall info
*/
/* arg and func are stored in the record for client_thread_run() to read. */
set_clone_record_fields(crec, (reg_t)arg, (app_pc)func, SYS_clone, flags);
LOG(THREAD, LOG_ALL, 1, "dr_create_client_thread xsp=" PFX " dstack=" PFX "\n", xsp,
get_clone_record_dstack(crec));
* i#3526 switch DR's tls to an invalid one before cloning, and switch lib_tls
* to the app's.
*/
os_clone_pre(dcontext);
# ifdef AARCHXX
* and hitting asserts in dynamo_thread_init lock calls -- yet we don't want to for
* app threads, so we're doing this here and not in os_clone_pre().
* XXX: Find a way to put this in os_clone_* to simplify the code?
*/
/* Save the thread register, clear it across the clone, and restore it after. */
void *tls = (void *)read_thread_register(LIB_SEG_TLS);
write_thread_register(NULL);
# endif
thread_id_t newpid = dynamorio_clone(flags, xsp, NULL, NULL, NULL, client_thread_run);
os_clone_post(dcontext);
# ifdef AARCHXX
write_thread_register(tls);
# endif
if (INTERNAL_OPTION(private_loader))
os_switch_lib_tls(dcontext, false );
if (newpid < 0) {
LOG(THREAD, LOG_ALL, 1, "client thread creation failed: %d\n", newpid);
return false;
} else if (newpid == 0) {
/* A zero return is asserted never to be seen here. */
ASSERT_NOT_REACHED();
return false;
}
return true;
#else
ASSERT_NOT_IMPLEMENTED(false);
return false;
#endif
}
int
get_num_processors(void)
{
static uint num_cpu = 0;
if (!num_cpu) {
#ifdef MACOS
DEBUG_DECLARE(bool ok =)
sysctl_query(CTL_HW, HW_NCPU, &num_cpu, sizeof(num_cpu));
ASSERT(ok);
#else
* look at the /sys filesystem ourselves, which is what glibc does.
*/
uint local_num_cpus = 0;
file_t cpu_dir = os_open_directory("/sys/devices/system/cpu", OS_OPEN_READ);
dir_iterator_t iter;
ASSERT(cpu_dir != INVALID_FILE &&
"/sys must be mounted: mount -t sysfs sysfs /sys");
os_dir_iterator_start(&iter, cpu_dir);
while (os_dir_iterator_next(&iter)) {
int dummy_num;
if (sscanf(iter.name, "cpu%d", &dummy_num) == 1)
local_num_cpus++;
}
os_close(cpu_dir);
num_cpu = local_num_cpus;
#endif
ASSERT(num_cpu);
}
return num_cpu;
}
* routines in libdl.so. When we do early injection, there is no loader to
* resolve these imports, so they will crash. Early injection is incompatible
* with -no_private_loader, so this should never happen.
*/
/* Loads the shared library called name and returns a handle to it.
 * With the private_loader option the private loader is used (reachable is
 * forwarded to it).  Otherwise dlopen() is used in STATIC_LIBRARY and MACOS
 * builds; in all other builds this path is asserted to be unreachable and NULL
 * is returned.
 */
shlib_handle_t
load_shared_library(const char *name, bool reachable)
{
    shlib_handle_t handle;
#ifdef STATIC_LIBRARY
    /* When asked for the application itself, hand back dlopen(NULL); this
     * works without disabling the private loader.
     */
    if (os_files_same(name, get_application_name())) {
        handle = dlopen(NULL, RTLD_LAZY);
        return handle;
    }
#endif
    if (INTERNAL_OPTION(private_loader)) {
        handle = (shlib_handle_t)locate_and_load_private_library(name, reachable);
        return handle;
    }
#if defined(STATIC_LIBRARY) || defined(MACOS)
    ASSERT(!DYNAMO_OPTION(early_inject));
    handle = dlopen(name, RTLD_LAZY);
#else
    /* If we want this for hybrid mode we should add a new build param and
     * include the libdl calls here under that param.
     */
    ASSERT_NOT_REACHED();
    handle = NULL;
#endif
    return handle;
}
/* Looks up the routine called name in the library lib.
 * Uses the private loader when the private_loader option is set, dlsym() in
 * STATIC_LIBRARY and MACOS builds, and otherwise asserts and returns NULL.
 */
shlib_routine_ptr_t
lookup_library_routine(shlib_handle_t lib, const char *name)
{
    shlib_routine_ptr_t routine;

    if (INTERNAL_OPTION(private_loader)) {
        routine = (shlib_routine_ptr_t)get_private_library_address((app_pc)lib, name);
        return routine;
    }
#if defined(STATIC_LIBRARY) || defined(MACOS)
    ASSERT(!DYNAMO_OPTION(early_inject));
    routine = dlsym(lib, name);
#else
    ASSERT_NOT_REACHED();
    routine = NULL;
#endif
    return routine;
}
/* Unloads the library lib.
 * Uses the private loader when the private_loader option is set.  Otherwise,
 * in STATIC_LIBRARY and MACOS builds, calls dlclose() unless the avoid_dlclose
 * option is set; in other builds this path is asserted to be unreachable.
 */
void
unload_shared_library(shlib_handle_t lib)
{
    if (INTERNAL_OPTION(private_loader)) {
        unload_private_library(lib);
        return;
    }
#if defined(STATIC_LIBRARY) || defined(MACOS)
    ASSERT(!DYNAMO_OPTION(early_inject));
    if (DYNAMO_OPTION(avoid_dlclose))
        return;
    dlclose(lib);
#else
    ASSERT_NOT_REACHED();
#endif
}
/* Copies a description of the most recent shared-library error into buf,
 * which holds maxlen bytes.  The result is always NUL-terminated and is
 * truncated to fit.
 * With the private_loader option a fixed message is used; otherwise dlerror()
 * is consulted in STATIC_LIBRARY and MACOS builds.
 * If buf is NULL or maxlen is not positive, nothing is written.
 */
void
shared_library_error(char *buf, int maxlen)
{
    const char *err;

    /* Without this guard, maxlen <= 0 would turn "maxlen - 1" into a huge
     * unsigned count for strncpy and write the terminator before the buffer.
     */
    if (buf == NULL || maxlen <= 0)
        return;
    if (INTERNAL_OPTION(private_loader)) {
        err = "error in private loader";
    } else {
#if defined(STATIC_LIBRARY) || defined(MACOS)
        ASSERT(!DYNAMO_OPTION(early_inject));
        err = dlerror();
        if (err == NULL) {
            err = "dlerror returned NULL";
        }
#else
        ASSERT_NOT_REACHED();
        err = "unknown error";
#endif
    }
    /* strncpy does not terminate on truncation, so terminate explicitly. */
    strncpy(buf, err, maxlen - 1);
    buf[maxlen - 1] = '\0';
}
* for linux, one of addr or name is needed; for windows, neither is needed.
*/
/* Computes the bounds of a library and stores them in *start and *end.
 * At least one of addr or name must be non-NULL.  With the private_loader
 * option, the private module list is consulted first (by base address lib,
 * then by name); a match that is not externally loaded supplies the bounds
 * directly.  Otherwise the bounds come from memquery_library_bounds(), with
 * *start pre-set to addr.
 * Returns true if bounds were found.
 */
bool
shared_library_bounds(IN shlib_handle_t lib, IN byte *addr, IN const char *name,
                      OUT byte **start, OUT byte **end)
{
    ASSERT(start != NULL && end != NULL);
    /* One of addr or name is required. */
    ASSERT(addr != NULL || name != NULL);
    *start = addr;
    if (INTERNAL_OPTION(private_loader)) {
        bool found_private = false;
        privmod_t *mod;

        acquire_recursive_lock(&privload_lock);
        mod = privload_lookup_by_base((app_pc)lib);
        if (mod == NULL && name != NULL)
            mod = privload_lookup(name);
        if (mod != NULL && !mod->externally_loaded) {
            *start = mod->base;
            if (end != NULL)
                *end = mod->base + mod->size;
            found_private = true;
        }
        release_recursive_lock(&privload_lock);
        if (found_private)
            return true;
    }
    return (memquery_library_bounds(name, start, end, NULL, 0, NULL, 0) > 0);
}
/* Issues the fcntl system call (its no-cancel variant via SYSNUM_NO_CANCEL)
 * directly and returns the raw syscall result.
 */
static int
fcntl_syscall(int fd, int cmd, long arg)
{
    int result = dynamorio_syscall(SYSNUM_NO_CANCEL(SYS_fcntl), 3, fd, cmd, arg);
    return result;
}
* returns -1 if unsuccessful.
*/
/* Duplicates curfd to a descriptor number of at least min_dr_fd.
 * Does nothing and returns -1 when the steal_fds option is not positive.
 * If the first attempt fails, retries once with a floor of min_dr_fd / 2 and
 * returns the result of that retry (negative on failure).
 */
file_t
fd_priv_dup(file_t curfd)
{
    file_t dupfd;

    if (!(DYNAMO_OPTION(steal_fds) > 0))
        return -1;
    dupfd = fcntl_syscall(curfd, F_DUPFD, min_dr_fd);
    if (dupfd >= 0)
        return dupfd;
    /* XXX: should we track how many we've given out to avoid a failed
     * syscall every time after?
     */
    SYSLOG_INTERNAL_WARNING_ONCE("ran out of stolen fd space");
    /* Retry with a lower floor that still stays away from low numbers, to
     * avoid issues like tcsh assuming it can own fds 3-5 for piping
     * std{in,out,err} (xref the old -open_tcsh_fds option).
     */
    dupfd = fcntl_syscall(curfd, F_DUPFD, min_dr_fd / 2);
    return dupfd;
}
/* Sets FD_CLOEXEC on fd.  Logs a warning and returns false if the fcntl call
 * does not return 0.
 */
bool
fd_mark_close_on_exec(file_t fd)
{
    if (fcntl_syscall(fd, F_SETFD, FD_CLOEXEC) == 0)
        return true;
    SYSLOG_INTERNAL_WARNING("unable to mark file %d as close-on-exec", fd);
    return false;
}
/* Records fd, with flags, as a DR-owned file descriptor.
 * If fd_table does not exist yet, the descriptor is queued in the
 * fd_add_pre_heap arrays instead (bounded by MAX_FD_ADD_PRE_HEAP); those
 * entries are added to fd_table in d_r_os_init().
 */
void
fd_table_add(file_t fd, uint flags)
{
    if (fd_table == NULL) {
        ASSERT(num_fd_add_pre_heap < MAX_FD_ADD_PRE_HEAP &&
               num_fd_add_pre_heap < (DYNAMO_OPTION(satisfy_w_xor_x) ? 2 : 1) &&
               "only main_logfile and dual_map_file should come here");
        if (num_fd_add_pre_heap < MAX_FD_ADD_PRE_HEAP) {
            fd_add_pre_heap[num_fd_add_pre_heap] = fd;
            fd_add_pre_heap_flags[num_fd_add_pre_heap] = flags;
            num_fd_add_pre_heap++;
        }
        return;
    }
    TABLE_RWLOCK(fd_table, write, lock);
    DODEBUG({
        /* The table may hold a stale logfile fd left behind by a vforked or
         * cloned child that called execve.  Avoid an assert if that happens.
         */
        bool present = generic_hash_remove(GLOBAL_DCONTEXT, fd_table, (ptr_uint_t)fd);
        ASSERT_CURIOSITY_ONCE(!present && "stale fd not cleaned up");
    });
    generic_hash_add(GLOBAL_DCONTEXT, fd_table, (ptr_uint_t)fd,
                     (void *)(ptr_uint_t)(flags | OS_OPEN_RESERVED));
    TABLE_RWLOCK(fd_table, write, unlock);
}
/* Removes fd from the table of DR-owned descriptors.  When the table does not
 * exist, asserts that DR has exited or is running as a standalone library.
 */
void
fd_table_remove(file_t fd)
{
    if (fd_table == NULL) {
        ASSERT(dynamo_exited || standalone_library);
        return;
    }
    TABLE_RWLOCK(fd_table, write, lock);
    generic_hash_remove(GLOBAL_DCONTEXT, fd_table, (ptr_uint_t)fd);
    TABLE_RWLOCK(fd_table, write, unlock);
}
/* Returns true iff fd has a non-zero entry in fd_table. */
static bool
fd_is_dr_owned(file_t fd)
{
    ptr_uint_t entry;

    ASSERT(fd_table != NULL);
    TABLE_RWLOCK(fd_table, read, lock);
    entry = (ptr_uint_t)generic_hash_lookup(GLOBAL_DCONTEXT, fd_table, (ptr_uint_t)fd);
    TABLE_RWLOCK(fd_table, read, unlock);
    if (entry == 0)
        return false;
    return true;
}
/* Returns true iff the steal_fds option is positive, min_dr_fd is positive,
 * and fd is at least min_dr_fd.
 */
static bool
fd_is_in_private_range(file_t fd)
{
    if (!(DYNAMO_OPTION(steal_fds) > 0))
        return false;
    if (!(min_dr_fd > 0))
        return false;
    return (fd >= min_dr_fd);
}
/* Opens fname with os_open_flags and registers the descriptor as DR-owned.
 * When fd_priv_dup() succeeds, the original descriptor is closed and the
 * duplicate (marked close-on-exec) is used instead.
 * Returns the negative os_open() result on failure, else the descriptor.
 */
file_t
os_open_protected(const char *fname, int os_open_flags)
{
    file_t fd = os_open(fname, os_open_flags);
    file_t private_fd;

    if (fd < 0)
        return fd;
    /* The duplication is not worth the extra syscall for temporary
     * open/close sequences, so we only use it for persistent files.
     */
    private_fd = fd_priv_dup(fd);
    if (private_fd >= 0) {
        close_syscall(fd);
        fd = private_fd;
        fd_mark_close_on_exec(fd);
    }
    fd_table_add(fd, os_open_flags);
    return fd;
}
/* Counterpart of os_open_protected(): drops f from the DR-owned descriptor
 * table first and then closes it.
 */
void
os_close_protected(file_t f)
{
    /* Unregister before closing, matching the original ordering. */
    fd_table_remove(f);
    os_close(f);
}
/* Stores the current working directory in buf, which holds bufsz bytes.
 * On MACOS the path is obtained by opening "." and issuing fcntl(F_GETPATH);
 * elsewhere the getcwd system call is used directly.
 * Returns true on success.  On MACOS, success additionally requires that the
 * path is non-empty and fits in buf without truncation.
 */
bool
os_get_current_dir(char *buf, size_t bufsz)
{
#ifdef MACOS
    static char noheap_buf[MAXPATHLEN];
    bool res = false;
    file_t fd = os_open(".", OS_OPEN_READ);
    int len;
    char *fcntl_buf;
    /* Before the heap is available we fall back to a static scratch buffer. */
    if (dynamo_heap_initialized)
        fcntl_buf = global_heap_alloc(MAXPATHLEN HEAPACCT(ACCT_OTHER));
    else
        fcntl_buf = noheap_buf;
    /* A zero-sized output buffer cannot even hold the terminator. */
    if (fd == INVALID_FILE || bufsz == 0)
        goto cwd_done;
    if (fcntl_syscall(fd, F_GETPATH, (long)fcntl_buf) != 0)
        goto cwd_done;
    len = snprintf(buf, bufsz, "%s", fcntl_buf);
    buf[bufsz - 1] = '\0';
    res = (len > 0 && (size_t)len < bufsz);
cwd_done:
    /* Shared cleanup for success and failure, so that neither the scratch
     * buffer nor the descriptor is leaked on the success path.  Only a buffer
     * that actually came from the heap is freed.
     */
    if (fcntl_buf != noheap_buf)
        global_heap_free(fcntl_buf, MAXPATHLEN HEAPACCT(ACCT_OTHER));
    if (fd != INVALID_FILE)
        os_close(fd);
    return res;
#else
    return (dynamorio_syscall(SYS_getcwd, 2, buf, bufsz) > 0);
#endif
}
/* Writes count bytes from buf to file f through write_syscall() and returns
 * that call's result.
 */
ssize_t
os_write(file_t f, const void *buf, size_t count)
{
    ssize_t written = write_syscall(f, buf, count);
    return written;
}
* it here. We use a loop to ensure reachability for the core.
*/
/* Maps *size bytes of file f at offset offs with protection prot, preferring
 * address addr, and returns the mapped address or NULL on failure.
 * f == -1 requests an anonymous mapping.  map_flags selects copy-on-write
 * (MAP_FILE_COPY_ON_WRITE), a fixed address (MAP_FILE_FIXED), and, on X64,
 * reachability from the code cache (MAP_FILE_REACHABLE), in which case the
 * mapping is retried in a loop until it lands inside the reachable region.
 * *size is only read here.
 */
byte *
os_map_file(file_t f, size_t *size INOUT, uint64 offs, app_pc addr, uint prot,
            map_flags_t map_flags)
{
    int flags;
    byte *map = NULL;
#if defined(X64)
    bool loop = false;
    uint iters = 0;
#    define MAX_MMAP_LOOP_ITERS 100
    byte *region_start = NULL, *region_end = NULL;
#else
    /* Outside X64 the offset is converted to a page count (pg_offs). */
    uint pg_offs;
    ASSERT_TRUNCATE(pg_offs, uint, offs / PAGE_SIZE);
    pg_offs = (uint)(offs / PAGE_SIZE);
#endif
#ifdef VMX86_SERVER
    flags = MAP_PRIVATE;
#else
    flags = TEST(MAP_FILE_COPY_ON_WRITE, map_flags) ? MAP_PRIVATE : MAP_SHARED;
#endif
#if defined(X64)
    /* For the low 4GB it is easiest to just pass MAP_32BIT (which is the low
     * 2GB, but good enough).
     */
    if (DYNAMO_OPTION(heap_in_lower_4GB) &&
        !TESTANY(MAP_FILE_FIXED | MAP_FILE_APP, map_flags))
        flags |= MAP_32BIT;
#endif
    /* Allows memory allocation when no file is given, so we can request
     * memory from a particular address with the fixed argument.
     */
    if (f == -1)
        flags |= MAP_ANONYMOUS;
    if (TEST(MAP_FILE_FIXED, map_flags))
        flags |= MAP_FIXED;
#if defined(X64)
    if (!TEST(MAP_32BIT, flags) && TEST(MAP_FILE_REACHABLE, map_flags)) {
        vmcode_get_reachable_region(&region_start, &region_end);
        ASSERT(!TEST(MAP_FILE_FIXED, map_flags));
        loop = true;
    }
    if ((!TEST(MAP_32BIT, flags) && TEST(MAP_FILE_REACHABLE, map_flags) &&
         (is_vmm_reserved_address(addr, *size, NULL, NULL) ||
          /* There is a window during attach where another thread could take
           * this spot, and with this current code we'll never go back and try
           * to get VMM memory.  We live with that as being rare rather than
           * complicate the code.
           */
          !rel32_reachable_from_current_vmcode(addr))) ||
        (TEST(MAP_FILE_FIXED, map_flags) && !TEST(MAP_FILE_VMM_COMMIT, map_flags) &&
         is_vmm_reserved_address(addr, *size, NULL, NULL))) {
        if (DYNAMO_OPTION(vm_reserve)) {
            /* Carve the space out of the VMM reservation and map over it. */
            map = heap_reserve_for_external_mapping(addr, *size,
                                                    VMM_SPECIAL_MMAP | VMM_REACHABLE);
            if (map != NULL) {
                addr = map;
                flags |= MAP_FIXED;
            }
        }
    }
    while (!loop ||
           (addr != NULL && addr >= region_start && addr + *size <= region_end) ||
           find_free_memory_in_region(region_start, region_end, *size, &addr, NULL)) {
#endif
        map = mmap_syscall(addr, *size, memprot_to_osprot(prot), flags, f,
                           IF_LINUX_ELSE(IF_X64_ELSE(offs, pg_offs), offs));
        if (!mmap_syscall_succeeded(map)) {
            LOG(THREAD_GET, LOG_SYSCALLS, 2, "%s failed: " PIFX "\n", __func__, map);
            map = NULL;
        }
#if defined(X64)
        else if (loop && (map < region_start || map + *size > region_end)) {
            /* Try again: the kernel placed the mapping outside the reachable
             * region.  This relies on our notion of a free region that is big
             * enough matching the kernel's, else we'll loop forever (which we
             * try to catch w/ a max iters count).
             */
            munmap_syscall(map, *size);
            map = NULL;
        } else
            break;
        if (!loop)
            break;
        if (++iters > MAX_MMAP_LOOP_ITERS) {
            ASSERT_NOT_REACHED();
            map = NULL;
            break;
        }
        /* Drop the hint so the next iteration searches for free memory. */
        addr = NULL;
    }
#endif
    return map;
}
/* Unmaps the size bytes at map.
 * When the vm_reserve option is set and the range lies within the VMM
 * reservation, the range is replaced by a PROT_NONE anonymous fixed mapping
 * and handed back via heap_unreserve_for_external_mapping(); otherwise it is
 * simply unmapped.  Returns true on success.
 */
bool
os_unmap_file(byte *map, size_t size)
{
    if (!DYNAMO_OPTION(vm_reserve) || !is_vmm_reserved_address(map, size, NULL, NULL)) {
        long unmap_res = munmap_syscall(map, size);
        return (unmap_res == 0);
    }
    /* XXX: to restore the range exactly how it was originally reserved, would
     * we expose a way to ask for MAP_FIXED in os_heap_reserve*()?
     */
    byte *placeholder =
        mmap_syscall(map, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
    if (!mmap_syscall_succeeded(placeholder))
        return false;
    return heap_unreserve_for_external_mapping(map, size,
                                               VMM_SPECIAL_MMAP | VMM_REACHABLE);
}
#ifdef LINUX
/* Formats the /dev/shm path used for the memory file called name, which has
 * the form "/dev/shm/<name>.<pid>", into buf (bufsz bytes), and forces NUL
 * termination of the last byte.
 */
static void
os_get_memory_file_shm_path(const char *name, OUT char *buf, size_t bufsz)
{
    size_t last = bufsz - 1;

    snprintf(buf, bufsz, "/dev/shm/%s.%d", name, get_process_id());
    buf[last] = '\0';
}
#endif
/* Creates a memory-backed file of the given size and returns a DR-private
 * descriptor for it, or INVALID_FILE on failure.
 * On LINUX, memfd_create is tried first when SYS_memfd_create is defined; if
 * the result is -ENOSYS, a file under /dev/shm is created instead.  The file
 * is resized with ftruncate, and the descriptor is duplicated with
 * fd_priv_dup(), marked close-on-exec, and added to the fd table.
 * Not implemented on other platforms.
 */
file_t
os_create_memory_file(const char *name, size_t size)
{
#ifdef LINUX
char path[MAXIMUM_PATH];
file_t fd;
* has no name conflict issues, stale files left around on a crash, or
* reliance on tmpfs).
*/
# ifdef SYS_memfd_create
snprintf(path, BUFFER_SIZE_ELEMENTS(path), "/%s.%d", name, get_process_id());
NULL_TERMINATE_BUFFER(path);
fd = dynamorio_syscall(SYS_memfd_create, 2, path, 0);
# else
fd = -ENOSYS;
# endif
/* Fall back to a /dev/shm file when memfd_create is unavailable. */
if (fd == -ENOSYS) {
os_get_memory_file_shm_path(name, path, BUFFER_SIZE_ELEMENTS(path));
NULL_TERMINATE_BUFFER(path);
fd = open_syscall(path, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR);
/* An existing file of the same name is presumed stale: delete and retry once. */
if (fd == -EEXIST) {
SYSLOG_INTERNAL_WARNING("Removing presumed-stale %s", path);
os_delete_file(path);
fd = open_syscall(path, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR);
}
}
if (fd < 0)
return INVALID_FILE;
* Architecture") slowdown where the first executable mmap causes a hash
* to be computed of the entire file size, which can take 5 or 10
* *seconds* for gigabyte files. This is only done once, so if we
* trigger it while the file is tiny, we can avoid the delay later.
*/
/* Map one executable page before resizing, then unmap it right away. */
byte *temp_map = mmap_syscall(0, PAGE_SIZE, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
if (mmap_syscall_succeeded(temp_map))
munmap_syscall(temp_map, PAGE_SIZE);
if (dynamorio_syscall(SYS_ftruncate, 2, fd, size) < 0) {
close_syscall(fd);
return INVALID_FILE;
}
/* Move to a private descriptor; the original is closed whether or not the
 * duplication succeeded.
 */
file_t priv_fd = fd_priv_dup(fd);
close_syscall(fd);
if (priv_fd < 0) {
return INVALID_FILE;
}
fd = priv_fd;
fd_mark_close_on_exec(fd);
fd_table_add(fd, 0 );
return fd;
#else
ASSERT_NOT_IMPLEMENTED(false && "i#3556 NYI for Mac");
return INVALID_FILE;
#endif
}
/* Releases a memory file created for name: deletes its /dev/shm path, removes
 * fd from the fd table, and closes fd.  Not implemented outside LINUX.
 */
void
os_delete_memory_file(const char *name, file_t fd)
{
#ifdef LINUX
    /* We blindly try to delete the /dev/shm path rather than trying to record
     * whether we created this file.
     */
    char shm_path[MAXIMUM_PATH];

    os_get_memory_file_shm_path(name, shm_path, BUFFER_SIZE_ELEMENTS(shm_path));
    NULL_TERMINATE_BUFFER(shm_path);
    os_delete_file(shm_path);
    fd_table_remove(fd);
    close_syscall(fd);
#else
    ASSERT_NOT_IMPLEMENTED(false && "i#3556 NYI for Mac");
#endif
}
/* Queries free-space figures for the filesystem holding file_handle via the
 * fstatfs system call.  Each output pointer may be NULL to skip that value.
 * AvailableQuotaBytes and TotalQuotaBytes both receive f_bavail * f_bsize;
 * TotalVolumeBytes receives f_bfree * f_bsize.
 * Returns false if the system call fails.
 */
bool
os_get_disk_free_space(file_t file_handle, uint64 *AvailableQuotaBytes,
                       uint64 *TotalQuotaBytes, uint64 *TotalVolumeBytes)
{
    struct statfs fs_info;
    uint64 avail_bytes, free_bytes;
    ptr_int_t res = dynamorio_syscall(SYS_fstatfs, 2, file_handle, &fs_info);

    if (res != 0) {
        LOG(THREAD_GET, LOG_SYSCALLS, 2, "%s failed: " PIFX "\n", __func__, res);
        return false;
    }
    LOG(GLOBAL, LOG_STATS, 3,
        "os_get_disk_free_space: avail=" SZFMT ", free=" SZFMT ", bsize=" SZFMT "\n",
        fs_info.f_bavail, fs_info.f_bfree, fs_info.f_bsize);
    avail_bytes = ((uint64)fs_info.f_bavail * fs_info.f_bsize);
    free_bytes = ((uint64)fs_info.f_bfree * fs_info.f_bsize);
    if (AvailableQuotaBytes != NULL)
        *AvailableQuotaBytes = avail_bytes;
    if (TotalQuotaBytes != NULL)
        *TotalQuotaBytes = avail_bytes;
    if (TotalVolumeBytes != NULL)
        *TotalVolumeBytes = free_bytes;
    return true;
}
#ifdef LINUX
/* Returns true if path names the exe link of this process: either exactly
 * "/proc/self/exe", or "/proc/<id>/exe" where /proc/self/task/<id> exists.
 * The exe link for another process is ruled out.
 */
static bool
symlink_is_self_exe(const char *path)
{
#    define SELF_LEN_LEADER 6
#    define SELF_LEN_TRAILER 4
#    define SELF_LEN_MAX 18
    size_t len = strlen(path);
    int pid;
    char task[32];

    if (strcmp(path, "/proc/self/exe") == 0)
        return true;
    if (len >= SELF_LEN_MAX)
        return false;
    if (strncmp(path, "/proc/", SELF_LEN_LEADER) != 0)
        return false;
    /* The leader matched, so len is at least SELF_LEN_LEADER here. */
    if (strncmp(path + len - SELF_LEN_TRAILER, "/exe", SELF_LEN_TRAILER) != 0)
        return false;
    if (sscanf(path + SELF_LEN_LEADER, "%d", &pid) != 1)
        return false;
    snprintf(task, BUFFER_SIZE_ELEMENTS(task), "/proc/self/task/%d", pid);
    NULL_TERMINATE_BUFFER(task);
    return os_file_exists(task, true);
}
#endif
void
exit_process_syscall(long status)
{
* but will execute there. We try exit_group and if it fails we use exit.
*
* FIXME: if no exit_group, kill all other threads (==processes in same addr
* space) manually? Presumably we got here b/c at an unsafe point to do
* full exit? Or is that not true: what about dr_abort()?
*/
dynamorio_syscall(SYSNUM_EXIT_PROCESS, 1, status);
exit_thread_syscall(status);
ASSERT_NOT_REACHED();
}
/* Terminates the calling thread via the thread-exit syscall.
 * On MACOS the syscall takes four arguments, the third being the current
 * thread's Mach port; status is not passed in that case.  Elsewhere status is
 * the only argument.
 */
void
exit_thread_syscall(long status)
{
#ifdef MACOS
    /* The thread port is an argument of bsdthread_terminate. */
    mach_port_t self_port = dynamorio_mach_syscall(MACH_thread_self_trap, 0);
    dynamorio_syscall(SYSNUM_EXIT_THREAD, 4, 0, 0, self_port, 0);
#else
    dynamorio_syscall(SYSNUM_EXIT_THREAD, 1, status);
#endif
}
yet it is easier to have a syslog based Unix implementation with real strings.
*/
/* Maps priority to a native syslog priority and then reports that the
 * routine is not implemented.  message_id, substitutions_num and args are
 * unused.
 */
void
os_syslog(syslog_event_type_t priority, uint message_id, uint substitutions_num,
          va_list args)
{
    int native_priority UNUSED;

    if (priority == SYSLOG_INFORMATION) {
        native_priority = LOG_INFO;
    } else if (priority == SYSLOG_WARNING) {
        native_priority = LOG_WARNING;
    } else if (priority == SYSLOG_CRITICAL) {
        native_priority = LOG_CRIT;
    } else if (priority == SYSLOG_ERROR) {
        native_priority = LOG_ERR;
    } else {
        ASSERT_NOT_REACHED();
    }
    ASSERT_NOT_IMPLEMENTED(false);
}
* there should only be one live thread.
*/
/* Reads size bytes from base into out_buf after first checking, by querying
 * memory protections, that the source range is readable.  Asserts that fault
 * handling is not yet initialized.
 * On success copies the data and reports size in *bytes_read; on failure
 * copies nothing and reports 0.  bytes_read may be NULL.
 */
static bool
safe_read_via_query(const void *base, size_t size, void *out_buf, size_t *bytes_read)
{
    bool readable;

    ASSERT(!fault_handling_initialized);
    /* The memory cache is checked for generality, in case this routine is
     * ever called at some later time.
     */
    if (IF_MEMQUERY_ELSE(false, memcache_initialized()))
        readable = is_readable_without_exception_internal(base, size, false);
    else
        readable = is_readable_without_exception_query_os((void *)base, size);
    if (readable)
        memcpy(out_buf, base, size);
    if (bytes_read != NULL)
        *bytes_read = readable ? size : 0;
    return readable;
}
/* Safely reads size bytes from base into out_buf, reporting the number of
 * bytes read through bytes_read.
 * safe_read_fast() requires fault handling to be set up.  We do set up an
 * early signal handler in d_r_os_init(), but there is still a window prior to
 * that with no handler, during which the query-based path is used.
 */
bool
safe_read_ex(const void *base, size_t size, void *out_buf, size_t *bytes_read)
{
    STATS_INC(num_safe_reads);
    if (fault_handling_initialized)
        return safe_read_fast(base, size, out_buf, bytes_read);
    return safe_read_via_query(base, size, out_buf, bytes_read);
}
/* Reads size bytes from base into out_buf.  Uses the safe read path only when
 * fault handling is initialized; otherwise performs a plain memcpy and
 * returns true.
 */
bool
safe_read_if_fast(const void *base, size_t size, void *out_buf)
{
    if (fault_handling_initialized)
        return safe_read_ex(base, size, out_buf, NULL);
    memcpy(out_buf, base, size);
    return true;
}
/* Convenience wrapper around safe_read_ex() that does not report the number
 * of bytes read.
 */
bool
d_r_safe_read(const void *base, size_t size, void *out_buf)
{
    bool ok = safe_read_ex(base, size, out_buf, NULL);
    return ok;
}
/* Safely writes size bytes from in_buf to base by delegating to
 * safe_write_try_except(); bytes_written is forwarded unchanged.
 */
bool
safe_write_ex(void *base, size_t size, const void *in_buf, size_t *bytes_written)
{
    bool ok = safe_write_try_except(base, size, in_buf, bytes_written);
    return ok;
}
* from pc to pc+size-1 are readable and that reading from there won't
* generate an exception. if 'query_os' is true, check what the os thinks
* the prot bits are instead of using the all memory list.
*/
/* Returns true if every page overlapping [pc, pc+size) is reported as having
 * read or execute permission.  When query_os is true the protections come
 * from get_memory_info_from_os(), otherwise from get_memory_info().
 * size is clamped so that pc + size does not pass POINTER_MAX.
 */
static bool
is_readable_without_exception_internal(const byte *pc, size_t size, bool query_os)
{
    uint prot = MEMPROT_NONE;
    byte *page = (byte *)ALIGN_BACKWARD(pc, PAGE_SIZE);

    if (size > ((byte *)POINTER_MAX - pc))
        size = (byte *)POINTER_MAX - pc;
    /* At least one page is always checked, even for size == 0. */
    for (;;) {
        bool have_info;
        if (query_os)
            have_info = get_memory_info_from_os(page, NULL, NULL, &prot);
        else
            have_info = get_memory_info(page, NULL, NULL, &prot);
        if (!have_info)
            return false;
        if (!TESTANY(MEMPROT_READ | MEMPROT_EXEC, prot))
            return false;
        /* Stop rather than wrap around the top of the address space. */
        if (POINTER_OVERFLOW_ON_ADD(page, PAGE_SIZE))
            return true;
        page += PAGE_SIZE;
        if (!(page < pc + size))
            return true;
    }
}
/* Returns whether the size bytes starting at pc are readable without raising
 * an exception.
 * Parsing proc maps is too slow for some apps, so whether the OS is queried
 * is controlled by a runtime option (use_all_memory_areas) in builds that
 * have a choice.
 */
bool
is_readable_without_exception(const byte *pc, size_t size)
{
    bool ask_os = IF_MEMQUERY_ELSE(true, !DYNAMO_OPTION(use_all_memory_areas));

    return is_readable_without_exception_internal(pc, size, ask_os);
}
* for info on the indicated region */
/* Same check as is_readable_without_exception(), but always queries the OS
 * for info on the indicated region.
 */
bool
is_readable_without_exception_query_os(byte *pc, size_t size)
{
    const bool ask_os = true;

    return is_readable_without_exception_internal(pc, size, ask_os);
}
/* Like is_readable_without_exception_query_os(), except that it returns false
 * without querying when memquery_from_os_will_block() reports true.
 */
bool
is_readable_without_exception_query_os_noblock(byte *pc, size_t size)
{
    if (!memquery_from_os_will_block())
        return is_readable_without_exception_internal(pc, size, true);
    return false;
}
/* Reports whether pc is a user-space address.  This implementation performs
 * no check: pc is not examined and the answer is always true.
 */
bool
is_user_address(byte *pc)
{
    /* Unconditional: no address is rejected here. */
    return true;
}
* this does not update the all memory area info
*/
/* Changes the protection of the pages covering [pc, pc+length) to prot (a
 * MEMPROT_* mask, converted with memprot_to_osprot()) using mprotect.
 * This does not update the all memory area info.
 * Returns false if the mprotect call fails.
 */
bool
os_set_protection(byte *pc, size_t length, uint prot)
{
    app_pc start_page = (app_pc)PAGE_START(pc);
    /* Kept as size_t: storing the page-aligned length in a 32-bit uint would
     * silently truncate requests of 4GB or more on 64-bit builds.
     */
    size_t num_bytes = ALIGN_FORWARD(length + (pc - start_page), PAGE_SIZE);
    long res = 0;
    uint flags = memprot_to_osprot(prot);
    /* Stats for requests without write access are recorded before the
     * syscall; requests with write access are recorded after it succeeds.
     */
    DOSTATS({
        if (!TEST(PROT_WRITE, flags)) {
            STATS_INC(protection_change_calls);
            STATS_ADD(protection_change_pages, num_bytes / PAGE_SIZE);
        }
    });
    res = mprotect_syscall((void *)start_page, num_bytes, flags);
    if (res != 0)
        return false;
    LOG(THREAD_GET, LOG_VMAREAS, 3,
        "change_prot(" PFX ", " PIFX ", %s) => "
        "mprotect(" PFX ", " PIFX ", %d)==%d pages\n",
        pc, length, memprot_string(prot), start_page, num_bytes, flags,
        (uint)(num_bytes / PAGE_SIZE));
    DOSTATS({
        if (TEST(PROT_WRITE, flags)) {
            STATS_INC(protection_change_calls);
            STATS_ADD(protection_change_pages, num_bytes / PAGE_SIZE);
        }
    });
    return true;
}
/* Changes the protection of the pages covering [pc, pc+length) to prot via
 * os_set_protection() and, in builds without HAVE_MEMINFO_QUERY, records the
 * new protection in the memory cache.
 * Returns false if os_set_protection() fails.
 */
bool
set_protection(byte *pc, size_t length, uint prot)
{
    if (os_set_protection(pc, length, prot) == false)
        return false;
#ifndef HAVE_MEMINFO_QUERY
    else {
        app_pc start_page = (app_pc)PAGE_START(pc);
        /* size_t avoids truncating the page-aligned length for regions of 4GB
         * or more, which would make the cached range too short.
         */
        size_t num_bytes = ALIGN_FORWARD(length + (pc - start_page), PAGE_SIZE);
        memcache_update_locked(start_page, start_page + num_bytes, prot, -1, true);
    }
#endif
    return true;
}
/* Makes [pc, pc+length) writable or unwritable depending on writable.
 * Returns the result of make_writable() in the first case and always true in
 * the second.
 */
bool
change_protection(byte *pc, size_t length, bool writable)
{
    if (!writable) {
        make_unwritable(pc, length);
        return true;
    }
    return make_writable(pc, length);
}
/* Makes the region starting at pc writable via mprotect.
 * pc is asserted to be page-aligned and size to be a multiple of PAGE_SIZE; a
 * size of 0 is treated as one page for the mprotect call.  The protection
 * defaults to read+write+exec; when pc is outside the DR library and
 * get_memory_info() succeeds, the queried protection plus write is used.
 * Returns false if mprotect fails.
 */
bool
make_writable(byte *pc, size_t size)
{
long res;
app_pc start_page = (app_pc)PAGE_START(pc);
size_t prot_size = (size == 0) ? PAGE_SIZE : size;
uint prot = PROT_EXEC | PROT_READ | PROT_WRITE;
* this is crucial on modern linux kernels which refuse to mark stack +x.
*/
/* get_memory_info() overwrites prot on success; write is then added. */
if (!is_in_dynamo_dll(pc) &&
#ifdef STATIC_LIBRARY
* but we can't call get_memory_info() until allmem is initialized. Our
* uses before then are for patching x86.asm, which is OK.
*/
IF_NO_MEMQUERY(memcache_initialized() &&)
#endif
get_memory_info(pc, NULL, NULL, &prot))
prot |= PROT_WRITE;
ASSERT(start_page == pc && ALIGN_FORWARD(size, PAGE_SIZE) == size);
res = mprotect_syscall((void *)start_page, prot_size, prot);
LOG(THREAD_GET, LOG_VMAREAS, 3, "make_writable: pc " PFX " -> " PFX "-" PFX " %d\n",
pc, start_page, start_page + prot_size, res);
ASSERT(res == 0);
if (res != 0)
return false;
/* Note that the page statistic uses size, not prot_size. */
STATS_INC(protection_change_calls);
STATS_ADD(protection_change_pages, size / PAGE_SIZE);
#ifndef HAVE_MEMINFO_QUERY
/* Mirror the new protection in the memory cache once it exists. */
if (memcache_initialized()) {
memcache_update_locked(start_page, start_page + prot_size,
osprot_to_memprot(prot), -1 ,
true );
}
#endif
return true;
}
/* Makes the region copy-on-writable.  In this implementation that is simply
 * make_writable() on the same range.
 */
bool
make_copy_on_writable(byte *pc, size_t size)
{
    bool ok = make_writable(pc, size);
    return ok;
}
/* Removes write permission from the region starting at pc via mprotect.
 * pc is asserted to be page-aligned and size to be a multiple of PAGE_SIZE; a
 * size of 0 is treated as one page for the mprotect call.  The protection
 * defaults to read+exec; when pc is outside the DR library and
 * get_memory_info() succeeds, the queried protection minus write is used.
 * The mprotect result is only checked by an assert.
 */
void
make_unwritable(byte *pc, size_t size)
{
app_pc start_page = (app_pc)PAGE_START(pc);
size_t prot_size = (size == 0) ? PAGE_SIZE : size;
uint prot = PROT_EXEC | PROT_READ;
* this is crucial on modern linux kernels which refuse to mark stack +x.
*/
/* get_memory_info() overwrites prot on success; write is then cleared. */
if (!is_in_dynamo_dll(pc) &&
#ifdef STATIC_LIBRARY
* but we can't call get_memory_info() until allmem is initialized. Our
* uses before then are for patching x86.asm, which is OK.
*/
IF_NO_MEMQUERY(memcache_initialized() &&)
#endif
get_memory_info(pc, NULL, NULL, &prot))
prot &= ~PROT_WRITE;
ASSERT(start_page == pc && ALIGN_FORWARD(size, PAGE_SIZE) == size);
/* Statistics are recorded before the syscall here and use size, not prot_size. */
STATS_INC(protection_change_calls);
STATS_ADD(protection_change_pages, size / PAGE_SIZE);
/* res only exists in debug builds, where it is checked by the ASSERT below. */
DEBUG_DECLARE(long res =) mprotect_syscall((void *)start_page, prot_size, prot);
LOG(THREAD_GET, LOG_VMAREAS, 3, "make_unwritable: pc " PFX " -> " PFX "-" PFX "\n",
pc, start_page, start_page + prot_size);
ASSERT(res == 0);
#ifndef HAVE_MEMINFO_QUERY
/* Mirror the new protection in the memory cache once it exists. */
if (memcache_initialized()) {
memcache_update_locked(start_page, start_page + prot_size,
osprot_to_memprot(prot), -1 ,
false );
}
#endif
}
* numbers used by libc are in /usr/include/asm/unistd.h
* kernel defines are in /usr/src/linux-2.4/include/asm-i386/unistd.h
* kernel function names are in /usr/src/linux/arch/i386/kernel/entry.S
*
* For now, we've copied the SYS/NR defines from syscall.h and unistd.h
* and put them in our own local syscall.h.
*/
* For a live system call, dcontext_live should be passed (for examining
* the dcontext->last_exit and exit_reason flags); otherwise, gateway should
* be passed.
*/
/* Converts a raw system call number into the normalized number used
 * internally.
 * Outside MACOS the raw number is returned unchanged.  On MACOS the interrupt
 * number is taken from gateway when it is non-NULL, and otherwise from the
 * exit_reason recorded in dcontext when the last exit is a special exit; that
 * interrupt number and the raw encoding together select the BSD, Mach, or
 * Machdep marker.
 */
int
os_normalized_sysnum(int num_raw, instr_t *gateway, dcontext_t *dcontext)
{
#ifdef MACOS
* We drop the 0x2000000 for BSD so we can use the SYS_ enum constants.
* That leaves 0x1000000 for Mach and 0x3000000 for Machdep.
* On 32-bit, a different encoding is used: we transform that
* to the x64 encoding minus BSD.
*/
int interrupt = 0;
int num = 0;
if (gateway != NULL) {
if (instr_is_interrupt(gateway))
interrupt = instr_get_interrupt_number(gateway);
} else {
ASSERT(dcontext != NULL);
/* For a special exit the exit reason is expected to be one of the two
 * interrupt reasons handled below.
 */
if (TEST(LINK_SPECIAL_EXIT, dcontext->last_exit->flags)) {
if (dcontext->upcontext.upcontext.exit_reason ==
EXIT_REASON_NI_SYSCALL_INT_0x81)
interrupt = 0x81;
else {
ASSERT(dcontext->upcontext.upcontext.exit_reason ==
EXIT_REASON_NI_SYSCALL_INT_0x82);
interrupt = 0x82;
}
}
}
# ifdef X64
/* A number carrying the BSD marker is returned as its low 24 bits. */
if (TEST(SYSCALL_NUM_MARKER_BSD, num_raw))
return (int)(num_raw & 0xffffff);
else
num = (int)num_raw;
# else
/* A negative raw number is returned negated with the Mach marker added. */
if ((ptr_int_t)num_raw < 0)
return (SYSCALL_NUM_MARKER_MACH | -(int)num_raw);
else {
num = (int)(num_raw & 0xffff);
}
# endif
/* Interrupt 0x81 adds the Mach marker and 0x82 adds the Machdep marker. */
if (interrupt == 0x81)
num |= SYSCALL_NUM_MARKER_MACH;
else if (interrupt == 0x82)
num |= SYSCALL_NUM_MARKER_MACHDEP;
return num;
#else
return num_raw;
#endif
}
/* Returns whether the normalized system call number num is ignorable.
 * The answer is false for every system call in the first group of cases,
 * option-dependent for readlink/readlinkat (Linux only) and openat/openat2,
 * and true for everything else (under VMX86_SERVER, vmkuw numbers are
 * delegated to vmkuw_ignorable_system_call()).
 * NOTE(review): "ignorable" presumably means the caller need not
 * intercept the system call -- confirm against the callers.
 */
static bool
ignorable_system_call_normalized(int num)
{
    switch (num) {
#if defined(SYS_exit_group)
    case SYS_exit_group:
#endif
    case SYS_exit:
#ifdef MACOS
    case SYS_bsdthread_terminate:
#endif
#ifdef LINUX
    case SYS_brk:
# ifdef SYS_uselib
    case SYS_uselib:
# endif
#endif
#if defined(X64) || !defined(ARM)
    case SYS_mmap:
#endif
#if !defined(X64) && !defined(MACOS)
    case SYS_mmap2:
#endif
    case SYS_munmap:
#ifdef LINUX
    case SYS_mremap:
#endif
    case SYS_mprotect:
#ifdef ANDROID
    case SYS_prctl:
#endif
    case SYS_execve:
#ifdef LINUX
    case SYS_clone3:
    case SYS_clone:
#elif defined(MACOS)
    case SYS_bsdthread_create:
    case SYS_posix_spawn:
#endif
#ifdef SYS_fork
    case SYS_fork:
#endif
#ifdef SYS_vfork
    case SYS_vfork:
#endif
    case SYS_kill:
#if defined(SYS_tkill)
    case SYS_tkill:
#endif
#if defined(SYS_tgkill)
    case SYS_tgkill:
#endif
#if defined(LINUX) && !defined(X64) && !defined(ARM)
    case SYS_signal:
#endif
#ifdef MACOS
    case SYS_sigsuspend_nocancel:
#endif
#if !defined(X64) || defined(MACOS)
    case SYS_sigaction:
    case SYS_sigsuspend:
    case SYS_sigpending:
    case SYS_sigreturn:
    case SYS_sigprocmask:
#endif
#ifdef LINUX
    case SYS_rt_sigreturn:
    case SYS_rt_sigaction:
    case SYS_rt_sigprocmask:
    case SYS_rt_sigpending:
# ifdef SYS_rt_sigtimedwait_time64
    case SYS_rt_sigtimedwait_time64:
# endif
    case SYS_rt_sigtimedwait:
    case SYS_rt_sigqueueinfo:
    case SYS_rt_tgsigqueueinfo:
    case SYS_rt_sigsuspend:
# ifdef SYS_signalfd
    case SYS_signalfd:
# endif
    case SYS_signalfd4:
#endif
    case SYS_sigaltstack:
#if defined(LINUX) && !defined(X64) && !defined(ARM)
    case SYS_sgetmask:
    case SYS_ssetmask:
#endif
    case SYS_setitimer:
    case SYS_getitimer:
#ifdef MACOS
    case SYS_close_nocancel:
#endif
#ifdef SYS_close_range
    case SYS_close_range:
#endif
    case SYS_close:
#ifdef SYS_dup2
    case SYS_dup2:
#endif
#ifdef LINUX
    case SYS_dup3:
#endif
#ifdef MACOS
    case SYS_fcntl_nocancel:
#endif
    case SYS_fcntl:
#if defined(X64) || !defined(ARM)
    case SYS_getrlimit:
#endif
#if defined(LINUX) && !defined(X64)
    case SYS_ugetrlimit:
#endif
    case SYS_setrlimit:
#ifdef LINUX
    case SYS_prlimit64:
#endif
#if defined(LINUX) && defined(X86)
    case SYS_alarm:
#endif
    /* The thread-area/TLS system calls below are not ignorable: we
     * need to stop the app from clobbering our GDT slot.
     */
#if defined(LINUX) && defined(X86) && defined(X64)
    case SYS_arch_prctl:
#endif
#if defined(LINUX) && defined(X86)
    case SYS_set_thread_area:
    case SYS_get_thread_area:
#endif
#if defined(LINUX) && defined(ARM)
    case SYS_set_tls:
    case SYS_cacheflush:
#endif
#if defined(LINUX)
# ifdef SYS_pselect6_time64
    case SYS_pselect6_time64:
# endif
    case SYS_pselect6:
# ifdef SYS_ppoll_time64
    case SYS_ppoll_time64:
# endif
    case SYS_ppoll:
    case SYS_epoll_pwait:
    case SYS_rseq:
#endif
#ifdef DEBUG
    /* In DEBUG builds the open system calls are not ignorable either. */
# ifdef MACOS
    case SYS_open_nocancel:
# endif
# ifdef SYS_open
    case SYS_open:
# endif
#endif
        return false;
#ifdef LINUX
    /* readlink variants are ignorable only when -early_inject is off. */
# ifdef SYS_readlink
    case SYS_readlink:
# endif
    case SYS_readlinkat: return !DYNAMO_OPTION(early_inject);
#endif
    /* openat variants are ignorable only when -xarch_root is empty. */
#ifdef SYS_openat2
    case SYS_openat2:
#endif
    case SYS_openat: return IS_STRING_OPTION_EMPTY(xarch_root);
    default:
#ifdef VMX86_SERVER
        if (is_vmkuw_sysnum(num))
            return vmkuw_ignorable_system_call(num);
#endif
        return true;
    }
}
/* Normalizes the raw system call number (see os_normalized_sysnum() for the
 * meaning of gateway and dcontext_live) and reports whether the resulting
 * system call is ignorable.
 */
bool
ignorable_system_call(int num_raw, instr_t *gateway, dcontext_t *dcontext_live)
{
    int normalized = os_normalized_sysnum(num_raw, gateway, dcontext_live);
    return ignorable_system_call_normalized(normalized);
}
/* Six unsigned long fields named after the arguments of mmap.
 * NOTE(review): presumably the in-memory argument block used by the
 * old-style mmap system call that takes a single pointer -- confirm
 * against the users of this type.
 */
typedef struct {
    unsigned long addr;
    unsigned long len;
    unsigned long prot;
    unsigned long flags;
    unsigned long fd;
    unsigned long offset;
} mmap_arg_struct_t;
/* Returns the address of the slot that holds system call parameter number
 * num (0-based) for the system call described by dcontext's mcontext.
 * Depending on the platform the slot is a register field of the mcontext or
 * a location on the app stack.  An out-of-range num triggers a
 * CLIENT_ASSERT and yields a null pointer.
 */
static inline reg_t *
sys_param_addr(dcontext_t *dcontext, int num)
{
    priv_mcontext_t *mc = get_mcontext(dcontext);
#if defined(X86) && defined(X64)
    switch (num) {
    case 0: return &mc->xdi;
    case 1: return &mc->xsi;
    case 2: return &mc->xdx;
    case 3: return &mc->r10;
    case 4: return &mc->r8;
    case 5: return &mc->r9;
    default: CLIENT_ASSERT(false, "invalid system call parameter number");
    }
#else
# if defined(MACOS) && defined(X86)
    /* XXX: NOTE(review): this presumably refers to dcontext->sys_was_int,
     * which is only consulted in the switch below: we could
     * make that field Linux-only.
     */
    /* The args are read from the app stack, starting one slot above mc->esp
     * (regardless of whether using a sysenter or int gateway).
     */
    return ((reg_t *)mc->esp) + 1 + num;
# endif
    /* Even for the vsyscall sequences below, which copy a register into
     * ebp, the original parameter registers are not yet changed pre-syscall,
     * except for ebp, which is pushed on the stack:
     *     0xffffe400  55     push %ebp %esp -> %esp (%esp)
     *     0xffffe401  89 cd  mov  %ecx -> %ebp
     *     0xffffe403  0f 05  syscall -> %ecx
     *
     *     0xffffe400  51     push %ecx %esp -> %esp (%esp)
     *     0xffffe401  52     push %edx %esp -> %esp (%esp)
     *     0xffffe402  55     push %ebp %esp -> %esp (%esp)
     *     0xffffe403  89 e5  mov  %esp -> %ebp
     *     0xffffe405  0f 34  sysenter -> %esp
     */
    switch (num) {
    case 0: return &mc->IF_X86_ELSE(xbx, IF_RISCV64_ELSE(a0, r0));
    case 1: return &mc->IF_X86_ELSE(xcx, IF_RISCV64_ELSE(a1, r1));
    case 2: return &mc->IF_X86_ELSE(xdx, IF_RISCV64_ELSE(a2, r2));
    case 3: return &mc->IF_X86_ELSE(xsi, IF_RISCV64_ELSE(a3, r3));
    case 4: return &mc->IF_X86_ELSE(xdi, IF_RISCV64_ELSE(a4, r4));
    /* On x86 the sixth parameter is in xbp for an int gateway; otherwise it
     * is taken from the app stack slot at mc->xsp.
     * See the #if 0 below, as well. */
    case 5:
        return IF_X86_ELSE((dcontext->sys_was_int ? &mc->xbp : ((reg_t *)mc->xsp)),
                           &mc->IF_RISCV64_ELSE(a5, r5));
# ifdef ARM
    case 6: return &mc->r6;
# endif
    default: CLIENT_ASSERT(false, "invalid system call parameter number");
    }
#endif
    return 0;
}
/* Reads the current value of system call parameter number num. */
static inline reg_t
sys_param(dcontext_t *dcontext, int num)
{
    const reg_t *slot = sys_param_addr(dcontext, num);
    return *slot;
}
/* Overwrites system call parameter number param_num with new_value. */
void
set_syscall_param(dcontext_t *dcontext, int param_num, reg_t new_value)
{
    reg_t *slot = sys_param_addr(dcontext, param_num);
    *slot = new_value;
}
/* Returns whether the system call result currently held in mc indicates
 * success for the system call with number normalized_sysnum.
 */
static inline bool
syscall_successful(priv_mcontext_t *mc, int normalized_sysnum)
{
#ifdef MACOS
    if (TEST(SYSCALL_NUM_MARKER_MACH, normalized_sysnum)) {
        /* XXX: Mach system calls do not share one success convention.  Here
         * only a negative result is treated as failure, but
         * for others that return mach_port_t 0 is failure (I think?).
         * We defer to drsyscall.
         */
        return ((ptr_int_t)MCXT_SYSCALL_RES(mc) >= 0);
    } else {
        /* Non-Mach system calls: success means the carry flag is clear. */
# ifdef X86
        return !TEST(EFLAGS_CF, mc->xflags);
# elif defined(AARCH64)
        return !TEST(EFLAGS_C, mc->xflags);
# else
# error NYI
# endif
    }
#else
    /* The mmap family is judged by mmap_syscall_succeeded(); every other
     * system call fails exactly when its result is negative.
     */
    if (normalized_sysnum == IF_X64_ELSE(SYS_mmap, SYS_mmap2) ||
# if !defined(ARM) && !defined(X64)
        normalized_sysnum == SYS_mmap ||
# endif
        normalized_sysnum == SYS_mremap)
        return mmap_syscall_succeeded((byte *)MCXT_SYSCALL_RES(mc));
    return ((ptr_int_t)MCXT_SYSCALL_RES(mc) >= 0);
#endif
}
* For Mac, this clears CF and just sets xax. To return a 64-bit value in
* 32-bit mode, the caller must explicitly set xdx as well (we don't always
* do so b/c syscalls that just return 32-bit values do not touch xdx).
*/
static inline void
set_success_return_val(dcontext_t *dcontext, reg_t val)
{
priv_mcontext_t *mc = get_mcontext(dcontext);
#ifdef MACOS
* there it doesn't hurt to set CF.
*/
# ifdef X86
mc->xflags &= ~(EFLAGS_CF);
# elif defined(AARCH64)
mc->xflags &= ~(EFLAGS_C);
# else
# error NYI
# endif
#endif
MCXT_SYSCALL_RES(mc) = val;
}
/* Stores a failed system call result for errno_val in dcontext's mcontext.
 * On MacOS the carry flag is raised and the result is errno_val itself;
 * elsewhere the result is the negated errno_val.
 */
static inline void
set_failure_return_val(dcontext_t *dcontext, uint errno_val)
{
    priv_mcontext_t *mc = get_mcontext(dcontext);
#ifdef MACOS
    reg_t failure_res = errno_val;
# ifdef X86
    mc->xflags = mc->xflags | EFLAGS_CF;
# elif defined(AARCH64)
    mc->xflags = mc->xflags | EFLAGS_C;
# else
# error NYI
# endif
#else
    reg_t failure_res = (reg_t)(-(int)errno_val);
#endif
    MCXT_SYSCALL_RES(mc) = failure_res;
}
/* Client API: returns system call parameter number param_num.  Asserts that
 * the caller is inside the pre-syscall event.
 */
DR_API
reg_t
dr_syscall_get_param(void *drcontext, int param_num)
{
    dcontext_t *dcontext = (dcontext_t *)drcontext;
    CLIENT_ASSERT(dcontext->client_data->in_pre_syscall,
                  "dr_syscall_get_param() can only be called from pre-syscall event");
    return *sys_param_addr(dcontext, param_num);
}
/* Client API: overwrites system call parameter number param_num with
 * new_value.  Asserts that the caller is inside a pre- or post-syscall event.
 */
DR_API
void
dr_syscall_set_param(void *drcontext, int param_num, reg_t new_value)
{
    dcontext_t *dcontext = (dcontext_t *)drcontext;
    CLIENT_ASSERT(dcontext->client_data->in_pre_syscall ||
                      dcontext->client_data->in_post_syscall,
                  "dr_syscall_set_param() can only be called from a syscall event");
    set_syscall_param(dcontext, param_num, new_value);
}
/* Client API: returns the raw result value of the system call that just
 * executed.  Asserts that the caller is inside the post-syscall event.
 */
DR_API
reg_t
dr_syscall_get_result(void *drcontext)
{
    dcontext_t *dcontext = (dcontext_t *)drcontext;
    /* The message names this routine (it used to name dr_syscall_get_param()). */
    CLIENT_ASSERT(dcontext->client_data->in_post_syscall,
                  "dr_syscall_get_result() can only be called from post-syscall event");
    return MCXT_SYSCALL_RES(get_mcontext(dcontext));
}
/* Client API: fills *info with details of the result of the system call that
 * just executed.
 * info->size must equal sizeof(*info), else false is returned and nothing is
 * filled in.  value and succeeded are always filled in; high is filled in
 * only when info->use_high is set and errno_value only when info->use_errno
 * is set.  Returns true on success.
 */
DR_API
bool
dr_syscall_get_result_ex(void *drcontext, dr_syscall_result_info_t *info INOUT)
{
    dcontext_t *dcontext = (dcontext_t *)drcontext;
    priv_mcontext_t *mc = get_mcontext(dcontext);
    /* The message names this routine (it used to name dr_syscall_get_param_ex()). */
    CLIENT_ASSERT(dcontext->client_data->in_post_syscall,
                  "only call dr_syscall_get_result_ex() from post-syscall event");
    CLIENT_ASSERT(info != NULL, "invalid parameter");
    CLIENT_ASSERT(info->size == sizeof(*info), "invalid dr_syscall_result_info_t size");
    if (info->size != sizeof(*info))
        return false;
    info->value = MCXT_SYSCALL_RES(mc);
    info->succeeded = syscall_successful(mc, dcontext->sys_num);
    if (info->use_high) {
        /* Some system calls return a 64-bit value in
         * xdx:xax, but the other syscalls don't clear xdx, so we can't easily
         * return a 64-bit value all the time.
         */
        IF_X86_ELSE({ info->high = mc->xdx; }, { ASSERT_NOT_REACHED(); });
    }
    if (info->use_errno) {
        if (info->succeeded)
            info->errno_value = 0;
        else {
            /* On Linux the failure result is the negated errno value. */
            info->errno_value = (uint)IF_LINUX(-(int)) MCXT_SYSCALL_RES(mc);
        }
    }
    return true;
}
/* Client API: sets the system call result to value, marked as a success.
 * Asserts that the caller is inside a pre- or post-syscall event.
 */
DR_API
void
dr_syscall_set_result(void *drcontext, reg_t value)
{
    dcontext_t *dc = (dcontext_t *)drcontext;
    CLIENT_ASSERT(dc->client_data->in_pre_syscall || dc->client_data->in_post_syscall,
                  "dr_syscall_set_result() can only be called from a syscall event");
    set_success_return_val(dc, value);
}
/* Client API: sets the system call result from *info.
 * info->size must equal sizeof(*info), else false is returned and nothing is
 * changed.  When info->use_errno is set the result is taken from
 * info->errno_value; otherwise it is taken from info->value (and, when
 * info->use_high is set, info->high).  info->succeeded selects between the
 * success and the failure encoding.  Returns true on success.
 */
DR_API
bool
dr_syscall_set_result_ex(void *drcontext, dr_syscall_result_info_t *info)
{
    dcontext_t *dcontext = (dcontext_t *)drcontext;
    priv_mcontext_t *mc = get_mcontext(dcontext);
    /* The message names this routine (it used to name dr_syscall_set_result()). */
    CLIENT_ASSERT(dcontext->client_data->in_pre_syscall ||
                      dcontext->client_data->in_post_syscall,
                  "dr_syscall_set_result_ex() can only be called from a syscall event");
    /* Same parameter check as dr_syscall_get_result_ex(). */
    CLIENT_ASSERT(info != NULL, "invalid parameter");
    CLIENT_ASSERT(info->size == sizeof(*info), "invalid dr_syscall_result_info_t size");
    if (info->size != sizeof(*info))
        return false;
    if (info->use_errno) {
        if (info->succeeded) {
            /* A success combined with an errno value: the errno value is
             * simply stored as the successful result.
             */
            set_success_return_val(dcontext, info->errno_value);
        } else
            set_failure_return_val(dcontext, info->errno_value);
    } else {
        if (info->succeeded)
            set_success_return_val(dcontext, info->value);
        else {
            /* Use the failure helper for its failure marking, then store the
             * caller's value verbatim over whatever the helper wrote.
             */
            set_failure_return_val(dcontext, (uint)info->value);
            MCXT_SYSCALL_RES(mc) = info->value;
        }
        if (info->use_high) {
            /* The high half of a 64-bit result goes into xdx, forming
             * xdx:xax.
             */
            IF_X86_ELSE({ mc->xdx = info->high; }, { ASSERT_NOT_REACHED(); });
        }
    }
    return true;
}
DR_API
void
dr_syscall_set_sysnum(void *drcontext, int new_num)
{
dcontext_t *dcontext = (dcontext_t *)drcontext;
priv_mcontext_t *mc = get_mcontext(dcontext);
CLIENT_ASSERT(dcontext->client_data->in_pre_syscall ||
dcontext->client_data->in_post_syscall,
"dr_syscall_set_sysnum() can only be called from a syscall event");
MCXT_SYSNUM_REG(mc) = new_num;
}
/* Client API: requests that one more system call be invoked after the
 * current one.  Asserts that the caller is inside the post-syscall event.
 */
DR_API
void
dr_syscall_invoke_another(void *drcontext)
{
    dcontext_t *dcontext = (dcontext_t *)drcontext;
    CLIENT_ASSERT(dcontext->client_data->in_post_syscall,
                  "dr_syscall_invoke_another() can only be called from post-syscall "
                  "event");
    LOG(THREAD, LOG_SYSCALLS, 2, "invoking additional syscall on client request\n");
    dcontext->client_data->invoke_another_syscall = true;
#ifdef X86
    /* With the sysenter method, xbp is reloaded from xsp. */
    if (get_syscall_method() == SYSCALL_METHOD_SYSENTER) {
        priv_mcontext_t *regs = get_mcontext(dcontext);
        regs->xbp = regs->xsp;
    }
#endif
}
/* Returns whether the system call sysnum, invoked with the given flags,
 * creates a thread: bsdthread_create or vfork on MacOS; elsewhere vfork
 * (where defined) or, on Linux, clone/clone3 with CLONE_VM set in flags.
 */
static inline bool
is_thread_create_syscall_helper(ptr_uint_t sysnum, uint64 flags)
{
#ifdef MACOS
    /* XXX: workqueue threads are not recognized here.
     * NOTE(review): that presumably requires intercepting
     * bsdthread_register in order to capture workqueue threads.
     */
    return (sysnum == SYS_bsdthread_create || sysnum == SYS_vfork);
#else
# ifdef SYS_vfork
    if (sysnum == SYS_vfork)
        return true;
# endif
# ifdef LINUX
    if ((sysnum == SYS_clone || sysnum == SYS_clone3) && TEST(CLONE_VM, flags))
        return true;
# endif
    return false;
#endif
}
/* Returns whether the system call about to be made (number taken from the
 * mcontext, flags from parameter 0) creates a thread.  On Linux,
 * maybe_clone_args must be non-NULL when the system call is clone3.
 */
bool
is_thread_create_syscall(dcontext_t *dcontext _IF_LINUX(void *maybe_clone_args))
{
    priv_mcontext_t *mc = get_mcontext(dcontext);
    uint64 flags = sys_param(dcontext, 0);
    ptr_uint_t sysnum = MCXT_SYSNUM_REG(mc);
#ifdef LINUX
    /* For clone3 the flags are taken from the clone args structure instead
     * of from parameter 0; maybe_clone_args is expected to be the result of
     * a safe read from the user-provided syscall args.
     */
    if (sysnum == SYS_clone3) {
        ASSERT(maybe_clone_args != NULL);
        flags = ((clone3_syscall_args_t *)maybe_clone_args)->flags;
    }
#endif
    return is_thread_create_syscall_helper(sysnum, flags);
}
#ifdef LINUX
/* Reassembles the 64-bit clone3 flags that were stored in two halves:
 * dcontext->sys_param3 holds the low 32 bits and dcontext->sys_param4 the
 * high 32 bits.
 * The result is returned as uint64 so that the high half is not truncated
 * away where ptr_uint_t is narrower than 64 bits.
 */
static uint64
get_stored_clone3_flags(dcontext_t *dcontext)
{
    return ((uint64)dcontext->sys_param4 << 32) | dcontext->sys_param3;
}
#endif
/* Returns whether the system call recorded in dcontext (sys_num plus the
 * stored parameters) was a thread-creating one.
 */
bool
was_thread_create_syscall(dcontext_t *dcontext)
{
#ifdef LINUX
    /* clone3 keeps its flags in separate stored slots. */
    if (dcontext->sys_num == SYS_clone3) {
        return is_thread_create_syscall_helper(dcontext->sys_num,
                                               get_stored_clone3_flags(dcontext));
    }
#endif
    return is_thread_create_syscall_helper(dcontext->sys_num, dcontext->sys_param0);
}
bool
is_sigreturn_syscall_number(int sysnum)
{
#ifdef MACOS
return sysnum == SYS_sigreturn;
#else
return (IF_NOT_X64(sysnum == SYS_sigreturn ||) sysnum == SYS_rt_sigreturn);
#endif
}
/* Returns whether the system call number currently in the mcontext is a
 * sigreturn.
 */
bool
is_sigreturn_syscall(dcontext_t *dcontext)
{
    priv_mcontext_t *regs = get_mcontext(dcontext);
    int sysnum = (int)MCXT_SYSNUM_REG(regs);
    return is_sigreturn_syscall_number(sysnum);
}
/* Returns whether the system call number recorded in dcontext->sys_num is a
 * sigreturn.
 */
bool
was_sigreturn_syscall(dcontext_t *dcontext)
{
    int recorded_sysnum = dcontext->sys_num;
    return is_sigreturn_syscall_number(recorded_sysnum);
}
/* Handles a thread sending signal sig to itself.  Only SIGABRT is acted on,
 * and only when -intercept_all_signals is off: the thread is then cleaned up
 * and terminated and this routine does not return.
 */
static void
handle_self_signal(dcontext_t *dcontext, uint sig)
{
    /* FIXME: only a self-directed SIGABRT is watched for here; other
     * signals should be watched for as well,
     * and for any thread in the group, not just self.
     *
     * FIXME PR 297033: watch for SIGSTOP and SIGCONT.
     *
     * With -intercept_all_signals, we only need to watch for SIGKILL
     * and SIGSTOP here, and we avoid the FIXMEs below.  If it's fine
     * for DR not to clean up on a SIGKILL, then SIGSTOP is all that's
     * left (at least once we have PR 297033 and are intercepting the
     * various STOP variations and CONT).
     */
    if (sig == SIGABRT && !DYNAMO_OPTION(intercept_all_signals)) {
        LOG(GLOBAL, LOG_TOP | LOG_SYSCALLS, 1,
            "thread " TIDFMT " sending itself a SIGABRT\n", d_r_get_thread_id());
        KSTOP(num_exits_dir_syscall);
        /* FIXME: this terminates via SYSNUM_EXIT_THREAD rather than by
         * delivering the SIGABRT.
         * Should do set_default_signal_action(SIGABRT) (and set a flag so
         * no races w/ another thread re-installing?) and then SYS_kill.
         */
        block_cleanup_and_terminate(dcontext, SYSNUM_EXIT_THREAD, -1, 0,
                                    (is_last_app_thread() && !dynamo_exited),
                                    IF_MACOS_ELSE(dcontext->thread_port, 0), 0);
        ASSERT_NOT_REACHED();
    }
}
/****************************************************************************
 * EXECVE
 */
enum {
ENV_PROP_RUNUNDER,
ENV_PROP_OPTIONS,
ENV_PROP_EXECVE_LOGDIR,
ENV_PROP_EXE_PATH,
ENV_PROP_CONFIGDIR,
};
static const char *const env_to_propagate[] = {
DYNAMORIO_VAR_RUNUNDER,
DYNAMORIO_VAR_OPTIONS,
* - DYNAMORIO_VAR_LOGDIR: a parent dir inside which a new dir will be created;
* - DYNAMORIO_VAR_EXECVE_LOGDIR: the same subdir with the pre-execve process.
* Xref comment in create_log_dir about their precedence.
*/
DYNAMORIO_VAR_EXECVE_LOGDIR,
DYNAMORIO_VAR_EXE_PATH,
DYNAMORIO_VAR_CONFIGDIR,
DYNAMORIO_VAR_AUTOINJECT,
DYNAMORIO_VAR_ALTINJECT,
};
#define NUM_ENV_TO_PROPAGATE (sizeof(env_to_propagate) / sizeof(env_to_propagate[0]))
* For late injection via libdrpreload, we call this for *all children, because
* even if -no_follow_children is specified, a allowlist will still ask for takeover
* and it's libdrpreload who checks the allowlist.
* For -early, however, we check the config ahead of time and only call this routine
* if we in fact want to inject.
* XXX i#1679: these parent vs child differences bring up corner cases of which
* config dir takes precedence (if the child clears the HOME env var, e.g.).
*/
static void
add_dr_env_vars(dcontext_t *dcontext, char *inject_library_path, const char *app_path)
{
char **envp = (char **)sys_param(dcontext, 2);
int idx, j, preload = -1, ldpath = -1;
int num_old, num_new, sz;
bool need_var[NUM_ENV_TO_PROPAGATE];
int prop_idx[NUM_ENV_TO_PROPAGATE];
bool ldpath_us = false, preload_us = false;
char **new_envp, *var, *old;
for (j = 0; j < NUM_ENV_TO_PROPAGATE; j++) {
prop_idx[j] = -1;
if (get_config_val(env_to_propagate[j]) == NULL)
need_var[j] = false;
else
need_var[j] = true;
}
* we only need it if follow_children is true and PROCESS_DIR exists.
*/
if (DYNAMO_OPTION(follow_children) && get_log_dir(PROCESS_DIR, NULL, NULL))
need_var[ENV_PROP_EXECVE_LOGDIR] = true;
else
need_var[ENV_PROP_EXECVE_LOGDIR] = false;
if (DYNAMO_OPTION(early_inject))
need_var[ENV_PROP_EXE_PATH] = true;
if (envp == NULL) {
LOG(THREAD, LOG_SYSCALLS, 3, "\tenv is NULL\n");
idx = 0;
} else {
for (idx = 0; envp[idx] != NULL; idx++) {
ASSERT(strstr(envp[idx], DYNAMORIO_VAR_EXECVE) != envp[idx]);
for (j = 0; j < NUM_ENV_TO_PROPAGATE; j++) {
if (strstr(envp[idx], env_to_propagate[j]) == envp[idx]) {
* are for DR usage only, and replace them with cfg value.
*/
prop_idx[j] = idx;
break;
}
}
if (!DYNAMO_OPTION(early_inject) &&
strstr(envp[idx], "LD_LIBRARY_PATH=") == envp[idx]) {
ldpath = idx;
if (strstr(envp[idx], inject_library_path) != NULL)
ldpath_us = true;
}
if (!DYNAMO_OPTION(early_inject) &&
strstr(envp[idx], "LD_PRELOAD=") == envp[idx]) {
preload = idx;
if (strstr(envp[idx], DYNAMORIO_PRELOAD_NAME) != NULL &&
strstr(envp[idx], get_dynamorio_library_path()) != NULL) {
preload_us = true;
}
}
LOG(THREAD, LOG_SYSCALLS, 3, "\tenv %d: %s\n", idx, envp[idx]);
}
}
* array. We have to deallocate them and restore the old
* envp if execve fails; if execve succeeds, the address
* space is reset so we don't need to do anything.
*/
num_old = idx;
num_new = 2 +
(DYNAMO_OPTION(early_inject)
? 0
: (((preload < 0) ? 1 : 0) + ((ldpath < 0) ? 1 : 0)));
for (j = 0; j < NUM_ENV_TO_PROPAGATE; j++) {
if ((DYNAMO_OPTION(follow_children) || j == ENV_PROP_EXE_PATH) && need_var[j] &&
prop_idx[j] < 0)
num_new++;
}
new_envp =
heap_alloc(dcontext, sizeof(char *) * (num_old + num_new) HEAPACCT(ACCT_OTHER));
memcpy(new_envp, envp, sizeof(char *) * num_old);
if (!DYNAMO_OPTION(early_inject) && !preload_us) {
int idx_preload;
LOG(THREAD, LOG_SYSCALLS, 1,
"WARNING: execve env does NOT preload DynamoRIO, forcing it!\n");
if (preload >= 0) {
const char *dr_lib_path = get_dynamorio_library_path();
sz = strlen(envp[preload]) + strlen(DYNAMORIO_PRELOAD_NAME) +
strlen(dr_lib_path) + 3;
var = heap_alloc(dcontext, sizeof(char) * sz HEAPACCT(ACCT_OTHER));
old = envp[preload] + strlen("LD_PRELOAD=");
snprintf(var, sz, "LD_PRELOAD=%s %s %s", DYNAMORIO_PRELOAD_NAME, dr_lib_path,
old);
idx_preload = preload;
} else {
const char *dr_lib_path = get_dynamorio_library_path();
sz = strlen("LD_PRELOAD=") + strlen(DYNAMORIO_PRELOAD_NAME) +
strlen(dr_lib_path) + 2;
var = heap_alloc(dcontext, sizeof(char) * sz HEAPACCT(ACCT_OTHER));
snprintf(var, sz, "LD_PRELOAD=%s %s", DYNAMORIO_PRELOAD_NAME, dr_lib_path);
idx_preload = idx++;
}
*(var + sz - 1) = '\0';
new_envp[idx_preload] = var;
LOG(THREAD, LOG_SYSCALLS, 2, "\tnew env %d: %s\n", idx_preload,
new_envp[idx_preload]);
}
if (!DYNAMO_OPTION(early_inject) && !ldpath_us) {
int idx_ldpath;
if (ldpath >= 0) {
sz = strlen(envp[ldpath]) + strlen(inject_library_path) + 2;
var = heap_alloc(dcontext, sizeof(char) * sz HEAPACCT(ACCT_OTHER));
old = envp[ldpath] + strlen("LD_LIBRARY_PATH=");
snprintf(var, sz, "LD_LIBRARY_PATH=%s:%s", inject_library_path, old);
idx_ldpath = ldpath;
} else {
sz = strlen("LD_LIBRARY_PATH=") + strlen(inject_library_path) + 1;
var = heap_alloc(dcontext, sizeof(char) * sz HEAPACCT(ACCT_OTHER));
snprintf(var, sz, "LD_LIBRARY_PATH=%s", inject_library_path);
idx_ldpath = idx++;
}
*(var + sz - 1) = '\0';
new_envp[idx_ldpath] = var;
LOG(THREAD, LOG_SYSCALLS, 2, "\tnew env %d: %s\n", idx_ldpath,
new_envp[idx_ldpath]);
}
for (j = 0; j < NUM_ENV_TO_PROPAGATE; j++) {
const char *val = "";
if (!need_var[j])
continue;
if (!DYNAMO_OPTION(follow_children) && j != ENV_PROP_EXE_PATH)
continue;
switch (j) {
case ENV_PROP_RUNUNDER:
ASSERT(strcmp(env_to_propagate[j], DYNAMORIO_VAR_RUNUNDER) == 0);
* If rununder var is already set we assume it's set to 1.
*/
ASSERT((RUNUNDER_ON | RUNUNDER_ALL) == 0x3);
val = "3";
break;
case ENV_PROP_OPTIONS:
ASSERT(strcmp(env_to_propagate[j], DYNAMORIO_VAR_OPTIONS) == 0);
val = d_r_option_string;
break;
case ENV_PROP_EXECVE_LOGDIR:
ASSERT(strcmp(env_to_propagate[j], DYNAMORIO_VAR_EXECVE_LOGDIR) == 0);
ASSERT(get_log_dir(PROCESS_DIR, NULL, NULL));
break;
case ENV_PROP_EXE_PATH:
ASSERT(strcmp(env_to_propagate[j], DYNAMORIO_VAR_EXE_PATH) == 0);
val = app_path;
break;
default:
val = getenv(env_to_propagate[j]);
if (val == NULL)
val = "";
break;
}
if (j == ENV_PROP_EXECVE_LOGDIR) {
uint logdir_length;
get_log_dir(PROCESS_DIR, NULL, &logdir_length);
sz = strlen(DYNAMORIO_VAR_EXECVE_LOGDIR) + logdir_length + 1 ;
var = heap_alloc(dcontext, sizeof(char) * sz HEAPACCT(ACCT_OTHER));
snprintf(var, sz, "%s=", DYNAMORIO_VAR_EXECVE_LOGDIR);
get_log_dir(PROCESS_DIR, var + strlen(var), &logdir_length);
} else {
sz = strlen(env_to_propagate[j]) + strlen(val) + 2 ;
var = heap_alloc(dcontext, sizeof(char) * sz HEAPACCT(ACCT_OTHER));
snprintf(var, sz, "%s=%s", env_to_propagate[j], val);
}
*(var + sz - 1) = '\0';
prop_idx[j] = (prop_idx[j] >= 0) ? prop_idx[j] : idx++;
new_envp[prop_idx[j]] = var;
LOG(THREAD, LOG_SYSCALLS, 2, "\tnew env %d: %s\n", prop_idx[j],
new_envp[prop_idx[j]]);
}
if (!DYNAMO_OPTION(follow_children) && !DYNAMO_OPTION(early_inject)) {
if (prop_idx[ENV_PROP_RUNUNDER] >= 0) {
* on other side to inject if config file exists.
* kind of hacky mangle here:
*/
ASSERT(!need_var[ENV_PROP_RUNUNDER]);
ASSERT(new_envp[prop_idx[ENV_PROP_RUNUNDER]][0] == 'D');
new_envp[prop_idx[ENV_PROP_RUNUNDER]][0] = 'X';
}
}
sz = strlen(DYNAMORIO_VAR_EXECVE) + 4;
var = heap_alloc(dcontext, sizeof(char) * sz HEAPACCT(ACCT_OTHER));
ASSERT(os_tls_get_gdt_index(dcontext) < 100 &&
os_tls_get_gdt_index(dcontext) >= -1);
snprintf(var, sz, "%s=%02d", DYNAMORIO_VAR_EXECVE, os_tls_get_gdt_index(dcontext));
*(var + sz - 1) = '\0';
new_envp[idx++] = var;
LOG(THREAD, LOG_SYSCALLS, 2, "\tnew env %d: %s\n", idx - 1, new_envp[idx - 1]);
new_envp[idx++] = NULL;
ASSERT((num_new + num_old) == idx);
*sys_param_addr(dcontext, 2) = (reg_t)new_envp;
* this is a vfork thread
*/
dcontext->sys_param0 = (reg_t)envp;
dcontext->sys_param1 = (reg_t)new_envp;
}
/* File-reading callback passed to find_script_interpreter(): opens pathname
 * for reading, reads up to count bytes into buf and closes the file again.
 * Returns -1 if the file cannot be opened, else whatever os_read() returned.
 */
static ssize_t
script_file_reader(const char *pathname, void *buf, size_t count)
{
    file_t file = os_open(pathname, OS_OPEN_READ);
    /* Signed, so that the os_read() result is handed back without passing
     * through an unsigned type.
     */
    ssize_t len;
    if (file == INVALID_FILE)
        return -1;
    len = os_read(file, buf, count);
    os_close(file);
    return len;
}
* modify the syscall parameters to invoke a script interpreter instead. In
* this case we will have allocated memory here but we expect the caller to
* do a non-failing execve of libdynamorio.so and therefore not to have to
* free the memory. That is one reason for checking that the (final) script
* interpreter really is an executable binary.
* We recognise one error case here and return the non-zero error code (ELOOP)
* but in other cases we leave it up to the caller to detect the error, which
* it may do by attempting to exec the path natively, expecting this to fail,
* though there is the obvious danger that the file might have been modified
* just before the exec.
* We do not, and cannot easily, handle a file that is executable but not
* readable. Currently such files will be executed without DynamoRIO though
* in some situations it would be more helpful to stop with an error.
*
* XXX: There is a minor transparency bug with misformed binaries. For example,
* execve can return EINVAL if the ELF executable has more than one PT_INTERP
* segment but we do not check this and so under DynamoRIO the error would be
* detected only after the exec, if we are following the child.
*
* FIXME i#2091: There is a memory leak if a script is recognised, and it is
* later decided not to inject (see where should_inject is set), and the exec
* fails, because in this case there is no mechanism for freeing the memory
* allocated in this function. This function should return sufficient information
* for the caller to free the memory, which it can do so before the exec if it
* reverts to the original syscall arguments and execs the script.
*/
static int
handle_execve_script(dcontext_t *dcontext)
{
char *fname = (char *)sys_param(dcontext, 0);
char **orig_argv = (char **)sys_param(dcontext, 1);
script_interpreter_t *script;
int ret = 0;
script = global_heap_alloc(sizeof(*script) HEAPACCT(ACCT_OTHER));
if (!find_script_interpreter(script, fname, script_file_reader))
goto free_and_return;
if (script->argc == 0) {
ret = ELOOP;
goto free_and_return;
}
{
file_t file = os_open(script->argv[0], OS_OPEN_READ);
bool is64;
if (file == INVALID_FILE)
goto free_and_return;
if (!module_file_is_module64(file, &is64, NULL)) {
os_close(file);
goto free_and_return;
}
}
{
size_t i, orig_argc = 0;
char **new_argv;
while (orig_argv[orig_argc] != NULL)
++orig_argc;
if (orig_argc == 0)
orig_argc = 1;
new_argv = global_heap_alloc((script->argc + orig_argc + 1) *
sizeof(char *) HEAPACCT(ACCT_OTHER));
for (i = 0; i < script->argc; i++)
new_argv[i] = script->argv[i];
new_argv[script->argc] = fname;
for (i = 1; i < orig_argc; i++)
new_argv[script->argc + i] = orig_argv[i];
new_argv[script->argc + orig_argc] = NULL;
*sys_param_addr(dcontext, 0) = (reg_t)new_argv[0];
*sys_param_addr(dcontext, 1) = (reg_t)new_argv;
}
return 0;
free_and_return:
global_heap_free(script, sizeof(*script) HEAPACCT(ACCT_OTHER));
return ret;
}
/* Pre-syscall handling of execve: decides whether to inject into the new
 * image and adjusts the system call parameters (path, argv[0], envp)
 * accordingly, recording in dcontext->sys_param0..4 what
 * handle_execve_post() must restore if the execve fails.
 * Returns 0, or the non-zero error code from handle_execve_script().
 */
static int
handle_execve(dcontext_t *dcontext)
{
    /* Kernel-side entry points, for reference:
     * asmlinkage int sys_execve(struct pt_regs regs) { ...
     * error = do_execve(filename, (char **) regs.xcx, (char **) regs.xdx, &regs);
     * in fs/exec.c:
     * int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
     */
    /* How we keep control across the execve:
     *
     * For legacy late injection:
     * we simply make sure LD_PRELOAD contains us, and that our directory
     * is on LD_LIBRARY_PATH (seems not to work to put absolute paths in
     * LD_PRELOAD).
     * (This doesn't work for setuid programs though.)
     *
     * For -follow_children we also pass the current DYNAMORIO_RUNUNDER and
     * DYNAMORIO_OPTIONS and logdir to the new image to support a simple
     * run-all-children model without bothering w/ setting up config files for
     * children, and to support injecting across execve that does not
     * preserve $HOME.
     * FIXME i#287/PR 546544: we'll need to propagate DYNAMORIO_AUTOINJECT too
     * once we use it in preload
     */
    /* FIXME: we do not handle whatever is supposed to be preserved or
     * set across execve: going to ignore for now
     */
    char *fname;
    bool x64 = IF_X64_ELSE(true, false);
    bool expect_to_fail UNUSED = false;
    bool should_inject;
    file_t file;
    char *inject_library_path;
    char rununder_buf[16];
    bool app_specific, from_env, rununder_on;
#if defined(LINUX) || defined(DEBUG)
    const char **argv;
#endif
    /* Scripts are redirected to their interpreter first; the parameters are
     * (re-)read only afterwards.
     */
    if (DYNAMO_OPTION(follow_children) && DYNAMO_OPTION(early_inject)) {
        int ret = handle_execve_script(dcontext);
        if (ret != 0)
            return ret;
    }
    fname = (char *)sys_param(dcontext, 0);
#if defined(LINUX) || defined(DEBUG)
    argv = (const char **)sys_param(dcontext, 1);
#endif
#ifdef LINUX
    if (DYNAMO_OPTION(early_inject) && symlink_is_self_exe(fname)) {
        /* fname is a symlink to our own executable, so we substitute the
         * application's name in order for the kernel to do
         * the right thing here.
         */
        fname = get_application_name();
    }
#endif
    LOG(GLOBAL, LOG_ALL, 1,
        "\n---------------------------------------------------------------------------"
        "\n");
    LOG(THREAD, LOG_ALL, 1,
        "\n---------------------------------------------------------------------------"
        "\n");
    DODEBUG({
        int i;
        SYSLOG_INTERNAL_INFO("-- execve %s --", fname);
        LOG(THREAD, LOG_SYSCALLS, 1, "syscall: execve %s\n", fname);
        LOG(GLOBAL, LOG_TOP | LOG_SYSCALLS, 1, "execve %s\n", fname);
        if (d_r_stats->loglevel >= 3) {
            if (argv == NULL) {
                LOG(THREAD, LOG_SYSCALLS, 3, "\targs are NULL\n");
            } else {
                for (i = 0; argv[i] != NULL; i++) {
                    LOG(THREAD, LOG_SYSCALLS, 2, "\targ %d: len=%d\n", i,
                        strlen(argv[i]));
                    LOG(THREAD, LOG_SYSCALLS, 3, "\targ %d: %s\n", i, argv[i]);
                }
            }
        }
    });
    /* If this is a vfork "thread" we are really in a different
     * process and if we exec then the parent process will still be alive.  We
     * can't easily clean our own state (dcontext, dstack, etc.) up in our
     * parent process: we need it to invoke the syscall and the syscall might
     * fail.  We could expand cleanup_and_terminate to also be able to invoke
     * SYS_execve: but execve seems more likely to fail than termination
     * syscalls.  Our solution is to mark this thread as "execve" and hide it
     * from regular thread queries; we clean it up in the process-exiting
     * synch_with_thread(), or if the same parent thread performs another vfork
     * (to prevent heap accumulation from repeated vfork+execve).  Since vfork
     * on linux suspends the parent, there cannot be any races with the execve
     * syscall completing: there can't even be peer vfork threads, so we could
     * set a flag and clean up in d_r_dispatch, but that seems overkill.  (If vfork
     * didn't suspend the parent we'd need to touch a marker file or something
     * to know the execve was finished.)
     */
    mark_thread_execve(dcontext->thread_record, true);
#ifdef STATIC_LIBRARY
    /* With a static library nothing further is done here. */
    SYSLOG_INTERNAL_WARNING("WARNING: static DynamoRIO library, losing control on "
                            "execve");
    return 0;
#endif
    /* Inspect the target file to learn its bitwidth; a file that cannot be
     * opened or is not recognized marks the execve as expected to fail.
     */
    file = os_open(fname, OS_OPEN_READ);
    if (file != INVALID_FILE) {
        if (!module_file_is_module64(file, &x64,
                                     NULL))
            expect_to_fail = true;
        os_close(file);
    } else
        expect_to_fail = true;
    /* Use our own library dir for a same-bitwidth target, else the alt-arch one. */
    inject_library_path =
        IF_X64_ELSE(x64, !x64) ? dynamorio_library_path : dynamorio_alt_arch_path;
    /* -follow_children is the default answer; a rununder setting found for
     * the target app can override it.
     */
    should_inject = DYNAMO_OPTION(follow_children);
    if (get_config_val_other_app(get_short_name(fname), get_process_id(),
                                 x64 ? DR_PLATFORM_64BIT : DR_PLATFORM_32BIT,
                                 DYNAMORIO_VAR_RUNUNDER, rununder_buf,
                                 BUFFER_SIZE_ELEMENTS(rununder_buf), &app_specific,
                                 &from_env, NULL)) {
        if (should_inject_from_rununder(rununder_buf, app_specific, from_env,
                                        &rununder_on))
            should_inject = rununder_on;
    }
    if (should_inject)
        add_dr_env_vars(dcontext, inject_library_path, fname);
    else {
        /* No environment to restore or free in handle_execve_post(). */
        dcontext->sys_param0 = 0;
        dcontext->sys_param1 = 0;
    }
#ifdef LINUX
    /* expect_to_fail must be accurate, as we cannot come back
     * and fail the syscall once the kernel execs DR!
     */
    if (should_inject && DYNAMO_OPTION(early_inject) && !expect_to_fail) {
        /* Change the target image to our own library file. */
        const char *drpath = IF_X64_ELSE(x64, !x64) ? dynamorio_library_filepath
                                                    : dynamorio_alt_arch_filepath;
        TRY_EXCEPT(
            dcontext,
            {
                if (symlink_is_self_exe(argv[0])) {
                    /* Remember argv so that handle_execve_post() can put
                     * back argv[0], which it restores from sys_param4.
                     */
                    dcontext->sys_param3 = (reg_t)argv;
                    argv[0] = fname;
                } else
                    dcontext->sys_param3 = 0;
                /* Remember the original path for handle_execve_post(). */
                dcontext->sys_param4 = (reg_t)fname;
                *sys_param_addr(dcontext, 0) = (reg_t)drpath;
                LOG(THREAD, LOG_SYSCALLS, 2, "actual execve on: %s\n",
                    (char *)sys_param(dcontext, 0));
            },
            {
                /* Nothing for handle_execve_post() to restore. */
                dcontext->sys_param3 = 0;
                dcontext->sys_param4 = 0;
                LOG(THREAD, LOG_SYSCALLS, 2,
                    "argv is unreadable, expect execve to fail\n");
            });
    } else {
        /* Nothing for handle_execve_post() to restore. */
        dcontext->sys_param3 = 0;
        dcontext->sys_param4 = 0;
    }
#endif
    /* Shut the config module down now; if the execve fails,
     * we'll just live w/o dynamic option re-read.
     */
    d_r_config_exit();
    return 0;
}
/* Post-syscall handling of execve: undoes the parameter changes made by
 * handle_execve() and frees the environment array and strings that were
 * allocated for the system call.
 */
static void
handle_execve_post(dcontext_t *dcontext)
{
    /* If we get here, either the execve did not replace this process,
     * or we did an execve from a vfork and its memory changes are visible
     * in the parent process.
     * we have to restore env to how it was and free the allocated heap.
     */
    char **old_envp = (char **)dcontext->sys_param0;
    char **new_envp = (char **)dcontext->sys_param1;
#ifdef STATIC_LIBRARY
    /* handle_execve() returns before changing anything in this configuration. */
    return;
#endif
#ifdef LINUX
    if (dcontext->sys_param4 != 0) {
        /* Restore the original path parameter... */
        *sys_param_addr(dcontext, 0) = dcontext->sys_param4;
        if (dcontext->sys_param3 != 0) {
            /* ...and the original argv[0]. */
            const char **argv = (const char **)dcontext->sys_param3;
            argv[0] = (const char *)dcontext->sys_param4;
        }
    }
#endif
    if (new_envp != NULL) {
        int i;
        LOG(THREAD, LOG_SYSCALLS, 2, "\tcleaning up our env vars\n");
        /* Free the strings that live in our own memory;
         * we can't compare to old_envp b/c it may have changed by now.
         */
        for (i = 0; new_envp[i] != NULL; i++) {
            if (is_dynamo_address((byte *)new_envp[i])) {
                heap_free(dcontext, new_envp[i],
                          sizeof(char) * (strlen(new_envp[i]) + 1) HEAPACCT(ACCT_OTHER));
            }
        }
        /* Count the final NULL slot in the size being freed. */
        i++;
        heap_free(dcontext, new_envp, sizeof(char *) * i HEAPACCT(ACCT_OTHER));
        /* The old envp is put back only when this thread is not marked execve. */
        if (!dcontext->thread_record->execve)
            *sys_param_addr(dcontext, 2) = (reg_t)old_envp;
    }
}
* child who invoked execve here so we have at most one outstanding thread. we
* also clean up at process exit and before thread creation. we could do this
* in d_r_dispatch but too rare to be worth a flag check there.
*/
static void
cleanup_after_vfork_execve(dcontext_t *dcontext)
{
thread_record_t **threads;
int num_threads, i;
if (num_execve_threads == 0)
return;
d_r_mutex_lock(&thread_initexit_lock);
get_list_of_threads_ex(&threads, &num_threads, true );
for (i = 0; i < num_threads; i++) {
if (threads[i]->execve) {
LOG(THREAD, LOG_SYSCALLS, 2, "cleaning up earlier vfork thread " TIDFMT "\n",
threads[i]->id);
dynamo_other_thread_exit(threads[i]);
}
}
d_r_mutex_unlock(&thread_initexit_lock);
global_heap_free(threads,
num_threads * sizeof(thread_record_t *) HEAPACCT(ACCT_THREAD_MGT));
}
/* Points the FILE object referenced through stdfile at descriptor file_no.
 * Where STDFILE_FILENO is not defined this cannot be done and a one-time
 * warning is issued instead.
 */
static void
set_stdfile_fileno(stdfile_t **stdfile, file_t file_no)
{
#ifndef STDFILE_FILENO
# warning stdfile_t is opaque; DynamoRIO will not set fds of libc FILEs.
    SYSLOG_INTERNAL_WARNING_ONCE(
        "DynamoRIO cannot set the file descriptors of private libc FILEs on "
        "this platform. Client usage of stdio.h stdin, stdout, or stderr may "
        "no longer work as expected, because the app is closing the UNIX fds "
        "backing these.");
#else
    stdfile_t *target = *stdfile;
    target->STDFILE_FILENO = file_no;
#endif
}
/* Pre-syscall handling for the app closing a single file descriptor.
 *
 * dcontext:        the current thread's context.
 * fd:              the descriptor the app is about to close.
 * set_return_val:  when true and the close is refused, the syscall return
 *                  value is set here (EBADF under -fail_on_stolen_fds,
 *                  otherwise success with 0).
 *
 * Returns false when fd is owned by DR, in which case the app's close is not
 * allowed; returns true otherwise.
 *
 * When the app closes stdout, stderr or stdin and the matching
 * dup_*_on_close option is on, the descriptor is duplicated first and the
 * duplicate is recorded in our_stdout / our_stderr / our_stdin.
 */
static bool
handle_close_generic_pre(dcontext_t *dcontext, file_t fd, bool set_return_val)
{
    LOG(THREAD, LOG_SYSCALLS, 3, "syscall: close fd %d\n", fd);

    /* Prevent the app from closing our files. */
    if (fd_is_dr_owned(fd)) {
        SYSLOG_INTERNAL_WARNING_ONCE("app trying to close DR file(s)");
        LOG(THREAD, LOG_TOP | LOG_SYSCALLS, 1,
            "WARNING: app trying to close DR file %d! Not allowing it.\n", fd);
        if (set_return_val) {
            if (DYNAMO_OPTION(fail_on_stolen_fds)) {
                set_failure_return_val(dcontext, EBADF);
                DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
            } else
                set_success_return_val(dcontext, 0);
        }
        return false;
    }

    /* Duplicate the standard descriptors before the app closes them so that DR
     * (and a client) can continue to use them for logging.
     */
    if (DYNAMO_OPTION(dup_stdout_on_close) && fd == STDOUT) {
        our_stdout = fd_priv_dup(fd);
        if (our_stdout < 0) /* private dup failed: fall back to a plain dup */
            our_stdout = dup_syscall(fd);
        /* Only a valid duplicate may be marked and registered as DR-owned;
         * registering a failed (negative) result would put a bogus fd in the
         * table.
         */
        if (our_stdout >= 0) {
            fd_mark_close_on_exec(our_stdout);
            fd_table_add(our_stdout, 0);
        }
        LOG(THREAD, LOG_TOP | LOG_SYSCALLS, 1,
            "WARNING: app is closing stdout=%d - duplicating descriptor for "
            "DynamoRIO usage got %d.\n",
            fd, our_stdout);
        if (privmod_stdout != NULL && INTERNAL_OPTION(private_loader)) {
            /* Update the privately loaded libc's stdout FILE. */
            set_stdfile_fileno(privmod_stdout, our_stdout);
        }
    }
    if (DYNAMO_OPTION(dup_stderr_on_close) && fd == STDERR) {
        our_stderr = fd_priv_dup(fd);
        if (our_stderr < 0) /* private dup failed: fall back to a plain dup */
            our_stderr = dup_syscall(fd);
        if (our_stderr >= 0) {
            fd_mark_close_on_exec(our_stderr);
            fd_table_add(our_stderr, 0);
        }
        LOG(THREAD, LOG_TOP | LOG_SYSCALLS, 1,
            "WARNING: app is closing stderr=%d - duplicating descriptor for "
            "DynamoRIO usage got %d.\n",
            fd, our_stderr);
        if (privmod_stderr != NULL && INTERNAL_OPTION(private_loader)) {
            /* Update the privately loaded libc's stderr FILE. */
            set_stdfile_fileno(privmod_stderr, our_stderr);
        }
    }
    if (DYNAMO_OPTION(dup_stdin_on_close) && fd == STDIN) {
        our_stdin = fd_priv_dup(fd);
        if (our_stdin < 0) /* private dup failed: fall back to a plain dup */
            our_stdin = dup_syscall(fd);
        if (our_stdin >= 0) {
            fd_mark_close_on_exec(our_stdin);
            fd_table_add(our_stdin, 0);
        }
        LOG(THREAD, LOG_TOP | LOG_SYSCALLS, 1,
            "WARNING: app is closing stdin=%d - duplicating descriptor for "
            "DynamoRIO usage got %d.\n",
            fd, our_stdin);
        if (privmod_stdin != NULL && INTERNAL_OPTION(private_loader)) {
            /* Update the privately loaded libc's stdin FILE. */
            set_stdfile_fileno(privmod_stdin, our_stdin);
        }
    }
    return true;
}
/* Pre-syscall handler for close: takes the descriptor from syscall parameter 0
 * and forwards it to handle_close_generic_pre, asking it to set the syscall
 * return value if the close is refused.  Returns that routine's result.
 */
static bool
handle_close_pre(dcontext_t *dcontext)
{
    file_t app_fd = (uint)sys_param(dcontext, 0);
    return handle_close_generic_pre(dcontext, app_fd, true /* set_return_val */);
}
#ifdef SYS_close_range
/* Per-descriptor pre-syscall handler for close_range: forwards fd to
 * handle_close_generic_pre without letting it set the syscall return value.
 * Returns that routine's result.
 */
static bool
handle_close_range_pre(dcontext_t *dcontext, file_t fd)
{
    const bool set_return_val = false;
    return handle_close_generic_pre(dcontext, fd, set_return_val);
}
#endif
/* Used to obtain the pc of the syscall instr itself when the dcontext dc
 * is currently in a syscall handler.
 * Alternatively for sysenter we could set app_sysenter_instr_addr for Linux.
 */
/* SYSCALL_PC(dc): when the syscall method is SYSCALL_METHOD_INT or
 * SYSCALL_METHOD_SYSCALL, evaluates to POST_SYSCALL_PC(dc) minus INT_LENGTH
 * (asserting that SYSCALL_LENGTH equals INT_LENGTH so one subtraction serves
 * both); for any other method, evaluates to vsyscall_syscall_end_pc minus
 * SYSENTER_LENGTH.
 */
#define SYSCALL_PC(dc) \
((get_syscall_method() == SYSCALL_METHOD_INT || \
get_syscall_method() == SYSCALL_METHOD_SYSCALL) \
? (ASSERT(SYSCALL_LENGTH == INT_LENGTH), POST_SYSCALL_PC(dc) - INT_LENGTH) \
: (vsyscall_syscall_end_pc - SYSENTER_LENGTH))
/* Pre-syscall handling for the exit syscalls (called for both
 * SYSNUM_EXIT_PROCESS and SYSNUM_EXIT_THREAD).
 *
 * For SYSNUM_EXIT_PROCESS, the thread list is scanned for non-client threads
 * with a different pid.  If any exist, only the other threads sharing this
 * thread's pid are synched with and terminated, and exit_process stays false.
 * Otherwise exit_process is true.  Independently, if this is the last app
 * thread and DR has not already exited, exit_process is forced to true.
 *
 * Finally hands off to block_cleanup_and_terminate() with the syscall number
 * and the first four syscall parameters.
 */
static void
handle_exit(dcontext_t *dcontext)
{
    priv_mcontext_t *mc = get_mcontext(dcontext);
    bool exit_process = false;

    if (dcontext->sys_num == SYSNUM_EXIT_PROCESS) {
        /* Threads with a different pid may be present alongside this group.
         * We need to know whether this is the only group left.
         * FIXME: we can have races where new threads are created after our
         * check: we'll live with that for now, but the right approach is to
         * suspend all threads via synch_with_all_threads(), do the check,
         * and if exit_process then exit w/o resuming: though have to
         * coordinate lock access w/ cleanup_and_terminate.
         * Xref i#94.  Xref PR 541760.
         */
        process_id_t mypid = get_process_id();
        thread_record_t **threads;
        int num_threads, i;
        exit_process = true;
        d_r_mutex_lock(&thread_initexit_lock);
        get_list_of_threads(&threads, &num_threads);
        for (i = 0; i < num_threads; i++) {
            if (threads[i]->pid != mypid && !IS_CLIENT_THREAD(threads[i]->dcontext)) {
                exit_process = false;
                break;
            }
        }
        if (!exit_process) {
            /* We need to clean up the other threads in our group here. */
            thread_id_t myid = d_r_get_thread_id();
            priv_mcontext_t mcontext;
            DEBUG_DECLARE(thread_synch_result_t synch_res;)
            LOG(THREAD, LOG_TOP | LOG_SYSCALLS, 1,
                "SYS_exit_group %d not final group: %d cleaning up just "
                "threads in group\n",
                get_process_id(), d_r_get_thread_id());
            /* Save our mcontext and point its pc at the syscall instruction
             * for the duration of the synch; it is restored below.
             */
            copy_mcontext(mc, &mcontext);
            mc->pc = SYSCALL_PC(dcontext);
            for (i = 0; i < num_threads; i++) {
                if (threads[i]->id != myid && threads[i]->pid == mypid) {
                    /* We synch to the terminated-and-cleaned state here
                     * to make cleanup easier, but may want to switch to shifting
                     * the target thread to a stack-free loop.
                     */
                    DEBUG_DECLARE(synch_res =)
                    synch_with_thread(
                        threads[i]->id, true, true, THREAD_SYNCH_VALID_MCONTEXT,
                        THREAD_SYNCH_TERMINATED_AND_CLEANED,
                        THREAD_SYNCH_SUSPEND_FAILURE_IGNORE);
                    /* The set of threads can change in the course of
                     * doing the synch so we may have races where the thread
                     * exits on its own (or new threads appear): we'll live
                     * with those for now.
                     */
                    ASSERT(synch_res == THREAD_SYNCH_RESULT_SUCCESS);
                }
            }
            copy_mcontext(&mcontext, mc);
        }
        d_r_mutex_unlock(&thread_initexit_lock);
        global_heap_free(
            threads, num_threads * sizeof(thread_record_t *) HEAPACCT(ACCT_THREAD_MGT));
    }

    if (is_last_app_thread() && !dynamo_exited) {
        LOG(THREAD, LOG_TOP | LOG_SYSCALLS, 1,
            "SYS_exit%s(%d) in final thread " TIDFMT " of " PIDFMT
            " => exiting DynamoRIO\n",
            (dcontext->sys_num == SYSNUM_EXIT_PROCESS) ? "_group" : "",
            MCXT_SYSNUM_REG(mc), d_r_get_thread_id(), get_process_id());
        automatic_startup = true;
        exit_process = true;
    } else {
        LOG(THREAD, LOG_TOP | LOG_THREADS | LOG_SYSCALLS, 1,
            "SYS_exit%s(%d) in thread " TIDFMT " of " PIDFMT " => cleaning up %s\n",
            (dcontext->sys_num == SYSNUM_EXIT_PROCESS) ? "_group" : "",
            MCXT_SYSNUM_REG(mc), d_r_get_thread_id(), get_process_id(),
            exit_process ? "process" : "thread");
    }
    KSTOP(num_exits_dir_syscall);

    block_cleanup_and_terminate(dcontext, MCXT_SYSNUM_REG(mc), sys_param(dcontext, 0),
                                sys_param(dcontext, 1), exit_process,
                                sys_param(dcontext, 2), sys_param(dcontext, 3));
}
#if defined(LINUX) && defined(X86)
/* Records the app's requested thread area (TLS descriptor) in this thread's
 * app_thread_areas array.
 *
 * dcontext:   the current thread's context; its os_field holds the array.
 * user_desc:  the app's descriptor.  Its entry_number may be rewritten here.
 *
 * Returns false when no slot is available (seg_not_present request), when the
 * entry number lies outside the TLS slot range, or when the private loader is
 * off and the resulting selector matches neither SEG_TLS nor LIB_SEG_TLS.
 * Returns true otherwise.
 */
static bool
os_set_app_thread_area(dcontext_t *dcontext, our_modify_ldt_t *user_desc)
{
#    ifdef X86
    int i;
    os_thread_data_t *ostd = dcontext->os_field;
    our_modify_ldt_t *desc = (our_modify_ldt_t *)ostd->app_thread_areas;

    if (user_desc->seg_not_present == 1) {
        /* Find the first recorded slot that is itself marked not-present. */
        for (i = 0; i < GDT_NUM_TLS_SLOTS; i++) {
            if (desc[i].seg_not_present == 1)
                break;
        }
        if (i < GDT_NUM_TLS_SLOTS) {
            /* NOTE(review): entry_number is stored as a selector here, while
             * the other branch treats it as an index -- confirm intended.
             */
            user_desc->entry_number = GDT_SELECTOR(i + tls_min_index());
            memcpy(&desc[i], user_desc, sizeof(*user_desc));
        } else
            return false;
    } else {
        /* When the app passes entry_number == -1, we
         * direct the app to use the GDT entry we already set up for our private
         * libraries, but only the first time it requests TLS.
         */
        if (user_desc->entry_number == -1 && return_stolen_lib_tls_gdt) {
            d_r_mutex_lock(&set_thread_area_lock);
            /* Re-check under the lock: another thread may have claimed it. */
            if (return_stolen_lib_tls_gdt) {
                uint selector = read_thread_register(LIB_SEG_TLS);
                uint index = SELECTOR_INDEX(selector);
                SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT);
                return_stolen_lib_tls_gdt = false;
                SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT);
                user_desc->entry_number = index;
                LOG(GLOBAL, LOG_THREADS, 2,
                    "%s: directing app to use "
                    "selector 0x%x for first call to set_thread_area\n",
                    __FUNCTION__, selector);
            }
            d_r_mutex_unlock(&set_thread_area_lock);
        }

        /* Update the recorded descriptor for the specified slot. */
        i = user_desc->entry_number - tls_min_index();
        if (i < 0 || i >= GDT_NUM_TLS_SLOTS)
            return false;
        LOG(GLOBAL, LOG_THREADS, 2,
            "%s: change selector 0x%x base from " PFX " to " PFX "\n", __FUNCTION__,
            GDT_SELECTOR(user_desc->entry_number), desc[i].base_addr,
            user_desc->base_addr);
        memcpy(&desc[i], user_desc, sizeof(*user_desc));
    }

    /* Without the private loader, only a request naming one of the selectors
     * currently in SEG_TLS or LIB_SEG_TLS is reported as handled.
     */
    if (!INTERNAL_OPTION(private_loader) &&
        GDT_SELECTOR(user_desc->entry_number) != read_thread_register(SEG_TLS) &&
        GDT_SELECTOR(user_desc->entry_number) != read_thread_register(LIB_SEG_TLS))
        return false;
#    elif defined(ARM)
    ASSERT_NOT_IMPLEMENTED(false);
#    endif
    return true;
}
/* Reports whether the app has a recorded thread area for the entry named by
 * user_desc->entry_number.  Returns false if that entry falls outside the TLS
 * slot range or if the recorded slot is marked seg_not_present; returns true
 * otherwise.  user_desc itself is not written.
 */
static bool
os_get_app_thread_area(dcontext_t *dcontext, our_modify_ldt_t *user_desc)
{
#    ifdef X86
    os_thread_data_t *thread_data = (os_thread_data_t *)dcontext->os_field;
    our_modify_ldt_t *areas = (our_modify_ldt_t *)thread_data->app_thread_areas;
    int slot = user_desc->entry_number - tls_min_index();
    bool slot_in_range = slot >= 0 && slot < GDT_NUM_TLS_SLOTS;

    /* The range test short-circuits, so areas[] is only read for a valid slot. */
    if (!slot_in_range || areas[slot].seg_not_present == 1)
        return false;
#    elif defined(ARM)
    ASSERT_NOT_IMPLEMENTED(false);
#    endif
    return true;
}
#endif
/* This function switches the lib tls segment around thread creation.
 * We switch to app's lib tls seg before thread creation system call, i.e.
 * clone and vfork, and switch back to dr's lib tls seg after the system call.
 * They are only called on parent thread, not the child thread.
 * The child thread's tls is setup in os_tls_app_seg_init.
 */
/* XXX: Thread creation appears to depend on which segment descriptor is in
 * use.  If using dr's segment descriptor, the created thread will have
 * access violation for tls not being setup.  However, it works fine if we switch
 * the descriptor to app's segment descriptor before creating the thread.
 * We should be able to remove this function later if we find the problem.
 */
/* Switches the LIB_SEG_TLS segment of the current thread to the app's state
 * (to_app true) or to DR's (to_app false) by delegating to
 * os_switch_seg_to_context.  Returns that routine's result.
 */
static bool
os_switch_lib_tls(dcontext_t *dcontext, bool to_app)
{
    bool switched = os_switch_seg_to_context(dcontext, LIB_SEG_TLS, to_app);
    return switched;
}
#ifdef X86
/* Switches segment register seg of the current thread to the app's or DR's
 * TLS, using the mechanism selected by os_tls->tls_type.
 *
 * dcontext:  the current thread's context (must be non-NULL).
 * os_tls:    the thread's OS TLS state; supplies tls_type and, for the
 *            selector-based types, app_lib_tls_reg.
 * seg:       the segment register to change.
 * to_app:    true to switch to the app's state, false for DR's.
 * base:      the segment base to install (used by the arch_prctl type and, when
 *            switching to DR, by the GDT type).
 *
 * Returns the success of the underlying segment update for the arch_prctl and
 * GDT types; returns false for an unknown tls_type.
 * NOTE(review): the LDT case never assigns res, so this returns false there
 * even though it logs a successful swap -- confirm whether callers rely on it.
 */
static bool
os_switch_seg_to_base(dcontext_t *dcontext, os_local_state_t *os_tls, reg_id_t seg,
                      bool to_app, app_pc base)
{
    bool res = false;
    ASSERT(dcontext != NULL);
    /* Both alternatives must compare against seg: a bare register constant
     * is always non-zero and would make the check vacuous.
     */
    ASSERT(IF_X86_ELSE((seg == SEG_FS || seg == SEG_GS),
                       (seg == DR_REG_TPIDRURW || seg == DR_REG_TPIDRURO)));
    switch (os_tls->tls_type) {
#    if defined(X64) && !defined(MACOS)
    case TLS_TYPE_ARCH_PRCTL: {
        res = tls_set_fs_gs_segment_base(os_tls->tls_type, seg, base, NULL);
        ASSERT(res);
        LOG(GLOBAL, LOG_THREADS, 2,
            "%s %s: arch_prctl successful for thread " TIDFMT " base " PFX "\n",
            __FUNCTION__, to_app ? "to app" : "to DR", d_r_get_thread_id(), base);
        if (seg == SEG_TLS && base == NULL) {
            /* A NULL base for DR's segment: also write a zero selector. */
            reg_t zero = 0;
            WRITE_DR_SEG(zero);
        }
        break;
    }
#    endif
    case TLS_TYPE_GDT: {
        our_modify_ldt_t desc;
        uint index;
        uint selector;
        if (to_app) {
            selector = os_tls->app_lib_tls_reg;
            index = SELECTOR_INDEX(selector);
        } else {
            index = (seg == LIB_SEG_TLS ? tls_priv_lib_index() : tls_dr_index());
            ASSERT(index != -1 && "TLS indices not initialized");
            selector = GDT_SELECTOR(index);
        }
        if (selector != 0) {
            if (to_app) {
                our_modify_ldt_t *areas =
                    ((os_thread_data_t *)dcontext->os_field)->app_thread_areas;
                /* The slot index must be strictly below GDT_NUM_TLS_SLOTS,
                 * matching the bounds checks used elsewhere on this array.
                 */
                ASSERT((index >= tls_min_index()) &&
                       ((index - tls_min_index()) < GDT_NUM_TLS_SLOTS));
                desc = areas[index - tls_min_index()];
            } else {
                tls_init_descriptor(&desc, base, GDT_NO_SIZE_LIMIT, index);
            }
            res = tls_set_fs_gs_segment_base(os_tls->tls_type, seg, NULL, &desc);
            ASSERT(res);
        } else {
            /* The selector is 0 (no descriptor to install), so we
             * don't need to call set_thread_area.
             */
            res = true;
        }
        if (seg == SEG_TLS)
            WRITE_DR_SEG(selector);
        else
            WRITE_LIB_SEG(selector);
        LOG(THREAD, LOG_LOADER, 2, "%s: switching to %s, setting %s to 0x%x\n",
            __FUNCTION__, (to_app ? "app" : "dr"), reg_names[seg], selector);
        LOG(THREAD, LOG_LOADER, 2,
            "%s %s: set_thread_area successful for thread " TIDFMT " base " PFX "\n",
            __FUNCTION__, to_app ? "to app" : "to DR", d_r_get_thread_id(), base);
        break;
    }
    case TLS_TYPE_LDT: {
        uint index;
        uint selector;
        if (to_app) {
            selector = os_tls->app_lib_tls_reg;
            index = SELECTOR_INDEX(selector);
        } else {
            index = (seg == LIB_SEG_TLS ? tls_priv_lib_index() : tls_dr_index());
            ASSERT(index != -1 && "TLS indices not initialized");
            selector = LDT_SELECTOR(index);
        }
        LOG(THREAD, LOG_LOADER, 2, "%s: switching to %s, setting %s to 0x%x\n",
            __FUNCTION__, (to_app ? "app" : "dr"), reg_names[seg], selector);
        if (seg == SEG_TLS)
            WRITE_DR_SEG(selector);
        else
            WRITE_LIB_SEG(selector);
        LOG(THREAD, LOG_LOADER, 2,
            "%s %s: ldt selector swap successful for thread " TIDFMT "\n", __FUNCTION__,
            to_app ? "to app" : "to DR", d_r_get_thread_id());
        break;
    }
    default: ASSERT_NOT_REACHED(); return false;
    }
    ASSERT((!to_app && seg == SEG_TLS) ||
           BOOLS_MATCH(to_app, os_using_app_state(dcontext)));
    return res;
}
/* Installs base as the base of DR's TLS segment (SEG_TLS) for the current
 * thread.  If tls is NULL, the OS TLS state is looked up from dcontext, which
 * must then be non-NULL.  Returns the result of os_switch_seg_to_base.
 */
static bool
os_set_dr_tls_base(dcontext_t *dcontext, os_local_state_t *tls, byte *base)
{
    os_local_state_t *state = tls;
    if (state == NULL) {
        ASSERT(dcontext != NULL);
        state = get_os_tls_from_dc(dcontext);
    }
    return os_switch_seg_to_base(dcontext, state, SEG_TLS, false /* !to_app */, base);
}
#endif
/* Switches segment/thread register seg of the current thread between the
 * app's TLS (to_app true) and DR's private TLS (to_app false).
 *
 * X86: looks up the app or private TLS base for seg and delegates to
 * os_switch_seg_to_base.
 * AARCHXX: requires the private loader.  Swaps the values held at
 * DR_TLS_BASE_OFFSET in the app's and the private library's TLS blocks and
 * writes the target TLS base to the thread register.  Returns true without
 * changing anything when the thread register already holds the target value,
 * or, when switching to the app, if the app's TLS base is NULL or not in
 * readable+writable memory.
 * RISCV64: not implemented; returns false.
 *
 * Returns true on success.
 */
static bool
os_switch_seg_to_context(dcontext_t *dcontext, reg_id_t seg, bool to_app)
{
    os_local_state_t *os_tls = get_os_tls_from_dc(dcontext);
#ifdef X86
    app_pc base;
    ASSERT_MESSAGE(CHKLVL_ASSERTS + 1, "can only act on executing thread",
                   /* The thread may temporarily have
                    * invalid TLS, so we don't check get_thread_private_dcontext().
                    */
                   is_thread_tls_allocated() &&
                       dcontext->owning_thread == get_sys_thread_id());
    if (to_app) {
        base = os_get_app_tls_base(dcontext, seg);
    } else {
        base = os_get_priv_tls_base(dcontext, seg);
    }
    return os_switch_seg_to_base(dcontext, os_tls, seg, to_app, base);
#elif defined(AARCHXX)
    bool res = false;
    os_thread_data_t *ostd = (os_thread_data_t *)dcontext->os_field;
    ASSERT(INTERNAL_OPTION(private_loader));
    if (to_app) {
        /* Nothing to do if the thread register already holds the app's base. */
        ptr_uint_t cur_seg = read_thread_register(LIB_SEG_TLS);
        if ((void *)cur_seg == os_tls->app_lib_tls_base)
            return true;
        bool app_mem_valid = true;
        if (os_tls->app_lib_tls_base == NULL)
            app_mem_valid = false;
        else {
            uint prot;
            bool rc = get_memory_info(os_tls->app_lib_tls_base, NULL, NULL, &prot);
            /* Rule out a value that get_memory_info cannot resolve (cf. the test
             * common.allasm_aarch_isa).
             * Also rule out an unwritable region, which seems to happen on arm
             * where at process init the thread reg points at rodata in libc
             * until properly set to a writable mmap later.
             */
            if (!rc || !TESTALL(MEMPROT_READ | MEMPROT_WRITE, prot))
                app_mem_valid = false;
        }
        if (!app_mem_valid) {
            /* The app has no usable
             * thread register value.  For detach we would like to write a 0 back into
             * the thread register, but it complicates our exit code, which wants access
             * to DR's TLS between dynamo_thread_exit_common()'s call to
             * dynamo_thread_not_under_dynamo() and its call to
             * set_thread_private_dcontext(NULL).  For now we just leave our privlib
             * segment in there.  It seems rather unlikely to cause a problem: app code
             * is unlikely to read the thread register; it's going to assume it owns it
             * and will just blindly write to it.
             */
            return true;
        }
        /* On switching to the app's TLS, we put DR's TLS base into the app's TLS
         * at the same offset so it can be loaded on entering code cache.
         * Otherwise, the context switch code on entering fcache will fault on
         * accessing DR's TLS.
         * The app's TLS slot value is stored into privlib's TLS slot for
         * later restore on switching back to privlib's TLS.
         */
        byte **priv_lib_tls_swap_slot =
            (byte **)(ostd->priv_lib_tls_base + DR_TLS_BASE_OFFSET);
        byte **app_lib_tls_swap_slot =
            (byte **)(os_tls->app_lib_tls_base + DR_TLS_BASE_OFFSET);
        LOG(THREAD, LOG_LOADER, 3,
            "%s: switching to app: app slot=&" PFX " *" PFX ", priv slot=&" PFX " *" PFX
            "\n",
            __FUNCTION__, app_lib_tls_swap_slot, *app_lib_tls_swap_slot,
            priv_lib_tls_swap_slot, *priv_lib_tls_swap_slot);
        byte *dr_tls_base = *priv_lib_tls_swap_slot;
        *priv_lib_tls_swap_slot = *app_lib_tls_swap_slot;
        *app_lib_tls_swap_slot = dr_tls_base;
        LOG(THREAD, LOG_LOADER, 2, "%s: switching to %s, setting coproc reg to 0x%x\n",
            __FUNCTION__, (to_app ? "app" : "dr"), os_tls->app_lib_tls_base);
        res = write_thread_register(os_tls->app_lib_tls_base);
    } else {
        /* Nothing to do if the thread register already holds privlib's base. */
        ptr_uint_t cur_seg = read_thread_register(LIB_SEG_TLS);
        if ((void *)cur_seg == ostd->priv_lib_tls_base)
            return true;
        /* On switching to DR's TLS, we restore the app's TLS slot value
         * and put DR's TLS base back to privlib's TLS slot.
         */
        byte **priv_lib_tls_swap_slot =
            (byte **)(ostd->priv_lib_tls_base + DR_TLS_BASE_OFFSET);
        byte **app_lib_tls_swap_slot =
            (byte **)(os_tls->app_lib_tls_base + DR_TLS_BASE_OFFSET);
        byte *dr_tls_base = *app_lib_tls_swap_slot;
        LOG(THREAD, LOG_LOADER, 3,
            "%s: switching to DR: app slot=&" PFX " *" PFX ", priv slot=&" PFX " *" PFX
            "\n",
            __FUNCTION__, app_lib_tls_swap_slot, *app_lib_tls_swap_slot,
            priv_lib_tls_swap_slot, *priv_lib_tls_swap_slot);
        *app_lib_tls_swap_slot = *priv_lib_tls_swap_slot;
        *priv_lib_tls_swap_slot = dr_tls_base;
        LOG(THREAD, LOG_LOADER, 2, "%s: switching to %s, setting coproc reg to 0x%x\n",
            __FUNCTION__, (to_app ? "app" : "dr"), ostd->priv_lib_tls_base);
        res = write_thread_register(ostd->priv_lib_tls_base);
    }
    LOG(THREAD, LOG_LOADER, 2, "%s %s: set_tls swap success=%d for thread " TIDFMT "\n",
        __FUNCTION__, to_app ? "to app" : "to DR", res, d_r_get_thread_id());
    return res;
#elif defined(RISCV64)
    ASSERT_NOT_IMPLEMENTED(false);
    (void)os_tls;
    return false;
#endif
}
#ifdef LINUX
/* Pre-syscall handling for SYS_clone and SYS_clone3.
 *
 * For clone3, the app's clone_args are validated and copied into a DR-owned
 * heap buffer, and these values are saved for post-syscall use:
 *   sys_param0 = app clone_args pointer, sys_param1 = DR's copy,
 *   sys_param2 = clone_args size, sys_param3/4 = low/high halves of flags.
 * For clone, sys_param0 holds the (32-bit) flags and, for thread creation,
 * sys_param1 holds the app's original stack parameter.
 *
 * Returns false, with a failure return value set (ENOSYS, E2BIG, EINVAL or
 * EFAULT), when the clone3 syscall should not be executed; returns true
 * otherwise.
 */
static bool
handle_clone_pre(dcontext_t *dcontext)
{
    /* Kernel view of the clone parameters:
     * 32-bit params: flags, newsp, ptid, tls, ctid
     * 64-bit params: should be the same yet tls (for ARCH_SET_FS) is in r8?!?
     *   I don't see how sys_clone gets its special args: shouldn't it
     *   just get pt_regs as a "special system call"?
     *   sys_clone(unsigned long clone_flags, unsigned long newsp,
     *     void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
     */
    uint64_t flags;
    /* For the clone3 syscall, DR creates its own copy of clone_args for two
     * reasons: to ensure that the app-provided clone_args is readable
     * without any fault, and to avoid modifying the app's clone_args in the
     * is_thread_create_syscall case (see below).
     */
    clone3_syscall_args_t *dr_clone_args = NULL, *app_clone_args = NULL;
    uint app_clone_args_size = 0;
    if (dcontext->sys_num == SYS_clone3) {
        if (is_clone3_enosys) {
            /* is_clone3_enosys is set, so we skip further
             * handling and fail early.
             */
            LOG(THREAD, LOG_SYSCALLS, 2, "\treturning ENOSYS to app for clone3\n");
            set_failure_return_val(dcontext, ENOSYS);
            DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
            return false;
        }
        app_clone_args_size =
            (uint)sys_param(dcontext, SYSCALL_PARAM_CLONE3_CLONE_ARGS_SIZE);
        if (app_clone_args_size > PAGE_SIZE) {
            /* The size is app-controlled and drives the allocation below.
             * The kernel rejects a clone_args size larger than a page with
             * E2BIG, so do the same here rather than allocating it.
             */
            LOG(THREAD, LOG_SYSCALLS, 2, "\treturning E2BIG to app for clone3\n");
            set_failure_return_val(dcontext, E2BIG);
            DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
            return false;
        }
        if (app_clone_args_size < CLONE_ARGS_SIZE_VER0) {
            LOG(THREAD, LOG_SYSCALLS, 2, "\treturning EINVAL to app for clone3\n");
            set_failure_return_val(dcontext, EINVAL);
            DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
            return false;
        }
        app_clone_args =
            (clone3_syscall_args_t *)sys_param(dcontext, SYSCALL_PARAM_CLONE3_CLONE_ARGS);
        /* The app's clone_args object may have
         * less/more fields than DR's internal struct (clone3_syscall_args_t).
         * For creating DR's copy of the app's clone_args object, we need to
         * allocate as much space as specified by the app in the clone3
         * syscall's args.
         */
        dr_clone_args = (clone3_syscall_args_t *)heap_alloc(
            dcontext, app_clone_args_size HEAPACCT(ACCT_OTHER));
        if (!d_r_safe_read(app_clone_args, app_clone_args_size, dr_clone_args)) {
            LOG(THREAD, LOG_SYSCALLS, 2, "\treturning EFAULT to app for clone3\n");
            set_failure_return_val(dcontext, EFAULT);
            DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
            heap_free(dcontext, dr_clone_args, app_clone_args_size HEAPACCT(ACCT_OTHER));
            return false;
        }
        flags = dr_clone_args->flags;

        /* Save the values needed
         * post-syscall.
         */
        dcontext->sys_param0 = (reg_t)app_clone_args;
        dcontext->sys_param1 = (reg_t)dr_clone_args;
        dcontext->sys_param2 = (reg_t)app_clone_args_size;
        /* The 64-bit flags value has to be stored in
         * two reg_t vars on 32-bit. We do it on 64-bit systems as well for simpler code.
         */
        dcontext->sys_param3 = (reg_t)(flags & CLONE3_FLAGS_4_BYTE_MASK);
        ASSERT((flags >> 32 & ~CLONE3_FLAGS_4_BYTE_MASK) == 0);
        dcontext->sys_param4 = (reg_t)((flags >> 32));
        LOG(THREAD, LOG_SYSCALLS, 2,
            "syscall: clone3 with args: flags = 0x" HEX64_FORMAT_STRING
            ", exit_signal = 0x" HEX64_FORMAT_STRING ", stack = 0x" HEX64_FORMAT_STRING
            ", stack_size = 0x" HEX64_FORMAT_STRING "\n",
            dr_clone_args->flags, dr_clone_args->exit_signal, dr_clone_args->stack,
            dr_clone_args->stack_size);
    } else {
        flags = (uint)sys_param(dcontext, 0);
        /* Store the flags in sys_param0.
         * Unlike clone3, here the flags are 32-bit, so truncation is okay.
         */
        dcontext->sys_param0 = (reg_t)flags;
        LOG(THREAD, LOG_SYSCALLS, 2,
            "syscall: clone with args: flags = " PFX ", stack = " PFX
            ", tid_field_parent = " PFX ", tid_field_child = " PFX ", thread_ptr = " PFX
            "\n",
            sys_param(dcontext, 0), sys_param(dcontext, 1), sys_param(dcontext, 2),
            sys_param(dcontext, 3), sys_param(dcontext, 4));
    }
    handle_clone(dcontext, flags);
    if ((flags & CLONE_VM) == 0) {
        LOG(THREAD, LOG_SYSCALLS, 1, "\tWARNING: CLONE_VM not set!\n");
    }
    /* Earlier vfork threads that invoked execve may have left resources behind;
     * clean those up before they get reused by a new thread.
     * XXX: Ideally we'd do this in fd_table_add(), but we can't acquire
     * thread_initexit_lock there.
     */
    cleanup_after_vfork_execve(dcontext);

    /* For thread creation clone syscalls, a clone record
     * containing the pc after the app's syscall instr and other data
     * (see i#27) is placed at the bottom of the dstack (which is allocated
     * by create_clone_record() - it also saves app stack and switches
     * to dstack).  xref i#149/PR 403015.
     * Note: This must be done after sys_param0 is set.
     */
    if (is_thread_create_syscall(dcontext, dr_clone_args)) {
        if (dcontext->sys_num == SYS_clone3) {
            /* create_clone_record() needs to modify the clone_args for the
             * clone3 syscall. Instead of reusing the app's copy of
             * clone_args and modifying it, we choose to use our own copy.
             * Under CLONE_VM, the parent and child threads have a pointer to
             * the same app clone_args. By using our own copy of clone_args
             * for the syscall, we obviate the need to restore the modified
             * fields in the app's copy after the syscall in either the parent
             * or the child thread, which can be racy under CLONE_VM as the
             * parent and/or child threads may need to access/modify it. By
             * using a copy instead, both parent and child threads only
             * need to restore their own SYSCALL_PARAM_CLONE3_CLONE_ARGS reg
             * to the pointer to the app's clone_args. It is saved in the
             * clone record for the child thread, and in sys_param0 for the
             * parent thread. The DR copy of clone_args is freed by the parent
             * thread in the post-syscall handling of clone3; as it is used
             * only by the parent thread, there is no use-after-free danger here.
             */
            ASSERT(app_clone_args != NULL && dr_clone_args != NULL);
            *sys_param_addr(dcontext, SYSCALL_PARAM_CLONE3_CLONE_ARGS) =
                (reg_t)dr_clone_args;
            create_clone_record(dcontext, NULL, dr_clone_args, app_clone_args);
        } else {
            /* We replace the app-provided stack
             * pointer in create_clone_record. Save the original pointer so
             * that we can restore it post-syscall in the parent. The same is
             * restored in the child in restore_clone_param_from_clone_record.
             */
            dcontext->sys_param1 = sys_param(dcontext, SYSCALL_PARAM_CLONE_STACK);
            create_clone_record(dcontext,
                                sys_param_addr(dcontext, SYSCALL_PARAM_CLONE_STACK), NULL,
                                NULL);
        }
        os_clone_pre(dcontext);
        os_new_thread_pre();
    } else {
        if (dcontext->sys_num == SYS_clone3) {
            /* Free DR's copy of clone_args now, instead of having to free
             * it in the parent *and* the child later.
             */
            ASSERT(app_clone_args_size == (uint)dcontext->sys_param2);
            ASSERT(dr_clone_args == (clone3_syscall_args_t *)dcontext->sys_param1);
            heap_free(dcontext, dr_clone_args, app_clone_args_size HEAPACCT(ACCT_OTHER));
            /* Clear the saved pointer and size so nothing refers to the freed copy. */
            dcontext->sys_param1 = 0;
            dcontext->sys_param2 = 0;
        }
        os_fork_pre(dcontext);
    }
    return true;
}
#endif
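/* System call interception.
 * Arguments come from the pusha right before the call
 */
/* WARNING: pre and post system
 * call handlers do not examine or modify fcache or its fragments in any
 * way except for calling flush_fragments_and_remove_region!
 */
/* WARNING: to change a register value seen after the syscall, a handler
 * must set mcontext's register for that.
 */
/* Returns false if the system call should not be executed (in which case
 * post_system_call() will *not* be called!).
 * Returns true if system call should go ahead
 */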
bool
pre_system_call(dcontext_t *dcontext)
{
priv_mcontext_t *mc = get_mcontext(dcontext);
bool execute_syscall = true;
dr_where_am_i_t old_whereami = dcontext->whereami;
dcontext->whereami = DR_WHERE_SYSCALL_HANDLER;
* can determine a priori will fail. Once we do, we will set the
* expect_last_syscall_to_fail to true for those case, and can
* confirm in post_system_call() that the syscall failed as
* expected.
*/
DODEBUG(dcontext->expect_last_syscall_to_fail = false;);
* in syscall itself)
*/
dcontext->sys_num = os_normalized_sysnum((int)MCXT_SYSNUM_REG(mc), NULL, dcontext);
RSTATS_INC(pre_syscall);
DOSTATS({
if (ignorable_system_call_normalized(dcontext->sys_num))
STATS_INC(pre_syscall_ignorable);
});
LOG(THREAD, LOG_SYSCALLS, 2, "system call %d\n", dcontext->sys_num);
#if defined(LINUX) && defined(X86)
* we fall back on int, but we have to tweak syscall param #5 (ebp)
* Once we have PR 288330 we can remove this.
*/
if (should_syscall_method_be_sysenter() && !dcontext->sys_was_int) {
dcontext->sys_xbp = mc->xbp;
* every single system call on systems where we can't hook vsyscall!)
*/
TRY_EXCEPT(
dcontext, { mc->xbp = *(reg_t *)mc->xsp; },
{
ASSERT_NOT_REACHED();
mc->xbp = 0;
});
}
#endif
switch (dcontext->sys_num) {
case SYSNUM_EXIT_PROCESS:
#if defined(LINUX) && VMX86_SERVER
if (os_in_vmkernel_32bit()) {
LOG(THREAD, LOG_SYSCALLS, 2, "on esx35 => ignoring exitgroup\n");
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
break;
}
#endif
case SYSNUM_EXIT_THREAD: {
handle_exit(dcontext);
break;
}
#if defined(LINUX) && !defined(X64) && !defined(ARM)
case SYS_mmap: {
asmlinkage int old_mmap(struct mmap_arg_struct_t *arg)
*/
mmap_arg_struct_t *arg = (mmap_arg_struct_t *)sys_param(dcontext, 0);
mmap_arg_struct_t arg_buf;
if (d_r_safe_read(arg, sizeof(mmap_arg_struct_t), &arg_buf)) {
void *addr = (void *)arg->addr;
size_t len = (size_t)arg->len;
uint prot = (uint)arg->prot;
LOG(THREAD, LOG_SYSCALLS, 2,
"syscall: mmap addr=" PFX " size=" PIFX " prot=0x%x"
" flags=" PIFX " offset=" PIFX " fd=%d\n",
addr, len, prot, arg->flags, arg->offset, arg->fd);
if (addr != NULL &&
!app_memory_pre_alloc(dcontext, addr, len, osprot_to_memprot(prot),
!TEST(MAP_FIXED, arg->flags),
false ,
false )) {
* remove the hint -- but we don't want to write to app memory, so
* we do fail. We could set up our own mmap_arg_struct_t but
* we'd need dedicate per-thread storage, and SYS_mmap is obsolete.
*/
execute_syscall = false;
set_failure_return_val(dcontext, ENOMEM);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
break;
}
}
dcontext->sys_param0 = (reg_t)arg;
break;
}
#endif
case IF_MACOS_ELSE(SYS_mmap, IF_X64_ELSE(SYS_mmap, SYS_mmap2)): {
asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long pgoff)
*/
void *addr = (void *)sys_param(dcontext, 0);
size_t len = (size_t)sys_param(dcontext, 1);
uint prot = (uint)sys_param(dcontext, 2);
uint flags = (uint)sys_param(dcontext, 3);
LOG(THREAD, LOG_SYSCALLS, 2,
"syscall: mmap2 addr=" PFX " size=" PIFX " prot=0x%x"
" flags=" PIFX " offset=" PIFX " fd=%d\n",
addr, len, prot, flags, sys_param(dcontext, 5), sys_param(dcontext, 4));
* (typical for 1st mmap).
*/
bool image = addr != NULL && !TEST(MAP_ANONYMOUS, flags) &&
mmap_check_for_module_overlap(addr, len, TEST(PROT_READ, prot), 0, true);
if (addr != NULL &&
!app_memory_pre_alloc(dcontext, addr, len, osprot_to_memprot(prot),
!TEST(MAP_FIXED, flags), false ,
image )) {
if (!TEST(MAP_FIXED, flags)) {
* the hint which should eliminate any overlap.
*/
*sys_param_addr(dcontext, 0) = 0;
} else {
execute_syscall = false;
set_failure_return_val(dcontext, ENOMEM);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
break;
}
}
dcontext->sys_param0 = (reg_t)addr;
dcontext->sys_param1 = len;
dcontext->sys_param2 = prot;
dcontext->sys_param3 = flags;
break;
}
case SYS_munmap: {
asmlinkage long sys_munmap(unsigned long addr, uint len)
*/
app_pc addr = (void *)sys_param(dcontext, 0);
size_t len = (size_t)sys_param(dcontext, 1);
LOG(THREAD, LOG_SYSCALLS, 2, "syscall: munmap addr=" PFX " size=" PFX "\n", addr,
len);
RSTATS_INC(num_app_munmaps);
* could detect that condition here and set
* expect_last_syscall_to_fail.
*/
dcontext->sys_param0 = (reg_t)addr;
dcontext->sys_param1 = len;
* and remove the region from exec areas and flush all fragments
* prior to issuing the syscall. If the unmap fails, we try to
* recover in post_system_call() by re-adding the region. This
* approach has its shortcomings -- see comments below in
* post_system_call().
*/
os_get_module_info_lock();
if (module_overlaps(addr, len)) {
* part of a module (for which case should adjust view size? or treat as full
* unmap?). Theoretical for now as we haven't seen this. */
module_area_t *ma = module_pc_lookup(addr);
ASSERT_CURIOSITY(ma != NULL);
ASSERT_CURIOSITY(addr == ma->start);
ASSERT_CURIOSITY((app_pc)ALIGN_FORWARD(addr + len, PAGE_SIZE) == ma->end);
os_get_module_info_unlock();
* we only think a module is removed if its first memory region
* is unloaded (unmapped).
* XREF i#160 to fix the real problem of handling module splitting.
*/
if (ma != NULL && ma->start == addr)
module_list_remove(addr, ALIGN_FORWARD(len, PAGE_SIZE));
} else
os_get_module_info_unlock();
app_memory_deallocation(dcontext, (app_pc)addr, len,
false ,
true );
#ifndef HAVE_MEMINFO_QUERY
memcache_lock();
memcache_remove(addr, addr + len);
memcache_unlock();
#endif
break;
}
#ifdef LINUX
case SYS_mremap: {
asmlinkage unsigned long sys_mremap(unsigned long addr,
unsigned long old_len, unsigned long new_len,
unsigned long flags, unsigned long new_addr)
*/
dr_mem_info_t info;
app_pc addr = (void *)sys_param(dcontext, 0);
size_t old_len = (size_t)sys_param(dcontext, 1);
size_t new_len = (size_t)sys_param(dcontext, 2);
DEBUG_DECLARE(bool ok;)
LOG(THREAD, LOG_SYSCALLS, 2, "syscall: mremap addr=" PFX " size=" PFX "\n", addr,
old_len);
dcontext->sys_param0 = (reg_t)addr;
dcontext->sys_param1 = old_len;
dcontext->sys_param2 = new_len;
* we need memory type and prot to set the
* new memory region in the post_system_call
*/
DEBUG_DECLARE(ok =)
query_memory_ex(addr, &info);
ASSERT(ok);
dcontext->sys_param3 = info.prot;
dcontext->sys_param4 = info.type;
DOCHECK(1, {
os_get_module_info_lock();
ASSERT_CURIOSITY(!module_overlaps(addr, old_len));
os_get_module_info_unlock();
});
break;
}
#endif
case SYS_mprotect: {
asmlinkage long sys_mprotect(unsigned long start, uint len,
unsigned long prot)
*/
uint res;
DEBUG_DECLARE(size_t size;)
app_pc addr = (void *)sys_param(dcontext, 0);
size_t len = (size_t)sys_param(dcontext, 1);
uint prot = (uint)sys_param(dcontext, 2);
uint old_memprot = MEMPROT_NONE, new_memprot;
bool exists UNUSED = true;
dcontext->sys_param0 = (reg_t)addr;
dcontext->sys_param1 = len;
dcontext->sys_param2 = prot;
LOG(THREAD, LOG_SYSCALLS, 2,
"syscall: mprotect addr=" PFX " size=" PFX " prot=%s\n", addr, len,
memprot_string(osprot_to_memprot(prot)));
if (!get_memory_info(addr, NULL, IF_DEBUG_ELSE(&size, NULL), &old_memprot)) {
exists = false;
* this should fail with ENOMEM. We used to force-fail it to avoid
* asserts in our own allmem update code, but there are cases where a
* seemingly unmapped page succeeds (i#1912: next page of grows-down
* initial stack). Thus we let it go through.
*/
LOG(THREAD, LOG_SYSCALLS, 2,
"\t" PFX " isn't mapped: probably mprotect will fail\n", addr);
} else {
* spans 2 or more vmareas with dissimilar protection (xref
* PR 410921) or has unallocated regions in between (PR 413109).
*/
DOCHECK(1, dcontext->mprot_multi_areas = len > size ? true : false;);
}
new_memprot = osprot_to_memprot(prot) |
(old_memprot & MEMPROT_META_FLAGS);
res = app_memory_protection_change(dcontext, addr, len, new_memprot, &new_memprot,
NULL, false );
if (res != DO_APP_MEM_PROT_CHANGE) {
if (res == FAIL_APP_MEM_PROT_CHANGE) {
ASSERT_NOT_IMPLEMENTED(false);
} else {
ASSERT_NOT_IMPLEMENTED(res != SUBSET_APP_MEM_PROT_CHANGE);
ASSERT_NOT_REACHED();
}
execute_syscall = false;
} else {
IF_NO_MEMQUERY(memcache_update_locked(addr, addr + len, new_memprot,
-1 , exists));
}
break;
}
#ifdef ANDROID
case SYS_prctl:
dcontext->sys_param0 = sys_param(dcontext, 0);
dcontext->sys_param1 = sys_param(dcontext, 1);
dcontext->sys_param2 = sys_param(dcontext, 2);
dcontext->sys_param3 = sys_param(dcontext, 3);
dcontext->sys_param4 = sys_param(dcontext, 4);
break;
#endif
#ifdef LINUX
case SYS_brk: {
if (DYNAMO_OPTION(emulate_brk)) {
byte *new_val = (byte *)sys_param(dcontext, 0);
byte *res = emulate_app_brk(dcontext, new_val);
execute_syscall = false;
set_success_return_val(dcontext, (reg_t)res);
} else {
* We store the old break in the param1 slot.
*/
DODEBUG(dcontext->sys_param0 = (reg_t)sys_param(dcontext, 0););
dcontext->sys_param1 = dynamorio_syscall(SYS_brk, 1, 0);
}
break;
}
# ifdef SYS_uselib
case SYS_uselib: {
* Was primarily used when statically linking to dynamically loaded shared
* libraries that were loaded at known locations. Shouldn't be used by
* applications using the dynamic loader (ld) which is currently the only
* way we can inject so we don't expect to see this. PR 307621. */
ASSERT_NOT_IMPLEMENTED(false);
break;
}
# endif
#endif
#ifdef LINUX
case SYS_clone3:
case SYS_clone: execute_syscall = handle_clone_pre(dcontext); break;
#elif defined(MACOS)
case SYS_bsdthread_create: {
* bsdthread_register in order to capture workqueue threads.
* For now we settle for intercepting bsd threads at the user thread func.
* We miss a little user-mode code but this is enough to get started.
*/
app_pc func = (app_pc)sys_param(dcontext, 0);
void *func_arg = (void *)sys_param(dcontext, 1);
void *clone_rec;
LOG(THREAD, LOG_SYSCALLS, 1,
"bsdthread_create: thread func " PFX ", arg " PFX "\n", func, func_arg);
handle_clone(dcontext, CLONE_THREAD | CLONE_VM | CLONE_SIGHAND | SIGCHLD);
clone_rec = create_clone_record(dcontext, NULL, func, func_arg);
dcontext->sys_param0 = (reg_t)func;
dcontext->sys_param1 = (reg_t)func_arg;
*sys_param_addr(dcontext, 0) = (reg_t)new_bsdthread_intercept;
*sys_param_addr(dcontext, 1) = (reg_t)clone_rec;
os_new_thread_pre();
break;
}
case SYS_posix_spawn: {
ASSERT_NOT_IMPLEMENTED(false);
break;
}
#endif
#ifdef SYS_vfork
case SYS_vfork: {
uint flags = CLONE_VFORK | CLONE_VM | SIGCHLD;
LOG(THREAD, LOG_SYSCALLS, 2, "syscall: vfork\n");
handle_clone(dcontext, flags);
cleanup_after_vfork_execve(dcontext);
dcontext->sys_param0 = (reg_t)flags;
* structure to child. See SYS_clone for info about i#149/PR 403015.
*/
IF_LINUX(ASSERT(is_thread_create_syscall(dcontext, NULL)));
dcontext->sys_param1 = mc->xsp;
# ifdef MACOS
create_clone_record(dcontext, (reg_t *)&mc->xsp, NULL, NULL);
# else
create_clone_record(dcontext, (reg_t *)&mc->xsp , NULL,
NULL);
# endif
os_clone_pre(dcontext);
os_new_thread_pre();
break;
}
#endif
#ifdef SYS_fork
case SYS_fork: {
LOG(THREAD, LOG_SYSCALLS, 2, "syscall: fork\n");
os_fork_pre(dcontext);
break;
}
#endif
case SYS_execve: {
int ret = handle_execve(dcontext);
if (ret != 0) {
execute_syscall = false;
set_failure_return_val(dcontext, ret);
}
break;
}
case IF_MACOS_ELSE(SYS_sigaction, SYS_rt_sigaction): {
asmlinkage long
sys_rt_sigaction(int sig, const struct sigaction *act,
struct sigaction *oact, size_t sigsetsize)
*/
int sig = (int)sys_param(dcontext, 0);
const kernel_sigaction_t *act =
(const kernel_sigaction_t *)sys_param(dcontext, 1);
prev_sigaction_t *oact = (prev_sigaction_t *)sys_param(dcontext, 2);
size_t sigsetsize = (size_t)
* closer to rt_ than non-rt_ below).
*/
IF_MACOS_ELSE(sizeof(kernel_sigset_t), sys_param(dcontext, 3));
uint res;
LOG(THREAD, LOG_SYSCALLS, 2, "syscall: %ssigaction %d " PFX " " PFX " %d\n",
IF_MACOS_ELSE("", "rt_"), sig, act, oact, sigsetsize);
dcontext->sys_param0 = (reg_t)sig;
dcontext->sys_param1 = (reg_t)act;
dcontext->sys_param2 = (reg_t)oact;
dcontext->sys_param3 = (reg_t)sigsetsize;
execute_syscall = handle_sigaction(dcontext, sig, act, oact, sigsetsize, &res);
if (!execute_syscall) {
LOG(THREAD, LOG_SYSCALLS, 2, "sigaction emulation => %d\n", -res);
if (res == 0)
set_success_return_val(dcontext, 0);
else
set_failure_return_val(dcontext, res);
}
break;
}
#if defined(LINUX) && !defined(X64)
case SYS_sigaction: {
* struct old_sigaction *oact)
*/
int sig = (int)sys_param(dcontext, 0);
const old_sigaction_t *act = (const old_sigaction_t *)sys_param(dcontext, 1);
old_sigaction_t *oact = (old_sigaction_t *)sys_param(dcontext, 2);
uint res;
LOG(THREAD, LOG_SYSCALLS, 2, "syscall: sigaction %d " PFX " " PFX "\n", sig, act,
oact);
dcontext->sys_param0 = (reg_t)sig;
dcontext->sys_param1 = (reg_t)act;
dcontext->sys_param2 = (reg_t)oact;
execute_syscall = handle_old_sigaction(dcontext, sig, act, oact, &res);
if (!execute_syscall) {
LOG(THREAD, LOG_SYSCALLS, 2, "sigaction emulation => %d\n", -res);
if (res == 0)
set_success_return_val(dcontext, 0);
else
set_failure_return_val(dcontext, res);
}
break;
}
#endif
#if defined(LINUX) && !defined(X64)
case SYS_sigreturn: {
asmlinkage int sys_sigreturn(unsigned long __unused)
*/
execute_syscall = handle_sigreturn(dcontext, false);
* returns false it always redirects the context, and thus no
* need to set return val here.
*/
break;
}
#endif
#ifdef LINUX
case SYS_rt_sigreturn: {
asmlinkage int sys_rt_sigreturn(unsigned long __unused)
*/
execute_syscall = handle_sigreturn(dcontext, true);
break;
}
#endif
#ifdef MACOS
case SYS_sigreturn: {
execute_syscall = handle_sigreturn(dcontext, (void *)sys_param(dcontext, 0),
(int)sys_param(dcontext, 1));
break;
}
#endif
case SYS_sigaltstack: {
asmlinkage int
sys_sigaltstack(const stack_t *uss, stack_t *uoss)
*/
const stack_t *uss = (const stack_t *)sys_param(dcontext, 0);
stack_t *uoss = (stack_t *)sys_param(dcontext, 1);
uint res;
LOG(THREAD, LOG_SYSCALLS, 2, "syscall: sigaltstack " PFX " " PFX "\n", uss, uoss);
execute_syscall =
handle_sigaltstack(dcontext, uss, uoss, get_mcontext(dcontext)->xsp, &res);
if (!execute_syscall) {
LOG(THREAD, LOG_SYSCALLS, 2, "sigaltstack emulation => %d\n", -res);
if (res == 0)
set_success_return_val(dcontext, res);
else
set_failure_return_val(dcontext, res);
}
break;
}
case IF_MACOS_ELSE(SYS_sigprocmask, SYS_rt_sigprocmask): {
asmlinkage long
sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset,
size_t sigsetsize)
*/
uint error_code = 0;
dcontext->sys_param0 = sys_param(dcontext, 0);
dcontext->sys_param1 = sys_param(dcontext, 1);
dcontext->sys_param2 = sys_param(dcontext, 2);
* kernel_sigset_t size instead.
*/
size_t sigsetsize =
(size_t)IF_MACOS_ELSE(sizeof(kernel_sigset_t), sys_param(dcontext, 3));
dcontext->sys_param3 = (reg_t)sigsetsize;
execute_syscall = handle_sigprocmask(dcontext, (int)sys_param(dcontext, 0),
(kernel_sigset_t *)sys_param(dcontext, 1),
(kernel_sigset_t *)sys_param(dcontext, 2),
sigsetsize, &error_code);
if (!execute_syscall) {
if (error_code == 0)
set_success_return_val(dcontext, 0);
else
set_failure_return_val(dcontext, error_code);
}
break;
}
#ifdef MACOS
case SYS_sigsuspend_nocancel:
#endif
case IF_MACOS_ELSE(SYS_sigsuspend, SYS_rt_sigsuspend): {
asmlinkage int
sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize)
*/
handle_sigsuspend(dcontext, (kernel_sigset_t *)sys_param(dcontext, 0),
(size_t)sys_param(dcontext, 1));
break;
}
#ifdef LINUX
# ifdef SYS_signalfd
case SYS_signalfd:
# endif
case SYS_signalfd4: {
ptr_int_t new_result;
dcontext->sys_param0 = sys_param(dcontext, 0);
dcontext->sys_param1 = sys_param(dcontext, 1);
dcontext->sys_param2 = sys_param(dcontext, 2);
# ifdef SYS_signalfd
if (dcontext->sys_num == SYS_signalfd)
dcontext->sys_param3 = 0;
else
# endif
dcontext->sys_param3 = sys_param(dcontext, 3);
new_result = handle_pre_signalfd(
dcontext, (int)dcontext->sys_param0, (kernel_sigset_t *)dcontext->sys_param1,
(size_t)dcontext->sys_param2, (int)dcontext->sys_param3);
execute_syscall = false;
set_success_return_val(dcontext, new_result);
break;
}
#endif
case SYS_kill: {
* asmlinkage long sys_kill(int pid, int sig)
*/
pid_t pid = (pid_t)sys_param(dcontext, 0);
uint sig = (uint)sys_param(dcontext, 1);
LOG(GLOBAL, LOG_TOP | LOG_SYSCALLS, 2,
"thread " TIDFMT " sending signal %d to pid " PIDFMT "\n",
d_r_get_thread_id(), sig, pid);
if (pid == get_process_id() || pid == 0 || pid == -get_process_group_id()) {
handle_self_signal(dcontext, sig);
}
break;
}
#if defined(SYS_tkill)
case SYS_tkill: {
* asmlinkage long sys_tkill(int pid, int sig)
*/
pid_t tid = (pid_t)sys_param(dcontext, 0);
uint sig = (uint)sys_param(dcontext, 1);
LOG(GLOBAL, LOG_TOP | LOG_SYSCALLS, 2,
"thread " TIDFMT " sending signal %d to tid %d\n", d_r_get_thread_id(), sig,
tid);
if (tid == d_r_get_thread_id()) {
handle_self_signal(dcontext, sig);
}
break;
}
#endif
#if defined(SYS_tgkill)
case SYS_tgkill: {
* asmlinkage long sys_tgkill(int tgid, int pid, int sig)
*/
pid_t tgid = (pid_t)sys_param(dcontext, 0);
pid_t tid = (pid_t)sys_param(dcontext, 1);
uint sig = (uint)sys_param(dcontext, 2);
LOG(GLOBAL, LOG_TOP | LOG_SYSCALLS, 2,
"thread " TIDFMT " sending signal %d to tid %d tgid %d\n",
d_r_get_thread_id(), sig, tid, tgid);
+ tgkill(-1, tid, sig) == tkill(tid, sig)
* tgkill(tgid, -1, sig) == kill(tgid, sig)
* the 2nd was proposed but is not in 2.6.20 so I'm ignoring it, since
* I don't want to kill the thread when the signal is never sent!
* FIXME: the 1st is in my tkill manpage, but not my 2.6.20 kernel sources!
*/
if ((tgid == -1 || tgid == get_process_id()) && tid == d_r_get_thread_id()) {
handle_self_signal(dcontext, sig);
}
break;
}
#endif
case SYS_setitimer:
dcontext->sys_param0 = sys_param(dcontext, 0);
dcontext->sys_param1 = sys_param(dcontext, 1);
dcontext->sys_param2 = sys_param(dcontext, 2);
handle_pre_setitimer(dcontext, (int)sys_param(dcontext, 0),
(const struct itimerval *)sys_param(dcontext, 1),
(struct itimerval *)sys_param(dcontext, 2));
break;
case SYS_getitimer:
dcontext->sys_param0 = sys_param(dcontext, 0);
dcontext->sys_param1 = sys_param(dcontext, 1);
break;
#if defined(LINUX) && defined(X86)
case SYS_alarm:
dcontext->sys_param0 = sys_param(dcontext, 0);
handle_pre_alarm(dcontext, (unsigned int)dcontext->sys_param0);
break;
#endif
#if 0
# ifndef X64
case SYS_signal: {
asmlinkage unsigned long
sys_signal(int sig, __sighandler_t handler)
*/
break;
}
case SYS_sigsuspend: {
asmlinkage int
sys_sigsuspend(int history0, int history1, old_sigset_t mask)
*/
break;
}
case SYS_sigprocmask: {
asmlinkage long
sys_sigprocmask(int how, old_sigset_t *set, old_sigset_t *oset)
*/
break;
}
# endif
#else
# if defined(LINUX) && !defined(X64)
# ifndef ARM
case SYS_signal:
# endif
case SYS_sigsuspend:
case SYS_sigprocmask:
# endif
#endif
#if defined(LINUX) && !defined(X64)
case SYS_sigpending:
# ifndef ARM
case SYS_sgetmask:
case SYS_ssetmask:
# endif
#endif
#ifdef LINUX
# ifdef SYS_rt_sigtimedwait_time64
case SYS_rt_sigtimedwait_time64:
# endif
case SYS_rt_sigtimedwait:
case SYS_rt_sigqueueinfo:
case SYS_rt_tgsigqueueinfo:
#endif
case IF_MACOS_ELSE(SYS_sigpending, SYS_rt_sigpending): {
LOG(THREAD, LOG_ASYNCH | LOG_SYSCALLS, 1,
"WARNING: unhandled signal system call %d\n", dcontext->sys_num);
SYSLOG_INTERNAL_WARNING_ONCE("unhandled signal system call %d",
dcontext->sys_num);
break;
}
#ifdef LINUX
# ifdef SYS_ppoll_time64
case SYS_ppoll_time64:
# endif
case SYS_ppoll: {
kernel_sigset_t *sigmask = (kernel_sigset_t *)sys_param(dcontext, 3);
dcontext->sys_param3 = (reg_t)sigmask;
if (sigmask == NULL)
break;
size_t sizemask = (size_t)sys_param(dcontext, 4);
* a non p* version, and the mask's semantics are emulated by DR instead.
*/
set_syscall_param(dcontext, 3, (reg_t)NULL);
bool sig_pending = false;
if (!handle_pre_extended_syscall_sigmasks(dcontext, sigmask, sizemask,
&sig_pending)) {
* failure. We're already violating app transparency in other places in DR.
*/
set_failure_return_val(dcontext, EINVAL);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
execute_syscall = false;
}
if (sig_pending) {
* parameter, but we leave the modified signal mask.
*/
set_syscall_param(dcontext, 3, dcontext->sys_param3);
set_failure_return_val(dcontext, EINTR);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
execute_syscall = false;
}
break;
}
# ifdef SYS_pselect6_time64
case SYS_pselect6_time64:
# endif
case SYS_pselect6: {
typedef struct {
kernel_sigset_t *sigmask;
size_t sizemask;
} data_t;
dcontext->sys_param3 = sys_param(dcontext, 5);
data_t *data_param = (data_t *)dcontext->sys_param3;
data_t data;
if (data_param == NULL) {
* just a NULL sigmask.
*/
dcontext->sys_param4 = (reg_t)NULL;
break;
}
* argument in pselect6.
*/
if (!d_r_safe_read(data_param, sizeof(data), &data)) {
LOG(THREAD, LOG_SYSCALLS, 2, "\treturning EFAULT to app for pselect6\n");
set_failure_return_val(dcontext, EFAULT);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
execute_syscall = false;
break;
}
dcontext->sys_param4 = (reg_t)data.sigmask;
if (data.sigmask == NULL)
break;
kernel_sigset_t *nullsigmaskptr = NULL;
if (!safe_write_ex((void *)&data_param->sigmask, sizeof(data_param->sigmask),
&nullsigmaskptr, NULL)) {
set_failure_return_val(dcontext, EFAULT);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
execute_syscall = false;
break;
}
bool sig_pending = false;
if (!handle_pre_extended_syscall_sigmasks(dcontext, data.sigmask, data.sizemask,
&sig_pending)) {
set_failure_return_val(dcontext, EINVAL);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
execute_syscall = false;
}
if (sig_pending) {
if (!safe_write_ex((void *)&data_param->sigmask, sizeof(data_param->sigmask),
&dcontext->sys_param4, NULL)) {
set_failure_return_val(dcontext, EFAULT);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
execute_syscall = false;
break;
}
set_failure_return_val(dcontext, EINTR);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
execute_syscall = false;
}
break;
}
case SYS_epoll_pwait: {
kernel_sigset_t *sigmask = (kernel_sigset_t *)sys_param(dcontext, 4);
dcontext->sys_param4 = (reg_t)sigmask;
if (sigmask == NULL)
break;
size_t sizemask = (size_t)sys_param(dcontext, 5);
set_syscall_param(dcontext, 4, (reg_t)NULL);
bool sig_pending = false;
if (!handle_pre_extended_syscall_sigmasks(dcontext, sigmask, sizemask,
&sig_pending)) {
set_failure_return_val(dcontext, EINVAL);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
execute_syscall = false;
}
if (sig_pending) {
set_syscall_param(dcontext, 4, dcontext->sys_param4);
set_failure_return_val(dcontext, EINTR);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
execute_syscall = false;
}
break;
}
#endif
* it's not worth monitoring all syscalls that take in fds from affecting ours.
*/
#ifdef MACOS
case SYS_close_nocancel:
#endif
#ifdef SYS_close_range
case SYS_close_range: {
* run on an AArchXX machine that has close_range available.
*/
IF_AARCHXX(ASSERT_NOT_TESTED());
uint first_fd = sys_param(dcontext, 0), last_fd = sys_param(dcontext, 1);
uint flags = sys_param(dcontext, 2);
bool is_cloexec = TEST(CLOSE_RANGE_CLOEXEC, flags);
if (is_cloexec) {
* verified on a system with kernel version >= 5.11 yet.
*/
ASSERT_NOT_TESTED();
}
* make multiple close_range syscalls ourselves, one for each contiguous
* sub-range of non-DR-private fds in [first, last].
*/
execute_syscall = false;
if (first_fd > last_fd) {
set_failure_return_val(dcontext, EINVAL);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
break;
}
uint cur_range_first_fd = 0;
uint cur_range_last_fd = 0;
bool cur_range_valid = false;
int ret = 0;
for (int i = first_fd; i <= last_fd; i++) {
if ((is_cloexec && fd_is_dr_owned(i)) ||
(!is_cloexec && !handle_close_range_pre(dcontext, i))) {
SYSLOG_INTERNAL_WARNING_ONCE("app trying to close private fd(s)");
if (cur_range_valid) {
cur_range_valid = false;
ret = dynamorio_syscall(SYS_close_range, 3, cur_range_first_fd,
cur_range_last_fd, flags);
if (ret != 0)
break;
}
} else {
# ifdef LINUX
if (!is_cloexec) {
signal_handle_close(dcontext, i);
}
# endif
if (cur_range_valid) {
ASSERT(cur_range_last_fd == i - 1);
cur_range_last_fd = i;
} else {
cur_range_first_fd = i;
cur_range_last_fd = i;
cur_range_valid = true;
}
}
}
if (cur_range_valid) {
ret = dynamorio_syscall(SYS_close_range, 3, cur_range_first_fd,
cur_range_last_fd, flags);
}
if (ret != 0) {
set_failure_return_val(dcontext, ret);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
} else {
set_success_return_val(dcontext, 0);
}
break;
}
#endif
case SYS_close: {
execute_syscall = handle_close_pre(dcontext);
#ifdef LINUX
if (execute_syscall)
signal_handle_close(dcontext, (file_t)sys_param(dcontext, 0));
#endif
break;
}
#if defined(SYS_dup2) || defined(SYS_dup3)
# ifdef SYS_dup3
case SYS_dup3:
# endif
# ifdef SYS_dup2
case SYS_dup2:
# endif
{
file_t newfd = (file_t)sys_param(dcontext, 1);
if (fd_is_dr_owned(newfd) || fd_is_in_private_range(newfd)) {
SYSLOG_INTERNAL_WARNING_ONCE("app trying to dup-close DR file(s)");
LOG(THREAD, LOG_TOP | LOG_SYSCALLS, 1,
"WARNING: app trying to dup2/dup3 to %d. Disallowing.\n", newfd);
if (DYNAMO_OPTION(fail_on_stolen_fds)) {
set_failure_return_val(dcontext, EBADF);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
} else
set_success_return_val(dcontext, 0);
execute_syscall = false;
}
break;
}
#endif
#ifdef MACOS
case SYS_fcntl_nocancel:
#endif
case SYS_fcntl: {
int cmd = (int)sys_param(dcontext, 1);
long arg = (long)sys_param(dcontext, 2);
* but actual will be above (see notes in os_file_init())
*/
if ((cmd == F_DUPFD || cmd == F_DUPFD_CLOEXEC) && fd_is_in_private_range(arg)) {
SYSLOG_INTERNAL_WARNING_ONCE("app trying to open private fd(s)");
LOG(THREAD, LOG_TOP | LOG_SYSCALLS, 1,
"WARNING: app trying to dup to >= %d. Disallowing.\n", arg);
set_failure_return_val(dcontext, EINVAL);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
execute_syscall = false;
} else {
dcontext->sys_param0 = sys_param(dcontext, 0);
dcontext->sys_param1 = cmd;
}
break;
}
#if defined(X64) || !defined(ARM) || defined(MACOS)
case SYS_getrlimit:
#endif
#if defined(LINUX) && !defined(X64)
case SYS_ugetrlimit:
#endif
dcontext->sys_param0 = sys_param(dcontext, 0);
dcontext->sys_param1 = sys_param(dcontext, 1);
break;
case SYS_setrlimit: {
int resource = (int)sys_param(dcontext, 0);
if (resource == RLIMIT_NOFILE && DYNAMO_OPTION(steal_fds) > 0) {
#if !defined(ARM) && !defined(X64) && !defined(MACOS)
struct compat_rlimit rlim;
#else
struct rlimit rlim;
#endif
if (!d_r_safe_read((void *)sys_param(dcontext, 1), sizeof(rlim), &rlim)) {
LOG(THREAD, LOG_SYSCALLS, 2, "\treturning EFAULT to app for prlimit64\n");
set_failure_return_val(dcontext, EFAULT);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
} else if (rlim.rlim_cur > rlim.rlim_max) {
LOG(THREAD, LOG_SYSCALLS, 2, "\treturning EINVAL for prlimit64\n");
set_failure_return_val(dcontext, EINVAL);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
} else if (rlim.rlim_max <= min_dr_fd &&
* XXX i#2980: should query for that capability.
*/
rlim.rlim_max <= app_rlimit_nofile.rlim_max) {
app_rlimit_nofile.rlim_cur = rlim.rlim_cur;
app_rlimit_nofile.rlim_max = rlim.rlim_max;
set_success_return_val(dcontext, 0);
} else {
LOG(THREAD, LOG_SYSCALLS, 2, "\treturning EPERM to app for setrlimit\n");
set_failure_return_val(dcontext, EPERM);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
}
execute_syscall = false;
}
break;
}
#ifdef LINUX
case SYS_prlimit64:
dcontext->sys_param0 = sys_param(dcontext, 0);
dcontext->sys_param1 = sys_param(dcontext, 1);
dcontext->sys_param2 = sys_param(dcontext, 2);
dcontext->sys_param3 = sys_param(dcontext, 3);
if (
* process that is running with DynamoRIO?
*/
* descriptor table, and different threads of the same process have
* separate file descriptor tables. POSIX specifies that rlimits are
* per-process, not per-thread, and Linux follows suit, so the threads
* with different descriptors will not matter, and the pids sharing
* descriptors turns into the hard-to-solve IPC problem.
*/
(dcontext->sys_param0 == 0 || dcontext->sys_param0 == get_process_id()) &&
dcontext->sys_param1 == RLIMIT_NOFILE &&
dcontext->sys_param2 != (reg_t)NULL && DYNAMO_OPTION(steal_fds) > 0) {
rlimit64_t rlim;
if (!d_r_safe_read((void *)(dcontext->sys_param2), sizeof(rlim), &rlim)) {
LOG(THREAD, LOG_SYSCALLS, 2, "\treturning EFAULT to app for prlimit64\n");
set_failure_return_val(dcontext, EFAULT);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
} else {
LOG(THREAD, LOG_SYSCALLS, 2,
"syscall: prlimit64 soft=" INT64_FORMAT_STRING
" hard=" INT64_FORMAT_STRING " vs DR %d\n",
rlim.rlim_cur, rlim.rlim_max, min_dr_fd);
if (rlim.rlim_cur > rlim.rlim_max) {
LOG(THREAD, LOG_SYSCALLS, 2, "\treturning EINVAL for prlimit64\n");
set_failure_return_val(dcontext, EINVAL);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
} else if (rlim.rlim_max <= min_dr_fd &&
* XXX i#2980: should query for that capability.
*/
rlim.rlim_max <= app_rlimit_nofile.rlim_max) {
app_rlimit_nofile.rlim_cur = rlim.rlim_cur;
app_rlimit_nofile.rlim_max = rlim.rlim_max;
set_success_return_val(dcontext, 0);
if (dcontext->sys_param3 != (reg_t)NULL) {
safe_write_ex((void *)(dcontext->sys_param3), sizeof(rlim),
&app_rlimit_nofile, NULL);
}
} else {
LOG(THREAD, LOG_SYSCALLS, 2,
"\treturning EPERM to app for prlimit64\n");
set_failure_return_val(dcontext, EPERM);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
}
}
execute_syscall = false;
}
break;
#endif
#ifdef LINUX
# ifdef SYS_readlink
case SYS_readlink:
# endif
case SYS_readlinkat:
if (DYNAMO_OPTION(early_inject)) {
dcontext->sys_param0 = sys_param(dcontext, 0);
dcontext->sys_param1 = sys_param(dcontext, 1);
dcontext->sys_param2 = sys_param(dcontext, 2);
if (dcontext->sys_num == SYS_readlinkat)
dcontext->sys_param3 = sys_param(dcontext, 3);
}
break;
# if defined(X86) && defined(X64)
case SYS_arch_prctl: {
dcontext->sys_param0 = sys_param(dcontext, 0);
dcontext->sys_param1 = sys_param(dcontext, 1);
break;
}
# endif
# ifdef X86
case SYS_set_thread_area: {
our_modify_ldt_t desc;
if (INTERNAL_OPTION(mangle_app_seg) &&
d_r_safe_read((void *)sys_param(dcontext, 0), sizeof(desc), &desc)) {
if (os_set_app_thread_area(dcontext, &desc) &&
safe_write_ex((void *)sys_param(dcontext, 0), sizeof(desc), &desc,
NULL)) {
ASSERT_CURIOSITY(desc.limit == 0xfffff);
execute_syscall = false;
set_success_return_val(dcontext, 0);
}
}
break;
}
case SYS_get_thread_area: {
our_modify_ldt_t desc;
if (INTERNAL_OPTION(mangle_app_seg) &&
d_r_safe_read((const void *)sys_param(dcontext, 0), sizeof(desc), &desc)) {
if (os_get_app_thread_area(dcontext, &desc) &&
safe_write_ex((void *)sys_param(dcontext, 0), sizeof(desc), &desc,
NULL)) {
execute_syscall = false;
set_success_return_val(dcontext, 0);
}
}
break;
}
# endif
# ifdef ARM
case SYS_set_tls: {
LOG(THREAD, LOG_VMAREAS | LOG_SYSCALLS, 2, "syscall: set_tls " PFX "\n",
sys_param(dcontext, 0));
if (os_set_app_tls_base(dcontext, TLS_REG_LIB, (void *)sys_param(dcontext, 0))) {
execute_syscall = false;
set_success_return_val(dcontext, 0);
} else {
ASSERT_NOT_REACHED();
}
break;
}
case SYS_cacheflush: {
* the selfmod status of this region: else we should call something
* that invokes handle_modified_code() in a way that handles a bigger
* region than a single write.
*/
app_pc start = (app_pc)sys_param(dcontext, 0);
app_pc end = (app_pc)sys_param(dcontext, 1);
LOG(THREAD, LOG_VMAREAS | LOG_SYSCALLS, 2,
"syscall: cacheflush " PFX "-" PFX "\n", start, end);
flush_fragments_from_region(dcontext, start, end - start,
* use synch to ensure other threads see the
* new code.
*/
false ,
NULL ,
NULL );
break;
}
# endif
#elif defined(MACOS)
#endif
#ifdef DEBUG
# ifdef MACOS
case SYS_open_nocancel:
# endif
# ifdef SYS_open
case SYS_open: {
dcontext->sys_param0 = sys_param(dcontext, 0);
break;
}
# endif
#endif
#ifdef SYS_openat2
case SYS_openat2:
#endif
case SYS_openat: {
* possibly others, but SYS_openat is all we need on modern systems so we
* limit syscall overhead to this single point for now.
*/
dcontext->sys_param0 = 0;
dcontext->sys_param1 = sys_param(dcontext, 1);
const char *path = (const char *)dcontext->sys_param1;
if (!IS_STRING_OPTION_EMPTY(xarch_root) && !os_file_exists(path, false)) {
char *buf = heap_alloc(dcontext, MAXIMUM_PATH HEAPACCT(ACCT_OTHER));
string_option_read_lock();
snprintf(buf, MAXIMUM_PATH, "%s/%s", DYNAMO_OPTION(xarch_root), path);
buf[MAXIMUM_PATH - 1] = '\0';
string_option_read_unlock();
if (os_file_exists(buf, false)) {
LOG(THREAD, LOG_SYSCALLS, 2, "SYS_openat: replacing |%s| with |%s|\n",
path, buf);
set_syscall_param(dcontext, 1, (reg_t)buf);
dcontext->sys_param0 = (reg_t)buf;
} else
heap_free(dcontext, buf, MAXIMUM_PATH HEAPACCT(ACCT_OTHER));
}
break;
}
#ifdef LINUX
case SYS_rseq:
LOG(THREAD, LOG_VMAREAS | LOG_SYSCALLS, 2, "syscall: rseq " PFX " %d %d %d\n",
sys_param(dcontext, 0), sys_param(dcontext, 1), sys_param(dcontext, 2),
sys_param(dcontext, 3));
if (DYNAMO_OPTION(disable_rseq)) {
set_failure_return_val(dcontext, ENOSYS);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
execute_syscall = false;
} else {
dcontext->sys_param0 = sys_param(dcontext, 0);
}
break;
#endif
default: {
#ifdef VMX86_SERVER
if (is_vmkuw_sysnum(dcontext->sys_num)) {
execute_syscall = vmkuw_pre_system_call(dcontext);
break;
}
#endif
break;
}
}
dcontext->whereami = old_whereami;
return execute_syscall;
}
/* Acquires the lock protecting the all-memory-areas cache by calling
 * memcache_lock().  The call is wrapped in IF_NO_MEMQUERY, so this is
 * presumably an empty function in builds where that macro drops its argument
 * (NOTE(review): confirm against the macro's definition).
 */
void
all_memory_areas_lock(void)
{
IF_NO_MEMQUERY(memcache_lock());
}
/* Releases the lock protecting the all-memory-areas cache by calling
 * memcache_unlock().  The call is wrapped in IF_NO_MEMQUERY, so this is
 * presumably an empty function in builds where that macro drops its argument
 * (NOTE(review): confirm against the macro's definition).
 */
void
all_memory_areas_unlock(void)
{
IF_NO_MEMQUERY(memcache_unlock());
}
/* Forwards the region described by start and end, together with its
 * protection bits (prot) and type, to memcache_update().  The call is wrapped
 * in IF_NO_MEMQUERY, so nothing happens in builds where that macro drops its
 * argument (NOTE(review): confirm against the macro's definition).
 */
void
update_all_memory_areas(app_pc start, app_pc end, uint prot, int type)
{
IF_NO_MEMQUERY(memcache_update(start, end, prot, type));
}
/* Removes the region described by start and end from the memory cache.
 *
 * When the IF_NO_MEMQUERY body is compiled in, the result of
 * memcache_remove() is returned directly.  Otherwise control falls through to
 * the unconditional "return true" below.
 */
bool
remove_from_all_memory_areas(app_pc start, app_pc end)
{
IF_NO_MEMQUERY(return memcache_remove(start, end));
/* Reached only when the macro above contributes no return statement. */
return true;
}
* overmaps to ensure things look consistent. */
static bool
mmap_check_for_module_overlap(app_pc base, size_t size, bool readable, uint64 inode,
bool at_map)
{
module_area_t *ma;
os_get_module_info_lock();
ma = module_pc_lookup(base);
if (ma != NULL) {
* over the initial map from someone just mapping over part of a module? If
* is the latter case need to adjust the view size or remove from module list. */
LOG(GLOBAL, LOG_VMAREAS, 2,
"%s mmap overlapping module area : \n"
"\tmap : base=" PFX " base+size=" PFX " inode=" UINT64_FORMAT_STRING "\n"
"\tmod : start=" PFX " end=" PFX " inode=" UINT64_FORMAT_STRING "\n",
at_map ? "new" : "existing", base, base + size, inode, ma->start, ma->end,
ma->names.inode);
ASSERT_CURIOSITY(base >= ma->start);
if (at_map) {
ASSERT_CURIOSITY(base + size <= ma->end);
} else {
* haven't been able to get gdb to break in early enough to really get a good
* look at the early loader behavior. Two issues: One case is with our .so
* for which the anonymous .bss mapping is one page larger than expected
* (which might be some loader bug in the size calculation? or something? if
* so should see it trigger the at_map curiosity on some dll and can address
* then) and the other is that for a few executables the .bss mapping is much
* larger (~0x20000 larger) then expected when running under DR (but not
* running natively where it is instead the expected size). Both could just
* be the loader merging adjacent identically protected regions though I
* can't explain the discrepancy between DR and native given that our vmmheap
* is elsewhere in the address space (so who and how allocated that adjacent
* memory). I've yet to see any issue with dynamically loaded modules so
* it's probably the loader merging regions. Still worth investigating. */
ASSERT_CURIOSITY(inode == 0 ||
module_contains_addr(ma, base + size - 1));
}
* of the file mapping (i#2566).
*/
if (ma->names.inode == 0)
ma->names.inode = inode;
ASSERT_CURIOSITY(ma->names.inode == inode || inode == 0 );
DOCHECK(1, {
if (readable && module_is_header(base, size)) {
* disk page could hold both RO and .data, occupying just 1 page of
* disk space, e.g. /usr/lib/httpd/modules/mod_auth_anon.so. When
* such a module is mapped in, the os maps the same disk page twice,
* one readonly and one copy-on-write (see pg. 96, Sec 4.4 from
* Linkers and Loaders by John R. Levine). It also possible for
* such small modules to have multiple LOAD data segments. Since all
* these segments are mapped from a single disk page they will all have an
* elf_header satisfying the check above. So, if the new mmap overlaps an
* elf_area and it is also a header, then make sure the offsets (from the
* beginning of the backing file) of all the segments up to the currect
* one are within the page size. Note, if it is a header of a different
* module, then we'll not have an overlap, so we will not hit this case.
*/
bool cur_seg_found = false;
int seg_id = 0;
while (seg_id < ma->os_data.num_segments &&
ma->os_data.segments[seg_id].start <= base) {
cur_seg_found = ma->os_data.segments[seg_id].start == base;
ASSERT_CURIOSITY(
ma->os_data.segments[seg_id].offset <
PAGE_SIZE
* address space, so we often hit modules we already
* know about. */
IF_MACOS(|| !dynamo_initialized && ma->start == base));
++seg_id;
}
ASSERT_CURIOSITY(cur_seg_found);
}
});
}
os_get_module_info_unlock();
#ifdef ANDROID
* loader does not map the whole file up front.
*/
if (ma != NULL && at_map && readable)
os_module_update_dynamic_info(base, size, at_map);
#endif
return ma != NULL;
}
/* Adds the app module whose mapping begins at base to the module list.
 *
 * dcontext - calling thread's context (NOTE(review): presumably consumed by
 *            the THREAD logging macro; confirm).
 * at_map   - true when called for the initial map of the module; false when
 *            the module is discovered later, in which case its program headers
 *            are walked to compute the size of the whole image.
 * base     - start address of the mapping.
 * size     - size of the mapping.
 * memprot  - MEMPROT_* bits of the mapping; only used for logging here.
 *
 * The module is added only when the memquery iterator reports a region that
 * starts exactly at base.
 */
static void
os_add_new_app_module(dcontext_t *dcontext, bool at_map, app_pc base, size_t size,
                      uint memprot)
{
    memquery_iter_t iter;
    bool found_map = false;
    uint64 file_inode = 0;
    const char *path = "";
    size_t image_size = size;
    app_pc first_seg_base = NULL;
    app_pc first_seg_end = NULL;
    app_pc image_end = NULL;

    /* For a module found after the fact, size is expected to describe only the
     * first segment, so derive the full extent from the program headers.
     */
    if (!at_map &&
        module_walk_program_headers(base, size, at_map, false, &first_seg_base,
                                    &first_seg_end, &image_end, NULL, NULL)) {
        ASSERT_CURIOSITY(size ==
                             (ALIGN_FORWARD(first_seg_end, PAGE_SIZE) -
                              (ptr_uint_t)first_seg_base) ||
                         base == vdso_page_start || base == vsyscall_page_start);
        image_size = ALIGN_FORWARD(image_end, PAGE_SIZE) - (ptr_uint_t)first_seg_base;
    }
    LOG(THREAD, LOG_SYSCALLS | LOG_VMAREAS, 2, "dlopen " PFX "-" PFX "%s\n", base,
        base + image_size, TEST(MEMPROT_EXEC, memprot) ? " +x" : "");

    /* Observed loader behavior: the file is first mapped with a size equal to
     * the final memory image size (it is unclear how that size is obtained
     * without reading the elf header and walking every program header for the
     * largest virtual offset).  That reserves all the space that will be
     * needed.  The loader then walks the program headers, mapping over the
     * previously mapped space with the appropriate permissions and offsets;
     * the .bss portion is mapped over as anonymous.  Depending on the program
     * headers it may also make some areas read-only after fixing up their
     * relocations etc.  NOTE - the section headers are never guaranteed to be
     * mapped in, so only segments (not sections) can be walked reliably
     * without going to disk.
     */
    /* Adding to the module list at the first map seems the best choice: the
     * bounds are known and it is difficult to tell when the loader is
     * finished.  The downside is that the memory layout is not final at the
     * initial map (memory beyond the first segment will be shifted for page
     * alignment reasons), so reading anything beyond the first segment needs
     * care and adjustment until the loader finishes.  The same applies to the
     * client, which gets notified when we add to the list.  FIXME - we could
     * track the expected segment overmaps and only notify the client after
     * the last one (still before linking and relocation, but that is true on
     * Windows too).
     */
    memquery_iterator_start(&iter, base, true);
    while (!found_map && memquery_iterator_next(&iter)) {
        if (iter.vm_start != base)
            continue;
        ASSERT_CURIOSITY(iter.inode != 0 || base == vdso_page_start ||
                         base == vsyscall_page_start);
        ASSERT_CURIOSITY(iter.offset == 0);
        ASSERT_CURIOSITY(
            (iter.vm_end - iter.vm_start == ALIGN_FORWARD(size, PAGE_SIZE)));
        file_inode = iter.inode;
        /* Private copy of the region's comment; released after the add. */
        path = dr_strdup(iter.comment HEAPACCT(ACCT_OTHER));
        found_map = true;
    }
    memquery_iterator_stop(&iter);
#ifdef HAVE_MEMINFO
    ASSERT_CURIOSITY(found_map);
#else
    /* Without meminfo there is no way, at library map time, to find out the
     * name of the file that was mapped, so its inode is not available either.
     *
     * A module_list_add with no filename still results in the library name
     * being extracted from the .dynamic section and added to the module list.
     * That name may not always exist, though, so we might end up with a
     * library that has no file name available at all!
     *
     * Note: visor implements vsi mem maps that give file info but no path;
     * that should be ok.  xref PR 401580.
     *
     * Once PR 235433 is implemented in visor, fix memquery_iterator*() to use
     * vsi to find out page protection info, file name & inode.
     */
#endif
    if (!found_map)
        return;
    module_list_add(base, ALIGN_FORWARD(image_size, PAGE_SIZE), at_map, path,
                    file_inode);
    dr_strfree(path HEAPACCT(ACCT_OTHER));
}
/* Makes sure the module containing pc is on the module list.
 *
 * If pc is not covered by any known module but the OS reports the memory at
 * pc as an image, the module is added now: some modules might not be detected
 * by DynamoRIO in process_mmap.
 *
 * dcontext - calling thread's context.  NOTE(review): the add below is given
 *            get_thread_private_dcontext() rather than this parameter.
 * pc       - address to look up.
 */
void
os_check_new_app_module(dcontext_t *dcontext, app_pc pc)
{
    dr_mem_info_t info;

    os_get_module_info_lock();
    /* Short-circuit order matters: the OS is queried only on a lookup miss. */
    if (module_pc_lookup(pc) == NULL && query_memory_ex_from_os(pc, &info) &&
        info.type == DR_MEMTYPE_IMAGE) {
        /* The module info lock is dropped around the add and re-taken after
         * (presumably because adding acquires it itself -- TODO confirm).
         */
        os_get_module_info_unlock();
        os_add_new_app_module(get_thread_private_dcontext(), false, info.base_pc,
                              info.size, info.prot);
        os_get_module_info_lock();
    }
    os_get_module_info_unlock();
}
/* Processes a new app mapping of size bytes at base.
 *
 * prot and flags are the OS-level protection and mmap flags of the mapping.
 * map_type (debug builds only) is a label used for logging; it is switched to
 * "ELF SO" when the mapping is classified as an image.
 *
 * The mapping is classified as image or non-image, newly discovered modules
 * are registered via os_add_new_app_module(), and the region is then passed
 * to memcache_handle_mmap() (under IF_NO_MEMQUERY) and app_memory_allocation().
 */
static void
process_mmap(dcontext_t *dcontext, app_pc base, size_t size, uint prot,
             uint flags _IF_DEBUG(const char *map_type))
{
    const bool is_anon = TEST(MAP_ANONYMOUS, flags);
    bool is_image = false;
    uint mem_prot = osprot_to_memprot(prot);
#ifdef ANDROID
    /* Non-anonymous mappings are flagged as carrying a comment. */
    if (!is_anon)
        mem_prot |= MEMPROT_HAS_COMMENT;
#endif
    const bool readable = TEST(MEMPROT_READ, mem_prot);

    LOG(THREAD, LOG_SYSCALLS, 4, "process_mmap(" PFX "," PFX ",0x%x,%s,%s)\n", base, size,
        flags, memprot_string(mem_prot), map_type);
    /* Notes on how ELF SOs are mapped in.
     *
     * o The initial mmap for an ELF file specifies enough space for
     *   all segments (and their constituent sections) in the file.
     *   The protection bits for that section are used for the entire
     *   region, and subsequent mmaps for subsequent segments within
     *   the region modify their portion's protection bits as needed.
     *   So if the prot bits for the first segment are +x, the entire
     *   region is +x. ** Note that our primary concern is adjusting
     *   exec areas to reflect the prot bits of subsequent
     *   segments. ** The region is added to the all-memory areas
     *   and also to exec areas (as determined by app_memory_allocation()).
     *
     * o Any subsequent segment sub-mappings specify their own protection
     *   bits and therefore are added to the exec areas via normal
     *   processing. They are also "naturally" added to the all-mems list.
     *   We do a little extra processing when mapping into a previously
     *   mapped region and the prot bits mismatch; if the new mapping is
     *   not +x, flushing needs to occur.
     */
    /* To classify a file-backed mapping we
     * can read the memory to see if it is a elf_header
     */
    if (is_anon) {
        LOG(THREAD, LOG_SYSCALLS, 4, "mmap " PFX ": anon\n", base);
    } else if (mmap_check_for_module_overlap(base, size, readable, 0, true)) {
        /* XXX: how can we distinguish the loader mapping segments
         * over the initial map from someone just mapping over part of a module? If
         * is the latter case need to adjust the view size or remove from module list. */
        is_image = true;
        DODEBUG({ map_type = "ELF SO"; });
        LOG(THREAD, LOG_SYSCALLS, 4, "mmap " PFX ": overlaps image\n", base);
    } else if (readable &&
               /* The mapped file's memory might not be something we can safely
                * read, so pass size=0 to use a safe_read.
                */
               module_is_header(base, 0)) {
#ifdef ANDROID
        /* On Android, an ELF header followed by a no-access data region right
         * after the mapping is treated as a module load.
         */
        dr_mem_info_t info;
        if (query_memory_ex_from_os((byte *)ALIGN_FORWARD(base + size, PAGE_SIZE),
                                    &info) &&
            info.prot == MEMPROT_NONE && info.type == DR_MEMTYPE_DATA) {
            LOG(THREAD, LOG_SYSCALLS, 4, "mmap " PFX ": Android elf\n", base);
            is_image = true;
            DODEBUG({ map_type = "ELF SO"; });
            os_add_new_app_module(dcontext, true, base, size, mem_prot);
        } else
#endif
            if (module_is_partial_map(base, size, mem_prot)) {
            /* An app reading part of an ELF file through mmap
             * might accidentally be treated as a module load. Heuristically
             * distinguish this by saying that if this is the first mmap for an ELF
             * (i.e., it doesn't overlap with a previous map), and if it's small,
             * then don't treat it as a module load.
             */
            LOG(THREAD, LOG_SYSCALLS, 4, "mmap " PFX ": partial\n", base);
        } else {
            LOG(THREAD, LOG_SYSCALLS, 4, "mmap " PFX ": elf header\n", base);
            is_image = true;
            DODEBUG({ map_type = "ELF SO"; });
            os_add_new_app_module(dcontext, true, base, size, mem_prot);
        }
    }

    LOG(THREAD, LOG_SYSCALLS, 4, "\t try app_mem_alloc\n");
    IF_NO_MEMQUERY(memcache_handle_mmap(dcontext, base, size, mem_prot, is_image));
    if (app_memory_allocation(dcontext, base, size, mem_prot,
                              is_image _IF_DEBUG(map_type)))
        STATS_INC(num_app_code_modules);
    LOG(THREAD, LOG_SYSCALLS, 4, "\t app_mem_alloc -- DONE\n");
}
#ifdef LINUX
* i#173: old_prot and old_type should be from before the system call
*/
static bool
handle_app_mremap(dcontext_t *dcontext, byte *base, size_t size, byte *old_base,
size_t old_size, uint old_prot, uint old_type)
{
if (!mmap_syscall_succeeded(base))
return false;
if (base != old_base || size < old_size) {
* there was a change */
DEBUG_DECLARE(bool ok;)
app_memory_deallocation(dcontext, (app_pc)old_base, old_size,
false ,
false );
DOCHECK(1, {
os_get_module_info_lock();
ASSERT_CURIOSITY(!module_overlaps(base, size));
os_get_module_info_unlock();
});
* the os) is the same as what the prot used to be for the old
* region.
*/
DOCHECK(1, {
uint memprot;
ok = get_memory_info_from_os(base, NULL, NULL, &memprot);
* +x may be caused by READ_IMPLIES_EXEC set in personality flag (i#262)
*/
ASSERT(ok &&
(memprot == old_prot || (memprot & (~MEMPROT_EXEC)) == old_prot));
});
app_memory_allocation(dcontext, base, size, old_prot,
old_type == DR_MEMTYPE_IMAGE _IF_DEBUG("mremap"));
IF_NO_MEMQUERY(memcache_handle_mremap(dcontext, base, size, old_base, old_size,
old_prot, old_type));
}
return true;
}
/* Updates our bookkeeping after the app's program break moved from old_brk to
 * new_brk.  Both values are rounded up to a page boundary before use.
 * lowest_brk is forwarded untouched to memcache_handle_app_brk().
 */
static void
handle_app_brk(dcontext_t *dcontext, byte *lowest_brk, byte *old_brk,
               byte *new_brk)
{
    /* The break values are not assumed to be page aligned. */
    app_pc old_end = (app_pc)ALIGN_FORWARD(old_brk, PAGE_SIZE);
    app_pc new_end = (app_pc)ALIGN_FORWARD(new_brk, PAGE_SIZE);

    if (new_end < old_end) {
        /* The heap shrank.  It might seem we don't really need to call
         * this here: but seems safest to do so, esp if someone made part of
         * the heap read-only and then put code there.
         */
        app_memory_deallocation(dcontext, new_end, old_end - new_end, false, false);
    }
    /* When the heap grows there is no call to app_memory_allocation(): the
     * growth doesn't interact
     * w/ security policies.
     */
    IF_NO_MEMQUERY(memcache_handle_app_brk(lowest_brk, old_end, new_end));
}
#endif
* the syscall.
*/
*/
void
post_system_call(dcontext_t *dcontext)
{
priv_mcontext_t *mc = get_mcontext(dcontext);
int sysnum = dcontext->sys_num;
* Some syscall return addresses that have the sign bit set and so
* appear to be failures but are not. They are handled on a
* case-by-case basis in the switch statement below.
*/
ptr_int_t result = (ptr_int_t)MCXT_SYSCALL_RES(mc);
bool success = syscall_successful(mc, sysnum);
app_pc base;
size_t size;
uint prot;
dr_where_am_i_t old_whereami;
DEBUG_DECLARE(bool ok;)
RSTATS_INC(post_syscall);
old_whereami = dcontext->whereami;
dcontext->whereami = DR_WHERE_SYSCALL_HANDLER;
#if defined(LINUX) && defined(X86)
* the syscall instruction its value is needed:
* 0xffffe400 <__kernel_vsyscall+0>: push %ebp
* 0xffffe401 <__kernel_vsyscall+1>: mov %ecx,%ebp
* 0xffffe403 <__kernel_vsyscall+3>: syscall
* 0xffffe405 <__kernel_vsyscall+5>: mov $0x2b,%ecx
* 0xffffe40a <__kernel_vsyscall+10>: movl %ecx,%ss
* 0xffffe40c <__kernel_vsyscall+12>: mov %ebp,%ecx
* 0xffffe40e <__kernel_vsyscall+14>: pop %ebp
* 0xffffe40f <__kernel_vsyscall+15>: ret
*/
if (should_syscall_method_be_sysenter() && !dcontext->sys_was_int) {
mc->xbp = dcontext->sys_xbp;
}
#endif
if (false
#ifdef SYS_fork
|| sysnum ==
SYS_fork
#endif
IF_LINUX(||
(sysnum == SYS_clone && !TEST(CLONE_VM, dcontext->sys_param0)) ||
(sysnum == SYS_clone3 &&
!TEST(CLONE_VM, get_stored_clone3_flags(dcontext))))) {
if (result == 0) {
thread_id_t child = get_sys_thread_id();
#ifdef DEBUG
thread_id_t parent = get_parent_id();
SYSLOG_INTERNAL_INFO("-- parent %d forked child %d --", parent, child);
#endif
ASSERT(parent != 0);
replace_thread_id(dcontext->owning_thread, child);
dcontext->owning_thread = child;
dcontext->owning_process = get_process_id();
* need access to static vars in dynamo.c, that's why we don't do it. */
* getting to post_system_call() is any of that going to be messed up
* by waiting till here to fixup the child logfolder/file and tid?
*/
dynamorio_fork_init(dcontext);
LOG(THREAD, LOG_SYSCALLS, 1,
"after fork-like syscall: parent is %d, child is %d\n", parent, child);
} else {
os_fork_post(dcontext, true );
}
}
LOG(THREAD, LOG_SYSCALLS, 2, "post syscall: sysnum=" PFX ", result=" PFX " (%d)\n",
sysnum, MCXT_SYSCALL_RES(mc), (int)MCXT_SYSCALL_RES(mc));
switch (sysnum) {
#ifdef DEBUG
# ifdef MACOS
case SYS_open_nocancel:
# endif
# ifdef SYS_open
case SYS_open: {
if (success) {
* module.c elf curiosities
*/
LOG(THREAD, LOG_SYSCALLS, 2, "SYS_open %s => %d\n", dcontext->sys_param0,
(int)result);
}
break;
}
# endif
#endif
#if defined(LINUX) && !defined(X64) && !defined(ARM)
case SYS_mmap:
#endif
case IF_MACOS_ELSE(SYS_mmap, IF_X64_ELSE(SYS_mmap, SYS_mmap2)): {
uint flags;
DEBUG_DECLARE(const char *map_type;)
RSTATS_INC(num_app_mmaps);
base = (app_pc)MCXT_SYSCALL_RES(mc);
* directly when dynamically loading an SO, i.e., dlopen(). */
#ifdef LINUX
success = mmap_syscall_succeeded((app_pc)result);
* largest uint value of any errno and the addr returned is
* page-aligned.
*/
ASSERT_CURIOSITY(
!success ||
((app_pc)result < (app_pc)(ptr_int_t)-0x1000 && ALIGNED(base, PAGE_SIZE)));
#else
ASSERT_CURIOSITY(!success || ALIGNED(base, PAGE_SIZE));
#endif
if (!success)
goto exit_post_system_call;
#if defined(LINUX) && !defined(X64) && !defined(ARM)
if (sysnum == SYS_mmap) {
* safe. */
mmap_arg_struct_t *arg = (mmap_arg_struct_t *)dcontext->sys_param0;
size = (size_t)arg->len;
prot = (uint)arg->prot;
flags = (uint)arg->flags;
DEBUG_DECLARE(map_type = "mmap";)
} else {
#endif
size = (size_t)dcontext->sys_param1;
prot = (uint)dcontext->sys_param2;
flags = (uint)dcontext->sys_param3;
DEBUG_DECLARE(map_type = IF_X64_ELSE("mmap2", "mmap");)
#if defined(LINUX) && !defined(X64) && !defined(ARM)
}
#endif
process_mmap(dcontext, base, size, prot, flags _IF_DEBUG(map_type));
break;
}
case SYS_munmap: {
app_pc addr = (app_pc)dcontext->sys_param0;
size_t len = (size_t)dcontext->sys_param1;
* and flushed fragments and removed the region from exec areas.
* If the unmap failed, we re-add the region to exec areas.
* For zero-length unmaps we don't need to re-add anything,
* and we hit an assert in vmareas.c if we try (i#4031).
*
* The same logic can be used on Windows (but isn't yet).
*/
* executes in the region after our pre_system_call processing
* but before the re-add below, it will get a security violation.
* That's less than ideal but at least isn't a security hole.
* The overall shortcoming is that we lose the state from our
* stateful security policies -- future exec list, tables used
* for RCT (.C/.E/.F) -- which can't be easily restored. Also,
* the re-add could add a region that wasn't on the exec list
* previously.
*
* See case 7559 for a better approach.
*/
if (!success && len != 0) {
dr_mem_info_t info;
DEBUG_DECLARE(ok =)
query_memory_ex_from_os(addr, &info);
ASSERT(ok);
app_memory_allocation(dcontext, addr, len, info.prot,
info.type ==
DR_MEMTYPE_IMAGE _IF_DEBUG("failed munmap"));
IF_NO_MEMQUERY(
memcache_update_locked((app_pc)ALIGN_BACKWARD(addr, PAGE_SIZE),
(app_pc)ALIGN_FORWARD(addr + len, PAGE_SIZE),
info.prot, info.type, false ));
}
break;
}
#ifdef LINUX
case SYS_mremap: {
app_pc old_base = (app_pc)dcontext->sys_param0;
size_t old_size = (size_t)dcontext->sys_param1;
base = (app_pc)MCXT_SYSCALL_RES(mc);
size = (size_t)dcontext->sys_param2;
RSTATS_INC(num_app_munmaps);
RSTATS_INC(num_app_mmaps);
success =
handle_app_mremap(dcontext, base, size, old_base, old_size,
* obtained from pre_system_call
*/
(uint)dcontext->sys_param3, (uint)dcontext->sys_param4);
* largest uint value of any errno and the addr returned is
* is page-aligned.
*/
ASSERT_CURIOSITY(
!success ||
((app_pc)result < (app_pc)(ptr_int_t)-0x1000 && ALIGNED(base, PAGE_SIZE)));
if (!success)
goto exit_post_system_call;
break;
}
#endif
case SYS_mprotect: {
base = (app_pc)dcontext->sys_param0;
size = dcontext->sys_param1;
prot = dcontext->sys_param2;
#ifdef VMX86_SERVER
if (os_in_vmkernel_userworld() && result == -EBUSY && prot == PROT_NONE) {
result = mprotect_syscall(base, size, PROT_READ);
set_success_return_val(dcontext, result);
success = (result >= 0);
LOG(THREAD, LOG_VMAREAS, 1,
"re-doing mprotect -EBUSY for " PFX "-" PFX " => %d\n", base, base + size,
(int)result);
SYSLOG_INTERNAL_WARNING_ONCE("re-doing mprotect for PR 475111, PR 107872");
}
#endif
* writable areas we've made read-only
*/
if (!success) {
uint memprot = 0;
if (!get_memory_info_from_os(base, NULL, NULL, &memprot))
memprot = PROT_NONE;
LOG(THREAD, LOG_SYSCALLS, 3,
"syscall: mprotect failed: " PFX "-" PFX " prot->%d\n", base, base + size,
osprot_to_memprot(prot));
LOG(THREAD, LOG_SYSCALLS, 3, "\told prot->%d\n", memprot);
if (prot != memprot_to_osprot(memprot)) {
* this action doesn't have any unexpected side effects
* when doing so (such as not reversing some bit of internal
* state).
*/
uint new_memprot;
DEBUG_DECLARE(uint res =)
app_memory_protection_change(dcontext, base, size,
osprot_to_memprot(prot), &new_memprot, NULL,
false );
ASSERT_NOT_IMPLEMENTED(res != SUBSET_APP_MEM_PROT_CHANGE);
ASSERT(res == DO_APP_MEM_PROT_CHANGE ||
res == PRETEND_APP_MEM_PROT_CHANGE);
* FIXME: This fix assumes the whole region had the prot &
* type, which is true in the cases we have seen so far, but
* theoretically may not be true. If it isn't true, multiple
* memory areas with different types/protections might have
* been changed in pre_system_call(), so will have to keep a
* list of all vmareas changed. This might be expensive for
* each mprotect syscall to guard against a rare theoretical bug.
*/
ASSERT_CURIOSITY(!dcontext->mprot_multi_areas);
IF_NO_MEMQUERY(memcache_update_locked(
base, base + size, memprot, -1 , true ));
}
}
break;
}
#ifdef ANDROID
case SYS_prctl: {
int code = (int)dcontext->sys_param0;
int subcode = (ulong)dcontext->sys_param1;
if (success && code == PR_SET_VMA && subcode == PR_SET_VMA_ANON_NAME) {
byte *addr = (byte *)dcontext->sys_param2;
size_t len = (size_t)dcontext->sys_param3;
IF_DEBUG(const char *comment = (const char *)dcontext->sys_param4;)
uint memprot = 0;
if (!get_memory_info_from_os(addr, NULL, NULL, &memprot))
memprot = MEMPROT_NONE;
ASSERT((comment == NULL && !TEST(MEMPROT_HAS_COMMENT, memprot)) ||
(comment != NULL && TEST(MEMPROT_HAS_COMMENT, memprot)));
LOG(THREAD, LOG_SYSCALLS, 2,
"syscall: prctl PR_SET_VMA_ANON_NAME base=" PFX " size=" PFX
" comment=%s\n",
addr, len, comment == NULL ? "<null>" : comment);
IF_NO_MEMQUERY(memcache_update_locked(
addr, addr + len, memprot, -1 , true ));
}
break;
}
#endif
#ifdef LINUX
case SYS_brk: {
* This code should work regardless of whether syscall failed
* (if it failed, the old break will be returned). We stored
* the old break in sys_param1 in pre-syscall.
*/
app_pc old_brk = (app_pc)dcontext->sys_param1;
app_pc new_brk = (app_pc)result;
DEBUG_DECLARE(app_pc req_brk = (app_pc)dcontext->sys_param0;);
ASSERT(!DYNAMO_OPTION(emulate_brk));
# ifdef DEBUG
if (DYNAMO_OPTION(early_inject) &&
req_brk != NULL ) {
DO_ONCE({
ASSERT_CURIOSITY(new_brk > old_brk &&
"i#1004: first brk() "
"allocation failed with -early_inject");
});
}
# endif
handle_app_brk(dcontext, NULL, old_brk, new_brk);
break;
}
#endif
#ifdef LINUX
case SYS_clone3:
case SYS_clone: {
LOG(THREAD, LOG_SYSCALLS, 2, "syscall: clone returned " PFX "\n",
MCXT_SYSCALL_RES(mc));
* Please refer to comment on os_switch_lib_tls.
* It is only called in parent thread.
* The child thread's tls setup is done in os_tls_app_seg_init.
*/
if (was_thread_create_syscall(dcontext)) {
if (INTERNAL_OPTION(private_loader))
os_switch_lib_tls(dcontext, false );
if (sysnum == SYS_clone3) {
* app's copy in the SYSCALL_PARAM_CLONE3_CLONE_ARGS reg.
* sys_param1 contains the pointer to DR's clone_args, and
* sys_param0 contains the pointer to the app's original
* clone_args.
*/
# ifdef X86
ASSERT(sys_param(dcontext, SYSCALL_PARAM_CLONE3_CLONE_ARGS) ==
dcontext->sys_param1);
set_syscall_param(dcontext, SYSCALL_PARAM_CLONE3_CLONE_ARGS,
dcontext->sys_param0);
# else
* to hold its return value. As the clone_args pointer isn't available
* post-syscall natively anyway, there's no need to restore here.
*/
# endif
uint app_clone_args_size = (uint)dcontext->sys_param2;
heap_free(dcontext, (clone3_syscall_args_t *)dcontext->sys_param1,
app_clone_args_size HEAPACCT(ACCT_OTHER));
} else if (sysnum == SYS_clone) {
set_syscall_param(dcontext, SYSCALL_PARAM_CLONE_STACK,
dcontext->sys_param1);
}
}
break;
}
#elif defined(MACOS) && !defined(X64)
case SYS_bsdthread_create: {
ASSERT(*sys_param_addr(dcontext, 0) == (reg_t)new_bsdthread_intercept);
*sys_param_addr(dcontext, 0) = dcontext->sys_param0;
*sys_param_addr(dcontext, 1) = dcontext->sys_param1;
break;
}
#endif
#ifdef SYS_fork
case SYS_fork: {
LOG(THREAD, LOG_SYSCALLS, 2, "syscall: fork returned " PFX "\n",
MCXT_SYSCALL_RES(mc));
break;
}
#endif
#ifdef SYS_vfork
case SYS_vfork: {
LOG(THREAD, LOG_SYSCALLS, 2, "syscall: vfork returned " PFX "\n",
MCXT_SYSCALL_RES(mc));
IF_LINUX(ASSERT(was_thread_create_syscall(dcontext)));
LOG(THREAD, LOG_SYSCALLS, 2, "vfork: restoring xsp from " PFX " to " PFX "\n",
mc->xsp, dcontext->sys_param1);
mc->xsp = dcontext->sys_param1;
if (MCXT_SYSCALL_RES(mc) != 0) {
* Please refer to comment on os_switch_lib_tls.
* It is only called in parent thread.
* The child thread's tls setup is done in os_tls_app_seg_init.
*/
if (INTERNAL_OPTION(private_loader)) {
os_switch_lib_tls(dcontext, false );
}
}
break;
}
#endif
case SYS_execve: {
success = false;
mark_thread_execve(dcontext->thread_record, false);
ASSERT(result < 0);
LOG(THREAD, LOG_SYSCALLS, 2, "syscall: execve failed\n");
handle_execve_post(dcontext);
* the switch(). */
goto exit_post_system_call;
break;
}
case IF_MACOS_ELSE(SYS_sigaction, SYS_rt_sigaction): {
asmlinkage long
sys_rt_sigaction(int sig, const struct sigaction *act,
struct sigaction *oact, size_t sigsetsize)
*/
int sig = (int)dcontext->sys_param0;
const kernel_sigaction_t *act = (const kernel_sigaction_t *)dcontext->sys_param1;
prev_sigaction_t *oact = (prev_sigaction_t *)dcontext->sys_param2;
size_t sigsetsize = (size_t)dcontext->sys_param3;
uint res;
res = handle_post_sigaction(dcontext, success, sig, act, oact, sigsetsize);
LOG(THREAD, LOG_SYSCALLS, 2, "syscall: %ssigaction => %d\n",
IF_MACOS_ELSE("", "rt_"), -res);
if (res != 0)
set_failure_return_val(dcontext, res);
if (!success || res != 0)
goto exit_post_system_call;
break;
}
#if defined(LINUX) && !defined(X64)
case SYS_sigaction: {
int sig = (int)dcontext->sys_param0;
const old_sigaction_t *act = (const old_sigaction_t *)dcontext->sys_param1;
old_sigaction_t *oact = (old_sigaction_t *)dcontext->sys_param2;
uint res = handle_post_old_sigaction(dcontext, success, sig, act, oact);
LOG(THREAD, LOG_SYSCALLS, 2, "syscall: sigaction => %d\n", -res);
if (res != 0)
set_failure_return_val(dcontext, res);
if (!success || res != 0)
goto exit_post_system_call;
break;
}
#endif
case IF_MACOS_ELSE(SYS_sigprocmask, SYS_rt_sigprocmask): {
asmlinkage long
sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset,
size_t sigsetsize)
*/
int status = handle_post_sigprocmask(
dcontext, (int)dcontext->sys_param0, (kernel_sigset_t *)dcontext->sys_param1,
(kernel_sigset_t *)dcontext->sys_param2, (size_t)dcontext->sys_param3);
if (status != 0) {
set_failure_return_val(dcontext, status);
}
break;
}
#if defined(LINUX) && !defined(X64)
case SYS_sigreturn:
#endif
case IF_MACOS_ELSE(SYS_sigreturn, SYS_rt_sigreturn):
* assert below
*/
success = true;
break;
case SYS_setitimer:
handle_post_setitimer(dcontext, success, (int)dcontext->sys_param0,
(const struct itimerval *)dcontext->sys_param1,
(struct itimerval *)dcontext->sys_param2);
break;
case SYS_getitimer:
handle_post_getitimer(dcontext, success, (int)dcontext->sys_param0,
(struct itimerval *)dcontext->sys_param1);
break;
#if defined(LINUX) && defined(X86)
case SYS_alarm:
handle_post_alarm(dcontext, success, (unsigned int)dcontext->sys_param0);
break;
#endif
#if defined(LINUX) && defined(X86) && defined(X64)
case SYS_arch_prctl: {
if (success && INTERNAL_OPTION(mangle_app_seg)) {
tls_handle_post_arch_prctl(dcontext, dcontext->sys_param0,
dcontext->sys_param1);
}
break;
}
#endif
#ifdef LINUX
# ifdef SYS_ppoll_time64
case SYS_ppoll_time64:
# endif
case SYS_ppoll: {
if (dcontext->sys_param3 == (reg_t)NULL)
break;
handle_post_extended_syscall_sigmasks(dcontext, success);
set_syscall_param(dcontext, 3, dcontext->sys_param3);
break;
}
# ifdef SYS_pselect6_time64
case SYS_pselect6_time64:
# endif
case SYS_pselect6: {
if (dcontext->sys_param4 == (reg_t)NULL)
break;
typedef struct {
kernel_sigset_t *sigmask;
size_t sizemask;
} data_t;
data_t *data_param = (data_t *)dcontext->sys_param3;
handle_post_extended_syscall_sigmasks(dcontext, success);
if (!safe_write_ex((void *)&data_param->sigmask, sizeof(data_param->sigmask),
&dcontext->sys_param4, NULL)) {
LOG(THREAD, LOG_SYSCALLS, 2, "\tEFAULT for pselect6 post syscall\n");
}
break;
}
case SYS_epoll_pwait: {
if (dcontext->sys_param4 == (reg_t)NULL)
break;
handle_post_extended_syscall_sigmasks(dcontext, success);
set_syscall_param(dcontext, 4, dcontext->sys_param4);
break;
}
#endif
#ifdef SYS_dup2
case SYS_dup2: IF_LINUX(case SYS_dup3:) {
# ifdef LINUX
if (success) {
signal_handle_dup(dcontext, (file_t)sys_param(dcontext, 1),
(file_t)result);
}
# endif
break;
}
#endif
#ifdef MACOS
case SYS_fcntl_nocancel:
#endif
case SYS_fcntl: {
#ifdef LINUX
if (success) {
file_t fd = (long)dcontext->sys_param0;
int cmd = (int)dcontext->sys_param1;
if ((cmd == F_DUPFD || cmd == F_DUPFD_CLOEXEC))
signal_handle_dup(dcontext, fd, (file_t)result);
}
break;
#endif
}
case IF_MACOS_ELSE(SYS_getrlimit, IF_X64_ELSE(SYS_getrlimit, SYS_ugetrlimit)): {
int resource = dcontext->sys_param0;
if (success && resource == RLIMIT_NOFILE) {
struct rlimit *rlim = (struct rlimit *)dcontext->sys_param1;
safe_write_ex(&rlim->rlim_cur, sizeof(rlim->rlim_cur),
&app_rlimit_nofile.rlim_cur, NULL);
safe_write_ex(&rlim->rlim_max, sizeof(rlim->rlim_max),
&app_rlimit_nofile.rlim_max, NULL);
}
break;
}
#if !defined(ARM) && !defined(X64) && !defined(MACOS)
case SYS_getrlimit: {
int resource = dcontext->sys_param0;
if (success && resource == RLIMIT_NOFILE) {
struct compat_rlimit *rlim = (struct compat_rlimit *)dcontext->sys_param1;
safe_write_ex(&rlim->rlim_cur, sizeof(rlim->rlim_cur),
&app_rlimit_nofile.rlim_cur, NULL);
safe_write_ex(&rlim->rlim_max, sizeof(rlim->rlim_max),
&app_rlimit_nofile.rlim_max, NULL);
}
break;
}
#endif
#ifdef LINUX
case SYS_prlimit64: {
int resource = dcontext->sys_param1;
rlimit64_t *rlim = (rlimit64_t *)dcontext->sys_param3;
if (success && resource == RLIMIT_NOFILE && rlim != NULL &&
(dcontext->sys_param0 == 0 || dcontext->sys_param0 == get_process_id())) {
safe_write_ex(rlim, sizeof(*rlim), &app_rlimit_nofile, NULL);
}
break;
}
#endif
#ifdef LINUX
# ifdef SYS_readlink
case SYS_readlink:
# endif
case SYS_readlinkat:
if (success && DYNAMO_OPTION(early_inject)) {
bool is_at = (sysnum == SYS_readlinkat);
* to fix it up if the app queries. Any thread id can be passed to
* /proc/%d/exe, so we have to check. We could instead look for
* libdynamorio.so in the result but we've tweaked our injector
* in the past to exec different binaries so this seems more robust.
*/
if (symlink_is_self_exe((const char *)(is_at ? dcontext->sys_param1
: dcontext->sys_param0))) {
char *tgt = (char *)(is_at ? dcontext->sys_param2 : dcontext->sys_param1);
size_t tgt_sz =
(size_t)(is_at ? dcontext->sys_param3 : dcontext->sys_param2);
int len = snprintf(tgt, tgt_sz, "%s", get_application_name());
if (len > 0)
set_success_return_val(dcontext, len);
else {
set_failure_return_val(dcontext, EINVAL);
DODEBUG({ dcontext->expect_last_syscall_to_fail = true; });
}
}
}
break;
# ifdef SYS_openat2
case SYS_openat2:
# endif
case SYS_openat:
if (dcontext->sys_param0 != 0) {
heap_free(dcontext, (void *)dcontext->sys_param0,
MAXIMUM_PATH HEAPACCT(ACCT_OTHER));
}
break;
case SYS_rseq:
if (success) {
rseq_process_syscall(dcontext);
}
break;
#endif
default:
#ifdef VMX86_SERVER
if (is_vmkuw_sysnum(sysnum)) {
vmkuw_post_system_call(dcontext);
break;
}
#endif
break;
}
DODEBUG({
if (ignorable_system_call_normalized(sysnum)) {
STATS_INC(post_syscall_ignorable);
} else {
* shouldn't happen without us knowing about them. See PR 402769
* for SYS_close case.
*/
if (!(success || sysnum == SYS_close ||
IF_MACOS(sysnum == SYS_close_nocancel ||)
dcontext->expect_last_syscall_to_fail)) {
LOG(THREAD, LOG_SYSCALLS, 1,
"Unexpected failure of non-ignorable syscall %d\n", sysnum);
}
}
});
exit_post_system_call:
* its operations, since DR needs to know the real syscall results,
* and any changes made by the client are simply to fool the app.
* Also, dr_syscall_invoke_another() needs to set eax, which shouldn't
* affect the result of the 1st syscall. Xref i#1.
*/
instrument_post_syscall(dcontext, sysnum);
dcontext->whereami = old_whereami;
}
#ifdef LINUX
# ifdef STATIC_LIBRARY
/* Static-library builds may be given two bounds symbols
 * (dynamorio_so_start and dynamorio_so_end) to help mitigate
 * edge cases in detecting DR's library bounds. They are optional.
 *
 * If not specified, the variables' location will default to
 * weak_dynamorio_so_bounds_filler and they will not be used.
 * Note that referencing the value of these symbols will crash:
 * always use the address only.
 */
extern int dynamorio_so_start WEAK
__attribute__((alias("weak_dynamorio_so_bounds_filler")));
extern int dynamorio_so_end WEAK
__attribute__((alias("weak_dynamorio_so_bounds_filler")));
static int weak_dynamorio_so_bounds_filler;
# else
/* Non-static builds always declare both bounds symbols.
 * Note that referencing the value of these symbols will crash: always use the
 * address only.
 */
extern int dynamorio_so_start, dynamorio_so_end;
# endif
#endif
* release-time assert if there is a problem doing so. It does not use any
* heap, and we assume it is called prior to find_executable_vm_areas in a
* single thread.
*/
static void
get_dynamo_library_bounds(void)
{
if (dynamorio_library_filepath[0] != '\0')
return;
* Windows, so we should unload it like we do there. The other reason not to
* count it is so is_in_dynamo_dll() can be the only exception to the
* never-execute-from-DR-areas list rule
*/
app_pc check_start, check_end;
bool do_memquery = true;
#ifdef STATIC_LIBRARY
# ifdef LINUX
* know our "library name" since we are in the app. When we have both we
* don't need to do a memquery.
*/
if (&dynamorio_so_start != &weak_dynamorio_so_bounds_filler &&
&dynamorio_so_end != &weak_dynamorio_so_bounds_filler) {
do_memquery = false;
dynamo_dll_start = (app_pc)&dynamorio_so_start;
dynamo_dll_end = (app_pc)ALIGN_FORWARD(&dynamorio_so_end, PAGE_SIZE);
LOG(GLOBAL, LOG_VMAREAS, 2,
"Using dynamorio_so_start and dynamorio_so_end for library bounds"
"\n");
const char *dr_path = get_application_name();
strncpy(dynamorio_library_filepath, dr_path,
BUFFER_SIZE_ELEMENTS(dynamorio_library_filepath));
NULL_TERMINATE_BUFFER(dynamorio_library_filepath);
const char *slash = strrchr(dr_path, '/');
ASSERT(slash != NULL);
size_t copy_chars = 1 + slash - dr_path;
ASSERT(copy_chars < BUFFER_SIZE_ELEMENTS(dynamorio_library_path));
strncpy(dynamorio_library_path, dr_path, copy_chars);
dynamorio_library_path[copy_chars] = '\0';
}
# endif
if (do_memquery) {
check_start = (app_pc)&get_dynamo_library_bounds;
}
#else
# ifdef LINUX
* Note that referencing the value of these symbols will crash:
* always use the address only.
*/
extern int dynamorio_so_start, dynamorio_so_end;
dynamo_dll_start = (app_pc)&dynamorio_so_start;
dynamo_dll_end = (app_pc)ALIGN_FORWARD(&dynamorio_so_end, PAGE_SIZE);
# elif defined(MACOS)
dynamo_dll_start = module_dynamorio_lib_base();
# endif
check_start = dynamo_dll_start;
#endif
if (do_memquery) {
DEBUG_DECLARE(int res =)
memquery_library_bounds(NULL, &check_start, &check_end, dynamorio_library_path,
BUFFER_SIZE_ELEMENTS(dynamorio_library_path),
dynamorio_libname_buf,
BUFFER_SIZE_ELEMENTS(dynamorio_libname_buf));
ASSERT(res > 0);
#ifndef STATIC_LIBRARY
dynamorio_libname = IF_UNIT_TEST_ELSE(UNIT_TEST_EXE_NAME, dynamorio_libname_buf);
#endif
snprintf(dynamorio_library_filepath,
BUFFER_SIZE_ELEMENTS(dynamorio_library_filepath), "%s%s",
dynamorio_library_path, dynamorio_libname);
NULL_TERMINATE_BUFFER(dynamorio_library_filepath);
#if !defined(STATIC_LIBRARY) && defined(LINUX)
ASSERT(check_start == dynamo_dll_start && check_end == dynamo_dll_end);
#elif defined(MACOS)
ASSERT(check_start == dynamo_dll_start);
dynamo_dll_end = check_end;
#else
dynamo_dll_start = check_start;
dynamo_dll_end = check_end;
#endif
}
LOG(GLOBAL, LOG_VMAREAS, 1, PRODUCT_NAME " library path: %s\n",
dynamorio_library_path);
LOG(GLOBAL, LOG_VMAREAS, 1, PRODUCT_NAME " library file path: %s\n",
dynamorio_library_filepath);
LOG(GLOBAL, LOG_VMAREAS, 1, "DR library bounds: " PFX " to " PFX "\n",
dynamo_dll_start, dynamo_dll_end);
if (dynamo_dll_start == NULL || dynamo_dll_end == NULL) {
REPORT_FATAL_ERROR_AND_EXIT(FAILED_TO_FIND_DR_BOUNDS, 2, get_application_name(),
get_application_pid());
}
}
* Assumed to be called at single-threaded initialization time as it sets
* global buffers.
* get_dynamo_library_bounds() must be called first.
*/
static void
get_alt_dynamo_library_bounds(void)
{
if (dynamorio_alt_arch_filepath[0] != '\0')
return;
ASSERT(dynamorio_library_path[0] != '\0');
ASSERT(d_r_config_initialized());
const char *config_alt_path = get_config_val(DYNAMORIO_VAR_ALTINJECT);
if (config_alt_path != NULL && config_alt_path[0] != '\0') {
strncpy(dynamorio_alt_arch_filepath, config_alt_path,
BUFFER_SIZE_ELEMENTS(dynamorio_alt_arch_filepath));
NULL_TERMINATE_BUFFER(dynamorio_alt_arch_filepath);
* we compute it for the legacy code.
*/
const char *sep = strrchr(dynamorio_alt_arch_filepath, '/');
if (sep != NULL) {
strncpy(dynamorio_alt_arch_path, dynamorio_alt_arch_filepath,
sep - dynamorio_alt_arch_filepath);
NULL_TERMINATE_BUFFER(dynamorio_alt_arch_path);
}
LOG(GLOBAL, LOG_VMAREAS, 1, PRODUCT_NAME " alt arch filepath: %s\n",
dynamorio_alt_arch_filepath);
LOG(GLOBAL, LOG_VMAREAS, 1, PRODUCT_NAME " alt arch path: %s\n",
dynamorio_alt_arch_path);
} else {
strncpy(dynamorio_alt_arch_path, dynamorio_library_path,
BUFFER_SIZE_ELEMENTS(dynamorio_alt_arch_path));
char *libdir =
strstr(dynamorio_alt_arch_path, IF_X64_ELSE(DR_LIBDIR_X64, DR_LIBDIR_X86));
if (libdir != NULL) {
const char *newdir = IF_X64_ELSE(DR_LIBDIR_X86, DR_LIBDIR_X64);
strncpy(libdir, newdir, strlen(newdir));
} else {
SYSLOG_INTERNAL_WARNING("unable to determine lib path for cross-arch execve");
}
NULL_TERMINATE_BUFFER(dynamorio_alt_arch_path);
LOG(GLOBAL, LOG_VMAREAS, 1, PRODUCT_NAME " alt arch path: %s\n",
dynamorio_alt_arch_path);
snprintf(dynamorio_alt_arch_filepath,
BUFFER_SIZE_ELEMENTS(dynamorio_alt_arch_filepath), "%s%s",
dynamorio_alt_arch_path, dynamorio_libname);
NULL_TERMINATE_BUFFER(dynamorio_alt_arch_filepath);
LOG(GLOBAL, LOG_VMAREAS, 1, PRODUCT_NAME " alt arch filepath: %s\n",
dynamorio_alt_arch_filepath);
}
}
/* Returns the dynamorio_library_filepath buffer, first calling
 * get_dynamo_library_bounds() if the buffer is still empty.
 */
char *
get_dynamorio_library_path(void)
{
    if (dynamorio_library_filepath[0] == '\0')
        get_dynamo_library_bounds();
    return dynamorio_library_filepath;
}
#ifdef LINUX
* string on error.
* FIXME i#47: This will return DR's path when using early injection.
*/
static char *
read_proc_self_exe(bool ignore_cache)
{
static char exepath[MAXIMUM_PATH];
static bool tried = false;
# ifdef MACOS
ASSERT_NOT_IMPLEMENTED(false);
# endif
if (!tried || ignore_cache) {
tried = true;
* but we have no alternative solution except assuming the first
* /proc/self/maps entry is the executable
*/
ssize_t res;
DEBUG_DECLARE(int len =)
snprintf(exepath, BUFFER_SIZE_ELEMENTS(exepath), "/proc/%d/exe",
get_process_id());
ASSERT(len > 0);
NULL_TERMINATE_BUFFER(exepath);
# ifdef SYS_readlink
res = dynamorio_syscall(SYS_readlink, 3, exepath, exepath,
BUFFER_SIZE_ELEMENTS(exepath) - 1);
# else
res = dynamorio_syscall(SYS_readlinkat, 4, AT_FDCWD, exepath, exepath,
BUFFER_SIZE_ELEMENTS(exepath) - 1);
# endif
ASSERT(res < BUFFER_SIZE_ELEMENTS(exepath));
exepath[MAX(res, 0)] = '\0';
NULL_TERMINATE_BUFFER(exepath);
}
return exepath;
}
#endif
/* Returns executable_start, computing it (together with executable_end) on
 * first use where the build configuration provides a way to do so.
 */
app_pc
get_application_base(void)
{
    if (executable_start != NULL)
        return executable_start;
#if defined(STATIC_LIBRARY)
    /* In a static-library build the DR library bounds are used as the
     * executable's bounds.
     */
    executable_start = get_dynamorio_dll_start();
    executable_end = get_dynamorio_dll_end();
#elif defined(HAVE_MEMINFO)
    {
        /* Look the executable's bounds up by its name. */
        const char *app_name = get_application_name();
        if (app_name != NULL && app_name[0] != '\0') {
            DEBUG_DECLARE(int count =)
            memquery_library_bounds(app_name, &executable_start, &executable_end, NULL,
                                    0, NULL, 0);
            ASSERT(count > 0 && executable_start != NULL);
        }
    }
#else
    /* No lookup is performed in this configuration: the result stays NULL. */
#endif
    return executable_start;
}
/* Returns executable_end.  If it is still NULL, get_application_base() is
 * called first, as that routine is what sets both bounds.
 */
app_pc
get_application_end(void)
{
    if (executable_end == NULL) {
        get_application_base();
    }
    return executable_end;
}
/* Returns the entry point recorded for the module that contains
 * executable_start, caching it in a function-local static after the first
 * successful lookup.  Returns NULL while executable_start is NULL or the
 * module cannot be found.
 */
app_pc
get_image_entry()
{
    static app_pc image_entry_point = NULL;

    /* Nothing to do if already cached, or if we can't look anything up yet. */
    if (image_entry_point != NULL || executable_start == NULL)
        return image_entry_point;

    os_get_module_info_lock();
    module_area_t *exe_area = module_pc_lookup(executable_start);
    ASSERT(exe_area != NULL);
    if (exe_area != NULL) {
        ASSERT(executable_start == exe_area->start);
        /* The cached value is written between an unprotect/protect pair for
         * DATASEC_RARELY_PROT.
         */
        SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT);
        image_entry_point = exe_area->entry_point;
        SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT);
    }
    os_get_module_info_unlock();
    return image_entry_point;
}
#ifdef DEBUG
/* Debug-build-only routine whose body is empty: calling it does nothing. */
void
mem_stats_snapshot()
{
    /* No statements. */
}
#endif
/* Returns whether pc lies within [dynamo_dll_start, dynamo_dll_end).
 * The bounds must already have been set: dynamo_dll_start is asserted
 * non-NULL.
 */
bool
is_in_dynamo_dll(app_pc pc)
{
    ASSERT(dynamo_dll_start != NULL);
#ifdef VMX86_SERVER
    /* Addresses for which vmk_in_vmklib() is true count as ours, for allowing
     * execution (_init calls os_in_vmkernel_classic()) and for
     * reporting crashes as our fault
     */
    if (vmk_in_vmklib(pc))
        return true;
#endif
    if (pc < dynamo_dll_start)
        return false;
    return pc < dynamo_dll_end;
}
app_pc
get_dynamorio_dll_start()
{
if (dynamo_dll_start == NULL)
get_dynamo_library_bounds();
ASSERT(dynamo_dll_start != NULL);
return dynamo_dll_start;
}
app_pc
get_dynamorio_dll_end()
{
if (dynamo_dll_end == NULL)
get_dynamo_library_bounds();
ASSERT(dynamo_dll_end != NULL);
return dynamo_dll_end;
}
/* Returns the preferred base address of the DR library. */
app_pc
get_dynamorio_dll_preferred_base()
{
    /* No distinct preferred base is tracked here: the value reported
     * therefore is always equal to dynamo_dll_start.
     */
    return get_dynamorio_dll_start();
}
/* Records the location of the vdso and/or vsyscall page described by the
 * current memquery iterator entry in the vdso_page_start and
 * vsyscall_page_start globals.  In debug builds, the non-X64 path also
 * sets *map_type to a label for the region.
 */
static void
found_vsyscall_page(memquery_iter_t *iter _IF_DEBUG(OUT const char **map_type))
{
#ifndef X64
    /* This path is compiled only when X64 is not defined; checking the
     * hardcoded address shouldn't have any false positives.
     */
    ASSERT(iter->vm_end - iter->vm_start == PAGE_SIZE ||
           /* A two-page region is also accepted. */
           iter->vm_end - iter->vm_start == 2 * PAGE_SIZE);
    ASSERT(!dynamo_initialized);
    /* We only label the region for debug output here:
     * we aren't adjusting our code origins policies to remove the
     * vsyscall page exemption.
     */
    DODEBUG({ *map_type = "VDSO"; });
    /* Only the first entry seen is recorded: if more than one entry reaches
     * here, take just the first one as the start (xref i#2157).
     */
    if (vdso_page_start == NULL) {
        vdso_page_start = iter->vm_start;
        vdso_size = iter->vm_end - iter->vm_start;
    }
    /* Until we see a syscall we don't know where the vsyscall page is,
     * and we point it at the vdso start.
     */
    if (vsyscall_page_start == NULL)
        vsyscall_page_start = iter->vm_start;
    LOG(GLOBAL, LOG_VMAREAS, 1, "found vdso/vsyscall pages @ " PFX " %s\n",
        vsyscall_page_start, iter->comment);
#else
    /* Fix bugs for OS where vdso page is set unreadable as below
     * ffffffffff600000-ffffffffffe00000 ---p 00000000 00:00 0 [vdso]
     * but it is readable indeed.
     */
    /* Likewise, fix bugs for OS where the page is set unreadable as below
     * ffffffffff600000-ffffffffffe00000 ---p 00000000 00:00 0 [vsyscall]
     * but it is readable indeed.
     */
    if (!TESTALL((PROT_READ | PROT_EXEC), iter->prot))
        iter->prot |= (PROT_READ | PROT_EXEC);
    /* Tell the two kinds of entry apart by the name prefix in the maps comment. */
    if (strncmp(iter->comment, VSYSCALL_PAGE_MAPS_NAME,
                strlen(VSYSCALL_PAGE_MAPS_NAME)) == 0)
        vdso_page_start = iter->vm_start;
    else if (strncmp(iter->comment, VSYSCALL_REGION_MAPS_NAME,
                     strlen(VSYSCALL_REGION_MAPS_NAME)) == 0)
        vsyscall_page_start = iter->vm_start;
#endif
}
#ifndef HAVE_MEMINFO_QUERY
/* Callback handed to iterate_vmm_regions(): records [region_start,
 * region_end) in the memory cache as a no-access data region.
 * user_data is not used.
 */
static void
add_to_memcache(byte *region_start, byte *region_end, void *user_data)
{
    memcache_update_locked(region_start, region_end, MEMPROT_NONE, DR_MEMTYPE_DATA,
                           false);
}
#endif
/* Walks the address space using the supplied (already started) memquery
 * iterator, classifying each region (DR-internal, vdso/vsyscall, part of an
 * already-seen module, the first segment of a new module, or other).
 * When add_modules is true, modules found are added to the module list and
 * each non-DR region is passed to app_memory_allocation().
 * Returns the number of regions for which app_memory_allocation() returned
 * true, or the result of find_vm_areas_via_probe() when HAVE_MEMINFO is not
 * defined.
 */
int
os_walk_address_space(memquery_iter_t *iter, bool add_modules)
{
    int count = 0;
#ifdef MACOS
    app_pc shared_start, shared_end;
    bool have_shared = module_dyld_shared_region(&shared_start, &shared_end);
#endif
#ifdef RETURN_AFTER_CALL
    dcontext_t *dcontext = get_thread_private_dcontext();
    os_thread_data_t *ostd = (os_thread_data_t *)dcontext->os_field;
#endif

#ifndef HAVE_MEMINFO_QUERY
    /* Rather than tracking the innards of the vmheap, we start by
     * adding a single no-access region for the whole vmheap.
     * Queries from heap routines use _from_os.
     * Queries in check_thread_vm_area are fine getting "noaccess": wants
     * any DR memory not on exec areas list to be noaccess.
     * Queries from clients: should be ok to hide innards.  Marking noaccess
     * should be safer than marking free, as unruly client might try to mmap
     * something in the free space: better to have it think it's reserved but
     * not yet used memory.  FIXME: we're not marking beyond-vmheap DR regions
     * as noaccess!
     */
    iterate_vmm_regions(add_to_memcache, NULL);
#endif

#ifndef HAVE_MEMINFO
    count = find_vm_areas_via_probe();
#else
    while (memquery_iterator_next(iter)) {
        bool image = false;
        size_t size = iter->vm_end - iter->vm_start;
        /* Regions overlapping DR's own areas are skipped, except for the DR
         * library itself and client libraries.
         */
        bool skip = dynamo_vm_area_overlap(iter->vm_start, iter->vm_end) &&
            !is_in_dynamo_dll(iter->vm_start) && !is_in_client_lib(iter->vm_start);
        DEBUG_DECLARE(const char *map_type = "Private");
        /* We rely on
         * our passing NULL preventing rwx regions from being added to executable
         * or future list, even w/ -executable_if_alloc
         */

        LOG(GLOBAL, LOG_VMAREAS, 2, "start=" PFX " end=" PFX " prot=%x comment=%s\n",
            iter->vm_start, iter->vm_end, iter->prot, iter->comment);
        /* The vdso can show up in between the segments of a module, as below,
         * which causes ASSERT_CURIOSITY in mmap_check_for_module_overlap fail.
         * b7fa3000-b7fbd000 r-xp 00000000 08:01 108679     /lib/ld-2.8.90.so
         * b7fbd000-b7fbe000 r-xp b7fbd000 00:00 0          [vdso]
         * b7fbe000-b7fbf000 r--p 0001a000 08:01 108679     /lib/ld-2.8.90.so
         * b7fbf000-b7fc0000 rw-p 0001b000 08:01 108679     /lib/ld-2.8.90.so
         * So we always first check if it is a vdso page before calling
         * mmap_check_for_module_overlap.
         * Update: with i#160/PR 562667 handling non-contiguous modules like
         * ld.so we now gracefully handle other objects like vdso in gaps in
         * module, but it's simpler to leave this ordering here.
         */
        if (skip) {
            LOG(GLOBAL, LOG_VMAREAS, 2, PFX "-" PFX " skipping: internal DR region\n",
                iter->vm_start, iter->vm_end);
#    ifdef MACOS
        } else if (have_shared && iter->vm_start >= shared_start &&
                   iter->vm_start < shared_end) {
            /* We do not add modules inside the dyld shared region here,
             * as we'll fail to identify the library.  We add them
             * in module_walk_dyld_list instead.
             */
            image = true;
#    endif
        } else if (strncmp(iter->comment, VSYSCALL_PAGE_MAPS_NAME,
                           strlen(VSYSCALL_PAGE_MAPS_NAME)) == 0 ||
                   IF_X64_ELSE(strncmp(iter->comment, VSYSCALL_REGION_MAPS_NAME,
                                       strlen(VSYSCALL_REGION_MAPS_NAME)) == 0,
                               /* Non-X64: also recognize the page by its address,
                                * as it is hardcoded there.
                                */
                               iter->vm_start == VSYSCALL_PAGE_START_HARDCODED)) {
            if (add_modules) {
                found_vsyscall_page(iter _IF_DEBUG(&map_type));
                /* We only add the region to the module list if it starts with
                 * a module header: when the vsyscall page is
                 * separate from vdso it has no ELF header which is too complex
                 * to force into the module list.
                 */
                if (module_is_header(iter->vm_start, iter->vm_end - iter->vm_start)) {
                    module_list_add(iter->vm_start, iter->vm_end - iter->vm_start, false,
                                    iter->comment, iter->inode);
                }
            }
        } else if (add_modules &&
                   mmap_check_for_module_overlap(iter->vm_start, size,
                                                 TEST(MEMPROT_READ, iter->prot),
                                                 iter->inode, false)) {
            /* we already added the whole image region when we hit the first map for it */
            image = true;
            DODEBUG({ map_type = "ELF SO"; });
        } else if (TEST(MEMPROT_READ, iter->prot) &&
                   module_is_header(iter->vm_start, size)) {
            DEBUG_DECLARE(size_t image_size = size;)
            app_pc mod_base, mod_first_end, mod_max_end;
            char *exec_match;
            bool found_exec = false;
            image = true;
            DODEBUG({ map_type = "ELF SO"; });
            LOG(GLOBAL, LOG_VMAREAS, 2,
                "Found already mapped module first segment :\n"
                "\t" PFX "-" PFX "%s inode=" UINT64_FORMAT_STRING " name=%s\n",
                iter->vm_start, iter->vm_end, TEST(MEMPROT_EXEC, iter->prot) ? " +x" : "",
                iter->inode, iter->comment);
#    ifdef LINUX
            /* We expect an inode unless the entry has no name, as can happen
             * when another
             * map is placed on top (i#2566)
             */
            ASSERT_CURIOSITY(iter->inode != 0 || iter->comment[0] == '\0');
#    endif
            ASSERT_CURIOSITY(iter->offset == 0); /* first map shouldn't have offset */
            /* Get size by walking the program headers. */
            if (module_walk_program_headers(iter->vm_start, size, false, true, &mod_base,
                                            &mod_first_end, &mod_max_end, NULL, NULL)) {
                DODEBUG(image_size = mod_max_end - mod_base;);
            } else {
                ASSERT_NOT_REACHED();
            }
            LOG(GLOBAL, LOG_VMAREAS, 2,
                "Found already mapped module total module :\n"
                "\t" PFX "-" PFX " inode=" UINT64_FORMAT_STRING " name=%s\n",
                iter->vm_start, iter->vm_start + image_size, iter->inode, iter->comment);

            if (add_modules) {
                const char *modpath = iter->comment;
                /* Look for the executable so its bounds can be recorded. */
#    ifdef LINUX
                exec_match = get_application_name();
                if (exec_match != NULL && exec_match[0] != '\0')
                    found_exec = (strcmp(iter->comment, exec_match) == 0);
                if (!found_exec && executable_start != NULL &&
                    executable_start == iter->vm_start) {
                    found_exec = true;
                    /* The maps entry may not carry the executable's path, in the
                     * presence of mremapping for hugepages (i#2566; i#3387) (this
                     * could happen for libraries too, but we don't have alternatives
                     * there).  Or, it may have an incorrect path.  Prefer the path
                     * we recorded in early injection or obtained from
                     * /proc/self/exe.
                     */
                    modpath = get_application_name();
                }
#    else
                /* We do not compare full paths here: we only have the short name
                 * to match against
                 * it.  But, we can distinguish an exe from a lib here, even for PIE,
                 * so we go with that plus a basename comparison.
                 */
                exec_match = (char *)get_application_short_name();
                if (module_is_executable(iter->vm_start) && exec_match != NULL &&
                    exec_match[0] != '\0') {
                    const char *iter_basename = strrchr(iter->comment, '/');
                    if (iter_basename == NULL)
                        iter_basename = iter->comment;
                    else
                        iter_basename++;
                    found_exec = (strcmp(iter_basename, exec_match) == 0);
                }
#    endif
                if (found_exec) {
                    if (executable_start == NULL)
                        executable_start = iter->vm_start;
                    else
                        ASSERT(iter->vm_start == executable_start);
                    LOG(GLOBAL, LOG_VMAREAS, 2,
                        "Found executable %s @" PFX "-" PFX " %s\n",
                        get_application_name(), iter->vm_start,
                        iter->vm_start + image_size, iter->comment);
                }
                /* We pass only the
                 * first segment's size.  We'll update it in module_list_add().
                 */
                module_list_add(iter->vm_start, mod_first_end - mod_base, false, modpath,
                                iter->inode);

#    ifdef MACOS
                /* Finding dyld triggers the walk of its module list. */
                if (strcmp(iter->comment, "/usr/lib/dyld") == 0)
                    module_walk_dyld_list(iter->vm_start);
#    endif
            }
        } else if (iter->inode != 0) {
            DODEBUG({ map_type = "Mapped File"; });
        }

#    ifndef HAVE_MEMINFO_QUERY
        /* Regions inside the vmm reservation are not added individually here. */
        if (!is_vmm_reserved_address(iter->vm_start, iter->vm_end - iter->vm_start, NULL,
                                     NULL)) {
            LOG(GLOBAL, LOG_VMAREAS, 4,
                "os_walk_address_space: adding: " PFX "-" PFX " prot=%d\n",
                iter->vm_start, iter->vm_end, iter->prot);
            memcache_update_locked(iter->vm_start, iter->vm_end, iter->prot,
                                   image ? DR_MEMTYPE_IMAGE : DR_MEMTYPE_DATA, false);
        }
#    endif

        /* We pass NULL as the first argument to app_memory_allocation():
         * it has no way of determining if this is a stack b/c we don't have
         * a dcontext at this point -- so we just don't pass the stack
         */
        if (!skip && add_modules &&
            app_memory_allocation(NULL, iter->vm_start, (iter->vm_end - iter->vm_start),
                                  iter->prot, image _IF_DEBUG(map_type))) {
            count++;
        }
    }
#endif /* !HAVE_MEMINFO */

#ifndef HAVE_MEMINFO_QUERY
    DOLOG(4, LOG_VMAREAS, memcache_print(GLOBAL, "init: all memory areas:\n"););
#endif

#ifdef RETURN_AFTER_CALL
    /* Find the bottom of the stack of the initial (native) entry */
    ostd->stack_bottom_pc = find_stack_bottom();
    LOG(THREAD, LOG_ALL, 1, "Stack bottom pc = " PFX "\n", ostd->stack_bottom_pc);
#endif

    /* now that we've walked memory print all modules */
    LOG(GLOBAL, LOG_VMAREAS, 2, "Module list after memory walk\n");
    DOLOG(1, LOG_VMAREAS, {
        if (add_modules)
            print_modules(GLOBAL, DUMP_NOT_XML);
    });

    return count;
}
/* Walks the whole address space with a fresh memquery iterator, adding
 * modules along the way, and records the resulting region count in the
 * num_app_code_modules statistic.  Returns that count.
 */
int
find_executable_vm_areas(void)
{
    memquery_iter_t maps_iter;
    int num_regions;

    memquery_iterator_start(&maps_iter, NULL, true);
    num_regions = os_walk_address_space(&maps_iter, true);
    memquery_iterator_stop(&maps_iter);

    STATS_ADD(num_app_code_modules, num_regions);

    /* Called after the walk, once the module list has been populated. */
    get_libc_errno_location(true);

    return num_regions;
}
* does not use any heap.
* assumed to be called prior to find_executable_vm_areas.
*/
int
find_dynamo_library_vm_areas(void)
{
#ifndef STATIC_LIBRARY
* We don't bother to break down the sub-regions.
* Assumption: we don't need to have the protection flags for DR sub-regions.
* For static library builds, DR's code is in the exe and isn't considered
* to be a DR area.
*/
add_dynamo_vm_area(get_dynamorio_dll_start(), get_dynamorio_dll_end(),
MEMPROT_READ | MEMPROT_WRITE | MEMPROT_EXEC,
true _IF_DEBUG(dynamorio_library_filepath));
#endif
#ifdef VMX86_SERVER
if (os_in_vmkernel_userworld())
vmk_add_vmklib_to_dynamo_areas();
#endif
return 1;
}
/* Retrieves the bounds of the app stack of the thread owning dcontext.
 * The bounds are determined on the first call, by querying the memory
 * region containing the thread's app stack pointer, and are cached in the
 * thread's os_thread_data_t afterward.
 * base and top are optional out-parameters (either may be NULL).
 * Returns false if the region query failed, in which case nothing is
 * cached or written to the out-parameters.
 */
bool
get_stack_bounds(dcontext_t *dcontext, byte **base, byte **top)
{
    os_thread_data_t *ostd = (os_thread_data_t *)dcontext->os_field;
    if (ostd->stack_base == NULL) {
        /* The bounds are initialized on demand, the first time they are requested.
         * FIXME: the comment here -- ignoring it for now, if hit cases confirming
         * it the right thing will be to merge adjacent rwx regions and assume
         * their union is the stack -- otherwise have to have special stack init
         * routine called from x86.asm new_thread_dynamo_start and internal_dynamo_start,
         * and the latter is not a do-once...
         */
        size_t size = 0;
        bool ok;
        /* We cache the result since the stack can end up in several pieces in
         * /proc/self/maps w/ later mprotects and it can be hard to piece together later
         */
        if (IF_MEMQUERY_ELSE(false, DYNAMO_OPTION(use_all_memory_areas))) {
            ok = get_memory_info((app_pc)get_mcontext(dcontext)->xsp, &ostd->stack_base,
                                 &size, NULL);
        } else {
            ok = get_memory_info_from_os((app_pc)get_mcontext(dcontext)->xsp,
                                         &ostd->stack_base, &size, NULL);
        }
        if (!ok) {
            /* A failed query is only expected before we start running
             * the app.
             */
            ASSERT(!dynamo_started);
            return false;
        }
        ostd->stack_top = ostd->stack_base + size;
        LOG(THREAD, LOG_THREADS, 1, "App stack is " PFX "-" PFX "\n", ostd->stack_base,
            ostd->stack_top);
    }
    if (base != NULL)
        *base = ostd->stack_base;
    if (top != NULL)
        *top = ostd->stack_top;
    return true;
}
#ifdef RETURN_AFTER_CALL
/* Reports whether target_pc corresponds to the bottom of the initial call
 * stack: INITIAL_STACK_EMPTY once the image entry has been reached,
 * otherwise INITIAL_STACK_BOTTOM_REACHED or INITIAL_STACK_BOTTOM_NOT_REACHED
 * depending on whether target_pc equals the recorded stack bottom pc.
 */
initial_call_stack_status_t
at_initial_stack_bottom(dcontext_t *dcontext, app_pc target_pc)
{
    /* We can't rely solely on the recorded stack bottom pc
     * b/c we can't always walk the call stack (PR 608990) so we
     * use the image entry as our primary trigger
     */
    if (executable_start != NULL && reached_image_entry_yet()) {
        return INITIAL_STACK_EMPTY;
    } else {
        /* Fall back to comparing against the recorded stack bottom pc:
         * that's better than false negatives if we miss the image entry
         * or we were unable to find the executable_start
         */
        os_thread_data_t *ostd = (os_thread_data_t *)dcontext->os_field;
        if (target_pc == ostd->stack_bottom_pc) {
            return INITIAL_STACK_BOTTOM_REACHED;
        } else {
            return INITIAL_STACK_BOTTOM_NOT_REACHED;
        }
    }
}
#endif /* RETURN_AFTER_CALL */
/* Fills in *out_info for the memory region containing pc.  The answer
 * comes straight from the OS when HAVE_MEMINFO_QUERY is defined and from
 * the memory cache otherwise.  Returns the success value of that query.
 */
bool
query_memory_ex(const byte *pc, OUT dr_mem_info_t *out_info)
{
    bool ok;
#ifdef HAVE_MEMINFO_QUERY
    ok = query_memory_ex_from_os(pc, out_info);
#else
    ok = memcache_query_memory(pc, out_info);
#endif
    return ok;
}
/* Same query as query_memory_ex(): this routine simply forwards to it. */
bool
query_memory_cur_base(const byte *pc, OUT dr_mem_info_t *info)
{
    const bool ok = query_memory_ex(pc, info);
    return ok;
}
/* Looks up the memory region containing pc and copies its base, size, and
 * protection into whichever of the optional out-parameters are non-NULL.
 * Addresses inside the vmm reservation are always queried from the OS;
 * everything else goes through query_memory_ex().
 * Returns false if the query fails or the region is free.
 */
bool
get_memory_info(const byte *pc, byte **base_pc, size_t *size, uint *prot)
{
    dr_mem_info_t info;
    bool found;

    if (is_vmm_reserved_address((byte *)pc, 1, NULL, NULL))
        found = query_memory_ex_from_os(pc, &info);
    else
        found = query_memory_ex(pc, &info);

    /* info is only examined when the query itself succeeded. */
    if (!found || info.type == DR_MEMTYPE_FREE)
        return false;

    if (base_pc != NULL)
        *base_pc = info.base_pc;
    if (size != NULL)
        *size = info.size;
    if (prot != NULL)
        *prot = info.prot;
    return true;
}
* b/c the caller is in a fragile location and cannot acquire locks, so
* we try to do the same here.
*/
bool
query_memory_ex_from_os(const byte *pc, OUT dr_mem_info_t *info)
{
bool have_type = false;
bool res = memquery_from_os(pc, info, &have_type);
if (!res) {
info->type = DR_MEMTYPE_ERROR;
} else if (res && !have_type) {
* get SIGBUS if beyond end of mmapped file: not uncommon if querying
* in middle of library load before .bss fully set up (PR 528744).
* However, if there is no fault handler, is_elf_so_header's safe_read will
* recurse to here, so in that case we use info->size but we assume
* it's only at init or exit and so not in the middle of a load
* and less likely to be querying a random mmapped file.
* The cleaner fix is to allow safe_read to work w/o a dcontext or
* fault handling: i#350/PR 529066.
*/
if (TEST(MEMPROT_READ, info->prot) &&
module_is_header(info->base_pc, fault_handling_initialized ? 0 : info->size))
info->type = DR_MEMTYPE_IMAGE;
else {
* image: can be doubly-mapped so; don't want to count vdso; etc.
*/
info->type = DR_MEMTYPE_DATA;
}
}
return res;
}
/* Like get_memory_info() but always asks the OS: looks up the region
 * containing pc and copies its base, size, and protection into whichever
 * of the optional out-parameters are non-NULL.
 * Returns false if the query fails or the region is free.
 */
bool
get_memory_info_from_os(const byte *pc, byte **base_pc, size_t *size, uint *prot)
{
    dr_mem_info_t info;
    const bool found = query_memory_ex_from_os(pc, &info);

    /* info is only examined when the query itself succeeded. */
    if (!found || info.type == DR_MEMTYPE_FREE)
        return false;

    if (base_pc != NULL)
        *base_pc = info.base_pc;
    if (size != NULL)
        *size = info.size;
    if (prot != NULL)
        *prot = info.prot;
    return true;
}
/* Defined elsewhere; used below in the no-kernel-support path. */
extern void
deadlock_avoidance_unlock(mutex_t *lock, bool ownable);

/* Contention path for acquiring lock: blocks (via kernel-supported waiting
 * when available, otherwise by yielding) until the lock has been acquired.
 * If mc is non-NULL it is copied into the thread's mcontext and the thread
 * is marked as having a valid mcontext for synchronization while it waits.
 * A client thread waiting on its registered client_grab_mutex is marked
 * safe for synch while it waits.
 */
void
mutex_wait_contended_lock(mutex_t *lock, priv_mcontext_t *mc)
{
    dcontext_t *dcontext = get_thread_private_dcontext();
    bool set_client_safe_for_synch =
        ((dcontext != NULL) && IS_CLIENT_THREAD(dcontext) &&
         ((mutex_t *)dcontext->client_data->client_grab_mutex == lock));
    if (mc != NULL) {
        ASSERT(dcontext != NULL);
        /* We can't mark a client thread safe for synch and also be given
         * an mcontext to return into: nothing would be able to reset the
         * client_thread_safe_for_sync flag.
         */
        ASSERT(!set_client_safe_for_synch);
        *get_mcontext(dcontext) = *mc;
    }

    /* i#96/PR 295561: use futex(2) if available */
    if (ksynch_kernel_support()) {
        /* Try to get the lock by exchanging in LOCK_CONTENDED_STATE.  If the
         * lock is already held, it's fine to store any value
         * > LOCK_SET_STATE (we don't rely on paired incs/decs) so that
         * the next unlocker will call mutex_notify_released_lock().
         */
        ptr_int_t res;
#ifndef LINUX /* we actually don't use this for Linux: see below */
        KSYNCH_TYPE *event = mutex_get_contended_event(lock);
        ASSERT(event != NULL && ksynch_var_initialized(event));
#endif
        while (atomic_exchange_int(&lock->lock_requests, LOCK_CONTENDED_STATE) !=
               LOCK_FREE_STATE) {
            if (set_client_safe_for_synch)
                dcontext->client_data->client_thread_safe_for_synch = true;
            if (mc != NULL)
                set_synch_state(dcontext, THREAD_SYNCH_VALID_MCONTEXT);

            /* The wait differs between the LINUX build and the others.
             * We have to use lock_requests as the futex to avoid waiting if
             * lock_requests changes, while on Mac the underlying synch prevents
             * a wait there.
             */
#ifdef LINUX
            /* We wait only while lock_requests still holds LOCK_CONTENDED_STATE.
             * We can't have a series of changes that result in no apparent
             * change w/o someone acquiring the lock, b/c
             * mutex_notify_released_lock() sets lock_requests to LOCK_FREE_STATE.
             */
            res = ksynch_wait(&lock->lock_requests, LOCK_CONTENDED_STATE, 0);
#else
            res = ksynch_wait(event, 0, 0);
#endif
            if (res != 0 && res != -EWOULDBLOCK)
                os_thread_yield();
            if (set_client_safe_for_synch)
                dcontext->client_data->client_thread_safe_for_synch = false;
            if (mc != NULL)
                set_synch_state(dcontext, THREAD_SYNCH_NONE);

            /* We don't care whether the wait returned normally, hit a value
             * mismatch
             * (res==-EWOULDBLOCK), or error: regardless, someone else
             * could have acquired the lock, so we try again
             */
        }
    } else {
        /* No kernel waiting support: spin on trylock with yields in between. */
        atomic_dec_and_test(&lock->lock_requests);

        while (!d_r_mutex_trylock(lock)) {
            if (set_client_safe_for_synch)
                dcontext->client_data->client_thread_safe_for_synch = true;
            if (mc != NULL)
                set_synch_state(dcontext, THREAD_SYNCH_VALID_MCONTEXT);

            os_thread_yield();

            if (set_client_safe_for_synch)
                dcontext->client_data->client_thread_safe_for_synch = false;
            if (mc != NULL)
                set_synch_state(dcontext, THREAD_SYNCH_NONE);
        }

#ifdef DEADLOCK_AVOIDANCE
        /* NOTE(review): presumably the successful trylock above already
         * registered the acquisition with the deadlock-avoidance code; to
         * avoid two in a row (causes assertion on owner) we unlock here
         * In the future we will remove the trylock here and this will go away.
         */
        deadlock_avoidance_unlock(lock, true);
#endif
    }

    return;
}
/* Release-side counterpart of the contention path: when kernel-supported
 * waiting is available, marks the lock free and wakes a waiter.  Does
 * nothing otherwise.
 */
void
mutex_notify_released_lock(mutex_t *lock)
{
    /* i#96/PR 295561: use futex(2) if available. */
    if (ksynch_kernel_support()) {
        /* Set to LOCK_FREE_STATE before waking anyone, to avoid
         * resulting in a futex_wait value match w/o anyone owning the lock
         */
        lock->lock_requests = LOCK_FREE_STATE;
        /* No reason to wake multiple threads: just one */
#ifdef LINUX
        ksynch_wake(&lock->lock_requests);
#else
        ksynch_wake(&lock->contended_event);
#endif
    } /* else nothing to do */
}
helpers to guarantee the lock is held (unlike mutexes) so simple
yields are still acceptable.
*/
void
rwlock_wait_contended_writer(read_write_lock_t *rwlock)
{
os_thread_yield();
}
/* Writer notification hook: this implementation has an empty body. */
void
rwlock_notify_writer(read_write_lock_t *rwlock)
{
}
/* Contention path for a reader: simply yields the processor. */
void
rwlock_wait_contended_reader(read_write_lock_t *rwlock)
{
os_thread_yield();
}
/* Reader notification hook: this implementation has an empty body. */
void
rwlock_notify_readers(read_write_lock_t *rwlock)
{
}
typedef struct linux_event_t {
* thread(s). See i#96/PR 295561.
*/
KSYNCH_TYPE signaled;
mutex_t lock;
bool broadcast;
} linux_event_t;
* assign different ranks to all events for DEADLOCK_AVOIDANCE.
* Currently a single rank seems to work.
*/
event_t
create_event(void)
{
event_t e = (event_t)global_heap_alloc(sizeof(linux_event_t) HEAPACCT(ACCT_OTHER));
ksynch_init_var(&e->signaled);
ASSIGN_INIT_LOCK_FREE(e->lock, event_lock);
e->broadcast = false;
return e;
}
/* Same as create_event() except that the returned event has its broadcast
 * flag set.
 */
event_t
create_broadcast_event(void)
{
    event_t bcast_event = create_event();
    bcast_event->broadcast = true;
    return bcast_event;
}
/* Tears down an event created by create_event() or
 * create_broadcast_event(): deletes its lock, frees its synchronization
 * variable, and returns its storage to the global heap.
 */
void
destroy_event(event_t e)
{
DELETE_LOCK(e->lock);
ksynch_free_var(&e->signaled);
global_heap_free(e, sizeof(linux_event_t) HEAPACCT(ACCT_OTHER));
}
/* Marks the event as signaled and wakes waiters: every waiter for a
 * broadcast event, a single one otherwise.  The state change and the wake
 * are both done while holding the event's lock.
 */
void
signal_event(event_t e)
{
    d_r_mutex_lock(&e->lock);
    ksynch_set_value(&e->signaled, 1);
    if (!e->broadcast) {
        /* Ordinary event: one waiter is enough. */
        ksynch_wake(&e->signaled);
    } else {
        ksynch_wake_all(&e->signaled);
    }
    LOG(THREAD_GET, LOG_THREADS, 3, "thread " TIDFMT " signalling event " PFX "\n",
        d_r_get_thread_id(), e);
    d_r_mutex_unlock(&e->lock);
}
/* Clears the event's signaled state while holding the event's lock. */
void
reset_event(event_t e)
{
d_r_mutex_lock(&e->lock);
ksynch_set_value(&e->signaled, 0);
LOG(THREAD_GET, LOG_THREADS, 3, "thread " TIDFMT " resetting event " PFX "\n",
d_r_get_thread_id(), e);
d_r_mutex_unlock(&e->lock);
}
/* Waits until event e is signaled.
 * timeout_ms > 0 bounds the total wait to roughly that many milliseconds;
 * timeout_ms <= 0 waits with no deadline.
 * For a non-broadcast event a successful wait resets the event, so a
 * single signal satisfies a single waiter.
 * Returns true once the event was observed signaled, false on timeout.
 */
bool
wait_for_event(event_t e, int timeout_ms)
{
#ifdef DEBUG
    dcontext_t *dcontext = get_thread_private_dcontext();
#endif
    /* Zero-initialized so neither is ever indeterminate; they are only
     * consulted when timeout_ms > 0.
     */
    uint64 start_time = 0, cur_time = 0;
    if (timeout_ms > 0)
        start_time = query_time_millis();
    /* Use a user-space event on Linux, a kernel event on Windows. */
    LOG(THREAD, LOG_THREADS, 3, "thread " TIDFMT " waiting for event " PFX "\n",
        d_r_get_thread_id(), e);
    do {
        if (ksynch_get_value(&e->signaled) == 1) {
            d_r_mutex_lock(&e->lock);
            /* Re-check under the lock: another waiter may have consumed it. */
            if (ksynch_get_value(&e->signaled) == 0) {
                /* some other thread beat us to it */
                LOG(THREAD, LOG_THREADS, 3,
                    "thread " TIDFMT " was beaten to event " PFX "\n",
                    d_r_get_thread_id(), e);
                d_r_mutex_unlock(&e->lock);
            } else {
                if (!e->broadcast) {
                    /* reset the event */
                    ksynch_set_value(&e->signaled, 0);
                }
                d_r_mutex_unlock(&e->lock);
                LOG(THREAD, LOG_THREADS, 3,
                    "thread " TIDFMT " finished waiting for event " PFX "\n",
                    d_r_get_thread_id(), e);
                return true;
            }
        } else {
            /* Waits only if the signaled flag is still 0.  The return value
             * doesn't matter because the flag will be re-checked.
             */
            ksynch_wait(&e->signaled, 0, timeout_ms);
        }
        if (ksynch_get_value(&e->signaled) == 0) {
            /* If it still has to wait, give up the cpu. */
            os_thread_yield();
        }
        if (timeout_ms > 0)
            cur_time = query_time_millis();
    } while (timeout_ms <= 0 || cur_time - start_time < timeout_ms);
    return false;
}
/***************************************************************************
 * DIRECTORY ITERATOR
 */

/* Layout of the directory entries that the getdents/getdents64 syscall
 * writes into the buffer we pass it (which variant is used depends on
 * whether SYS_getdents is defined).  Entries are variable-sized: we can
 * iterate them by adding d_reclen to the current buffer offset and interpreting
 * that as the next entry.
 */
struct linux_dirent {
#ifdef SYS_getdents
    /* Layout used with SYS_getdents. */
    unsigned long d_ino;
    unsigned long d_off;
    unsigned short d_reclen; /* size of this entry, in bytes */
    char d_name[];
#else
    /* Layout used with SYS_getdents64. */
    uint64 d_ino;
    int64 d_off;
    unsigned short d_reclen; /* size of this entry, in bytes */
    unsigned char d_type;
    char d_name[];
#endif
};

/* Views the bytes at the iterator's current buffer offset as an entry.
 * The argument is parenthesized so that any pointer expression may be passed.
 */
#define CURRENT_DIRENT(iter) ((struct linux_dirent *)(&(iter)->buf[(iter)->off]))
/* Prepares iter for enumerating the directory open on fd.  The buffer
 * starts out empty (off == end == 0), so the first call to
 * os_dir_iterator_next() goes to the kernel for entries.
 */
static void
os_dir_iterator_start(dir_iterator_t *iter, file_t fd)
{
    iter->end = 0;
    iter->off = 0;
    iter->fd = fd;
}
/* Advances iter to the next directory entry, refilling the buffer from the
 * kernel when the buffered entries are exhausted.
 * Returns true with iter->name pointing at the entry's name, or false (with
 * iter->name set to NULL) when there are no more entries or the syscall
 * failed; in the failure case iter->end holds the negative syscall result.
 */
static bool
os_dir_iterator_next(dir_iterator_t *iter)
{
#ifdef MACOS
    /* XXX: not implemented for MACOS.
     * On Linux it's only used to enumerate /proc/pid/task.
     */
    ASSERT_NOT_IMPLEMENTED(false);
    return false;
#else
    if (iter->off < iter->end) {
        /* Have existing dents, get the next offset. */
        iter->off += CURRENT_DIRENT(iter)->d_reclen;
        ASSERT(iter->off <= iter->end);
    }
    if (iter->off == iter->end) {
        /* Do a getdents syscall.  The kernel will
         * not read a partial linux_dirent struct, so we don't need to shift the
         * left over bytes to the buffer start.  See the getdents manpage for
         * the example code that this is based on.
         */
        iter->off = 0;
#    ifdef SYS_getdents
        iter->end =
            dynamorio_syscall(SYS_getdents, 3, iter->fd, iter->buf, sizeof(iter->buf));
#    else
        iter->end =
            dynamorio_syscall(SYS_getdents64, 3, iter->fd, iter->buf, sizeof(iter->buf));
#    endif
        /* Compare as signed: a failed syscall leaves a negative value in end,
         * which would otherwise be converted to a huge unsigned value and
         * trip this assert before the error is reported below.
         */
        ASSERT(iter->end <= (int)sizeof(iter->buf));
        if (iter->end <= 0) { /* No more dents, or error. */
            iter->name = NULL;
            if (iter->end < 0) {
                LOG(GLOBAL, LOG_SYSCALLS, 1, "getdents syscall failed with errno %d\n",
                    -iter->end);
            }
            return false;
        }
    }
    iter->name = CURRENT_DIRENT(iter)->d_name;
    return true;
#endif
}
* THREAD TAKEOVER
*/
typedef struct _takeover_record_t {
thread_id_t tid;
event_t event;
} takeover_record_t;
* pairs here. Each thread we signal is supposed to enter DR control and signal
* this event after it has added itself to all_threads.
*
* XXX: What we really want is to be able to use SYS_rt_tgsigqueueinfo (Linux >=
* 2.6.31) to pass the event_t to each thread directly, rather than using this
* side data structure.
*/
static takeover_record_t *thread_takeover_records;
static uint num_thread_takeover_records;
* owning_thread and signal_field threads from it in the signaled threads to
* set up siginfo sharing.
*/
static dcontext_t *takeover_dcontext;
* XXX: The /proc man page says /proc/pid/task is only available if the main
* thread is still alive, but experiments on 2.6.38 show otherwise.
*/
static thread_id_t *
os_list_threads(dcontext_t *dcontext, uint *num_threads_out)
{
dir_iterator_t iter;
file_t task_dir;
uint tids_alloced = 10;
uint num_threads = 0;
thread_id_t *new_tids;
thread_id_t *tids;
ASSERT(num_threads_out != NULL);
#ifdef MACOS
* We may want SYS_proc_info with PROC_INFO_PID_INFO and PROC_PIDLISTTHREADS,
* or is that just BSD threads and instead we want process_set_tasks()
* and task_info() as in 7.3.1.3 in Singh's OSX book?
*/
*num_threads_out = 0;
return NULL;
#endif
tids =
HEAP_ARRAY_ALLOC(dcontext, thread_id_t, tids_alloced, ACCT_THREAD_MGT, PROTECTED);
task_dir = os_open_directory("/proc/self/task", OS_OPEN_READ);
ASSERT(task_dir != INVALID_FILE);
os_dir_iterator_start(&iter, task_dir);
while (os_dir_iterator_next(&iter)) {
thread_id_t tid;
DEBUG_DECLARE(int r;)
if (strcmp(iter.name, ".") == 0 || strcmp(iter.name, "..") == 0)
continue;
IF_DEBUG(r =)
sscanf(iter.name, "%u", &tid);
ASSERT_MESSAGE(CHKLVL_ASSERTS, "failed to parse /proc/pid/task entry", r == 1);
if (tid <= 0)
continue;
if (num_threads == tids_alloced) {
new_tids = HEAP_ARRAY_ALLOC(dcontext, thread_id_t, tids_alloced * 2,
ACCT_THREAD_MGT, PROTECTED);
memcpy(new_tids, tids, sizeof(thread_id_t) * tids_alloced);
HEAP_ARRAY_FREE(dcontext, tids, thread_id_t, tids_alloced, ACCT_THREAD_MGT,
PROTECTED);
tids = new_tids;
tids_alloced *= 2;
}
tids[num_threads++] = tid;
}
ASSERT(iter.end == 0);
os_close(task_dir);
new_tids =
HEAP_ARRAY_ALLOC(dcontext, thread_id_t, num_threads, ACCT_THREAD_MGT, PROTECTED);
memcpy(new_tids, tids, sizeof(thread_id_t) * num_threads);
HEAP_ARRAY_FREE(dcontext, tids, thread_id_t, tids_alloced, ACCT_THREAD_MGT,
PROTECTED);
tids = new_tids;
*num_threads_out = num_threads;
return tids;
}
* all_threads hashtable in dynamo.c. Returns true if we found any unknown
* threads and false otherwise. We assume that since we don't know about them
* they are not under DR and have no dcontexts.
*/
bool
os_take_over_all_unknown_threads(dcontext_t *dcontext)
{
uint i;
uint num_threads;
thread_id_t *tids;
uint threads_to_signal = 0, threads_timed_out = 0;
* the last call to this routine and getting onto the all_threads list as
* we'll self-interpret our own code leading to a lot of problems.
* XXX: should we use an event to avoid this inefficient loop? We expect
* this to only happen in rare cases during attach when threads are in flux.
*/
while (uninit_thread_count > 0)
os_thread_yield();
* full synchronization at detach. The same thread may now already be on its way
* to exit, and its thread record might be gone already and make it look like a
* new native thread below. If we rely on the thread to self-detect that it was
* interrupted at a DR address we may run into a deadlock (i#2694). In order to
* avoid this, we wait here. This is expected to be uncommon, and can only happen
* with very short-lived threads.
* XXX: if this loop turns out to be too inefficient, we could support detecting
* the lock function's address bounds along w/ is_dynamo_address.
*/
while (exiting_thread_count > 0)
os_thread_yield();
d_r_mutex_lock(&thread_initexit_lock);
CLIENT_ASSERT(thread_takeover_records == NULL,
"Only one thread should attempt app take over!");
#ifdef LINUX
if (rseq_is_registered_for_current_thread())
rseq_locate_rseq_regions(false);
#endif
* our control. Shift them to the beginning of the tids array.
*/
tids = os_list_threads(dcontext, &num_threads);
if (tids == NULL) {
d_r_mutex_unlock(&thread_initexit_lock);
return false;
}
for (i = 0; i < num_threads; i++) {
thread_record_t *tr = thread_lookup(tids[i]);
if (tr == NULL ||
* XXX i#95: we need a synchall-style loop for known threads as
* they can be in DR for syscall hook handling.
* Update: we now remove the hook for start/stop: but native_exec
* or other individual threads going native could still hit this.
*/
(is_thread_currently_native(tr) && !IS_CLIENT_THREAD(tr->dcontext)))
tids[threads_to_signal++] = tids[i];
}
LOG(GLOBAL, LOG_THREADS, 1, "TAKEOVER: %d threads to take over\n", threads_to_signal);
if (threads_to_signal > 0) {
takeover_record_t *records;
handle_clone(dcontext, PTHREAD_CLONE_FLAGS);
LOG(GLOBAL, LOG_THREADS, 1, "TAKEOVER: publishing takeover records\n");
records = HEAP_ARRAY_ALLOC(dcontext, takeover_record_t, threads_to_signal,
ACCT_THREAD_MGT, PROTECTED);
for (i = 0; i < threads_to_signal; i++) {
LOG(GLOBAL, LOG_THREADS, 1, "TAKEOVER: will signal thread " TIDFMT "\n",
tids[i]);
records[i].tid = tids[i];
records[i].event = create_event();
}
thread_takeover_records = records;
num_thread_takeover_records = threads_to_signal;
takeover_dcontext = dcontext;
for (i = 0; i < threads_to_signal; i++) {
send_suspend_signal(NULL, get_process_id(), records[i].tid);
}
d_r_mutex_unlock(&thread_initexit_lock);
ASSERT_OWN_NO_LOCKS();
for (i = 0; i < threads_to_signal; i++) {
static const int progress_period = 50;
if (i % progress_period == 0) {
char buf[16];
snprintf(buf, BUFFER_SIZE_ELEMENTS(buf), "%d/%d", i + 1,
threads_to_signal + 1);
NULL_TERMINATE_BUFFER(buf);
SYSLOG(SYSLOG_VERBOSE, INFO_ATTACHED, 3, buf, get_application_name(),
get_application_pid());
}
static const int wait_ms = 25;
int max_attempts =
DYNAMO_OPTION(takeover_timeout_ms) / wait_ms;
int attempts = 0;
while (!wait_for_event(records[i].event, wait_ms)) {
char task[64];
snprintf(task, BUFFER_SIZE_ELEMENTS(task), "/proc/self/task/%d", tids[i]);
NULL_TERMINATE_BUFFER(task);
if (!os_file_exists(task, false )) {
SYSLOG_INTERNAL_WARNING_ONCE("thread exited while attaching");
break;
}
if (++attempts > max_attempts) {
if (DYNAMO_OPTION(unsafe_ignore_takeover_timeout)) {
SYSLOG(
SYSLOG_VERBOSE, THREAD_TAKEOVER_TIMED_OUT, 3,
get_application_name(), get_application_pid(),
"Continuing since -unsafe_ignore_takeover_timeout is set.");
++threads_timed_out;
} else {
SYSLOG(
SYSLOG_VERBOSE, THREAD_TAKEOVER_TIMED_OUT, 3,
get_application_name(), get_application_pid(),
"Aborting. Use -unsafe_ignore_takeover_timeout to ignore.");
REPORT_FATAL_ERROR_AND_EXIT(FAILED_TO_TAKE_OVER_THREADS, 2,
get_application_name(),
get_application_pid());
}
break;
}
}
}
* records and reset the shared globals.
*/
d_r_mutex_lock(&thread_initexit_lock);
LOG(GLOBAL, LOG_THREADS, 1,
"TAKEOVER: takeover complete, unpublishing records\n");
thread_takeover_records = NULL;
num_thread_takeover_records = 0;
takeover_dcontext = NULL;
for (i = 0; i < threads_to_signal; i++) {
destroy_event(records[i].event);
}
HEAP_ARRAY_FREE(dcontext, records, takeover_record_t, threads_to_signal,
ACCT_THREAD_MGT, PROTECTED);
}
d_r_mutex_unlock(&thread_initexit_lock);
HEAP_ARRAY_FREE(dcontext, tids, thread_id_t, num_threads, ACCT_THREAD_MGT, PROTECTED);
ASSERT(threads_to_signal >= threads_timed_out);
return (threads_to_signal - threads_timed_out) > 0;
}
/* If the current thread has a thread record but is not currently seen as
 * initialized (X86 only), swaps DR's TLS back in for it.
 * Returns true if the thread was re-taken-over this way, false otherwise
 * (always false when X86 is not defined).
 */
bool
os_thread_re_take_over(void)
{
#ifdef X86
    /* A thread whose TLS is allocated but which does not look initialized may
     * be one we know about that is currently native.
     * We bypass the magic field checks here of is_thread_tls_initialized().
     * XXX: should this be inside is_thread_initialized()?  But that may mislead
     * other callers: the caller has to restore the TLs.  Some old code also
     * used get_thread_private_dcontext() being NULL to indicate an unknown thread:
     * that should also call here.
     */
    if (!is_thread_initialized() && is_thread_tls_allocated()) {
        /* It's safe to call thread_lookup() for ourself. */
        thread_record_t *tr = thread_lookup(get_sys_thread_id());
        if (tr != NULL) {
            ASSERT(is_thread_currently_native(tr));
            LOG(GLOBAL, LOG_THREADS, 1, "\tretakeover for cur-native thread " TIDFMT "\n",
                get_sys_thread_id());
            LOG(tr->dcontext->logfile, LOG_THREADS, 1,
                "\nretakeover for cur-native thread " TIDFMT "\n", get_sys_thread_id());
            os_swap_dr_tls(tr->dcontext, false);
            ASSERT(is_thread_initialized());
            return true;
        }
    }
#endif
    return false;
}
/* Finds the current thread's entry in thread_takeover_records and signals
 * its event, which tells the thread waiting on that event that this thread
 * has been handled.
 */
static void
os_thread_signal_taken_over(void)
{
    const thread_id_t self_tid = d_r_get_thread_id();
    event_t my_event = NULL;
    uint idx = 0;

    ASSERT(thread_takeover_records != NULL);
    /* Linear scan for the first record carrying our thread id. */
    while (idx < num_thread_takeover_records &&
           thread_takeover_records[idx].tid != self_tid)
        idx++;
    if (idx < num_thread_takeover_records)
        my_event = thread_takeover_records[idx].event;

    ASSERT_MESSAGE(CHKLVL_ASSERTS, "mytid not present in takeover records!",
                   my_event != NULL);
    signal_event(my_event);
}
* that signaled us by signalling our event in thread_takeover_records.
* If it returns, it returns false, and the thread should be let go.
*/
bool
os_thread_take_over(priv_mcontext_t *mc, kernel_sigset_t *sigset)
{
    dcontext_t *dcontext;

    LOG(GLOBAL, LOG_THREADS, 1, "TAKEOVER: received signal in thread " TIDFMT "\n",
        get_sys_thread_id());

    /* Thread initialization here follows the same lines as
     * create_clone_record and new_thread_setup, except we're not putting a
     * clone record on the dstack.
     */
    os_thread_re_take_over();
    if (is_thread_initialized()) {
        /* This thread already has a dcontext: reuse it. */
        dcontext = get_thread_private_dcontext();
    } else if (is_dynamo_address(mc->pc)) {
        /* The interrupted pc is a DynamoRIO address (per is_dynamo_address):
         * just notify the waiter and tell the caller to let the thread go.
         */
        os_thread_signal_taken_over();
        return false;
    } else {
        dcontext = init_thread_with_shared_siginfo(mc, takeover_dcontext);
    }
    ASSERT(dcontext != NULL);

    signal_set_mask(dcontext, sigset);
    signal_swap_mask(dcontext, true);
    dynamo_thread_under_dynamo(dcontext);

    /* Copy the passed-in machine context into the dcontext and arrange for
     * execution to continue at its pc.
     */
    priv_mcontext_t *saved_mc = get_mcontext(dcontext);
    *saved_mc = *mc;
    dcontext->whereami = DR_WHERE_APP;
    dcontext->next_tag = mc->pc;

    os_thread_signal_taken_over();

    DOLOG(2, LOG_TOP, {
        byte *stack_now;
        GET_STACK_PTR(stack_now);
        LOG(THREAD, LOG_TOP, 2,
            "%s: next_tag=" PFX ", cur xsp=" PFX ", mc->xsp=" PFX "\n", __FUNCTION__,
            dcontext->next_tag, stack_now, mc->xsp);
    });
#ifdef LINUX
    /* Only set up rseq handling for a thread with rseq registered, to avoid
     * treating regions as rseq when the rseq syscall is never set up.
     */
    if (rseq_is_registered_for_current_thread()) {
        rseq_locate_rseq_regions(false);
        rseq_thread_attach(dcontext);
    }
#endif
    /* Switch to the dstack and enter d_r_dispatch; control is not expected to
     * come back (see ASSERT_NOT_REACHED), so the return below is a formality.
     */
    call_switch_stack(dcontext, dcontext->dstack, (void (*)(void *))d_r_dispatch,
                      NULL, false);
    ASSERT_NOT_REACHED();
    return true;
}
/* Flags a thread for re-takeover by setting `retakeover` in its
 * os_thread_data_t, for when it resumes.
 *
 * Returns false, without touching the flag, if the thread is not currently
 * native or if its `suspended` ksynch value is negative; returns true once
 * the flag has been set.
 */
bool
os_thread_take_over_suspended_native(dcontext_t *dcontext)
{
    os_thread_data_t *os_data = (os_thread_data_t *)dcontext->os_field;

    if (is_thread_currently_native(dcontext->thread_record) &&
        ksynch_get_value(&os_data->suspended) >= 0) {
        /* XXX: this path is marked as not tested.  i#721 may help. */
        ASSERT_NOT_TESTED();
        os_data->retakeover = true;
        return true;
    }
    return false;
}
/* Called for os-specific takeover of a secondary thread from the one
 * that called dr_app_setup().
 */
dcontext_t *
os_thread_take_over_secondary(priv_mcontext_t *mc)
{
    thread_record_t **threads;
    int count;
    int idx = 0;
    dcontext_t *new_dc;

    d_r_mutex_lock(&thread_initexit_lock);
    get_list_of_threads(&threads, &count);
    ASSERT(count >= 1);
    /* Advance to the first thread whose signal info is already initialized. */
    while (idx < count && !is_thread_signal_info_initialized(threads[idx]->dcontext))
        idx++;
    ASSERT(idx < count);
    ASSERT(threads[idx]->id != get_sys_thread_id());
    /* Initialize this thread against the dcontext of the thread found above. */
    handle_clone(threads[idx]->dcontext, PTHREAD_CLONE_FLAGS);
    new_dc = init_thread_with_shared_siginfo(mc, threads[idx]->dcontext);
    d_r_mutex_unlock(&thread_initexit_lock);
    /* The thread list handed back by get_list_of_threads is freed here. */
    global_heap_free(threads,
                     count * sizeof(thread_record_t *) HEAPACCT(ACCT_THREAD_MGT));
    return new_dc;
}
/* Returns a seed read from /dev/urandom.
 *
 * If the device cannot be opened or the read fails, 0 is returned.  The seed
 * is pre-initialized so that such a failure yields a defined value instead of
 * indeterminate stack contents.
 */
uint
os_random_seed(void)
{
    uint seed = 0;
    int urand = os_open("/dev/urandom", OS_OPEN_READ);
    /* NOTE(review): the read result is only declared via DEBUG_DECLARE, so the
     * ASSERT below is presumably a debug-build-only check -- confirm.
     */
    DEBUG_DECLARE(int read =) os_read(urand, &seed, sizeof(seed));
    ASSERT(read == sizeof(seed));
    os_close(urand);
    return seed;
}
#ifdef RCT_IND_BRANCH
/* Analyze a range in a possibly new module
 * return false if not a code section in a module
 * otherwise returns true and adds all valid targets for rct_ind_branch_check
 */
bool
rct_analyze_module_at_violation(dcontext_t *dcontext, app_pc target_pc)
{
    /* FIXME: the region lookup below relies on get_memory_info only:
     * we don't yet have a corresponding get_allocation_size or an ELF header walk
     * routine on linux
     */
    app_pc code_start;
    size_t code_size;
    uint prot;

    if (!get_memory_info(target_pc, &code_start, &code_size, &prot))
        return false;
    /* Nothing to scan for an empty region. */
    if (code_size == 0)
        return false;

    app_pc code_end = code_start + code_size;
    ASSERT(TESTALL(MEMPROT_READ | MEMPROT_EXEC, prot));

    /* The region immediately following the code is expected to be
     * the corresponding data section.
     * Writable yet initialized data indeed needs to be processed.
     */
    app_pc data_start;
    size_t data_size;
    if (!get_memory_info(code_end, &data_start, &data_size, &prot))
        return false;
    ASSERT(data_start == code_end);
    ASSERT(TESTALL(MEMPROT_READ | MEMPROT_WRITE, prot));

    /* Scan code plus data for references that land inside the code region.
     * NOTE(review): code_size is a size_t but is logged with %d -- confirm.
     */
    app_pc scan_start = code_start;
    app_pc scan_end = data_start + data_size;
    DEBUG_DECLARE(uint found =)
    find_address_references(dcontext, scan_start, scan_end, code_start, code_end);
    LOG(GLOBAL, LOG_RCT, 2, PFX "-" PFX " : %d ind targets of %d code size",
        scan_start, scan_end, found, code_size);
    return true;
}
# ifdef X64
/* Stub: ignores its arguments and always returns false.
 * NOTE(review): presumably a placeholder for unimplemented functionality --
 * confirm against callers.
 */
bool
rct_add_rip_rel_addr(dcontext_t *dcontext, app_pc tgt _IF_DEBUG(app_pc src))
{
return false;
}
# endif
#endif
#ifdef HOT_PATCHING_INTERFACE
/* Not implemented in this file: trips ASSERT_NOT_IMPLEMENTED and returns NULL.
 * Declared with an explicit (void) parameter list so the definition is a
 * prototype.
 */
void *
get_drmarker_hotp_policy_status_table(void)
{
    ASSERT_NOT_IMPLEMENTED(false);
    return NULL;
}
/* Not implemented in this file: trips ASSERT_NOT_IMPLEMENTED; new_table is
 * ignored.
 */
void
set_drmarker_hotp_policy_status_table(void *new_table)
{
ASSERT_NOT_IMPLEMENTED(false);
}
/* Not implemented in this file: trips ASSERT_NOT_IMPLEMENTED and returns NULL.
 * All parameters are ignored.
 */
byte *
hook_text(byte *hook_code_buf, const app_pc image_addr, intercept_function_t hook_func,
const void *callee_arg, const after_intercept_action_t action_after,
const bool abort_if_hooked, const bool ignore_cti, byte **app_code_copy_p,
byte **alt_exit_tgt_p)
{
ASSERT_NOT_IMPLEMENTED(false);
return NULL;
}
/* Not implemented in this file: trips ASSERT_NOT_IMPLEMENTED; both
 * parameters are ignored.
 */
void
unhook_text(byte *hook_code_buf, app_pc image_addr)
{
ASSERT_NOT_IMPLEMENTED(false);
}
/* Not implemented in this file: trips ASSERT_NOT_IMPLEMENTED; all
 * parameters are ignored.
 */
void
insert_jmp_at_tramp_entry(dcontext_t *dcontext, byte *trampoline, byte *target)
{
ASSERT_NOT_IMPLEMENTED(false);
}
#endif
/* Stub: always returns false; target is not examined. */
bool
aslr_is_possible_attack(app_pc target)
{
return false;
}
/* Stub: always returns NULL; target_addr is not examined. */
app_pc
aslr_possible_preferred_address(app_pc target_addr)
{
return NULL;
}
/* Intentionally empty: this implementation performs no work.
 * Declared with an explicit (void) parameter list so the definition is a
 * prototype.
 */
void
take_over_primary_thread(void)
{
}
/* Appends the per-user component "<DIRSEP>dpc-<uid>" in place to the
 * nul-terminated path held in directory_prefix, and optionally creates that
 * directory.
 *
 * directory_prefix: in/out buffer; holds the prefix on entry and the per-user
 *                   path on return.
 * directory_len:    total capacity of directory_prefix in bytes.
 * create:           when true and the directory does not exist, create it.
 *
 * Returns false if the buffer is NULL or zero-sized, if the prefix already
 * occupies the whole buffer (previously this made the remaining-size
 * computation wrap around), or if creating the directory fails.  Returns true
 * otherwise, including when the directory is missing and create is false.
 */
bool
os_current_user_directory(char *directory_prefix /* INOUT */, uint directory_len,
                          bool create)
{
    size_t prefix_len;
    uid_t uid;

    if (directory_prefix == NULL || directory_len == 0)
        return false;
    prefix_len = strlen(directory_prefix);
    if (prefix_len >= directory_len)
        return false;

    uid = dynamorio_syscall(SYS_getuid, 0);
    /* NOTE(review): uid is formatted with %d; left unchanged so that existing
     * directory names stay the same.
     */
    snprintf(directory_prefix + prefix_len, directory_len - prefix_len, "%cdpc-%d",
             DIRSEP, uid);
    /* Force termination in case the formatted name was truncated. */
    directory_prefix[directory_len - 1] = '\0';
    if (!os_file_exists(directory_prefix, true) && create) {
        if (!os_create_dir(directory_prefix, CREATE_DIR_REQUIRE_NEW)) {
            LOG(GLOBAL, LOG_CACHE, 2, "\terror creating per-user dir %s\n",
                directory_prefix);
            return false;
        }
        LOG(GLOBAL, LOG_CACHE, 2, "\tcreated per-user dir %s\n", directory_prefix);
    }
    return true;
}
/* Not supported here: the ASSERT condition is constant false, so this
 * asserts unconditionally, and it then returns false.  The handle is never
 * examined.
 */
bool
os_validate_user_owned(file_t file_or_directory_handle)
{
ASSERT(false && "chown Alice evilfile");
return false;
}
/* Always returns false.
 * NOTE(review): what false signifies to callers is not visible here --
 * confirm against the declaration.
 */
bool
os_check_option_compatibility(void)
{
return false;
}
#ifdef X86_32
/* Emulate uint64 modulo and division by uint32 on ia32.
 * XXX: Does *not* handle 64-bit divisors!
 */
static uint64
uint64_divmod(uint64 dividend, uint64 divisor64, uint32 *remainder)
{
    /* Overlay giving access to the two 32-bit halves of the 64-bit value;
     * `lo` sits at the lower address.
     */
    union {
        uint64 v64;
        struct {
            uint32 lo;
            uint32 hi;
        };
    } quot;
    uint32 divisor = (uint32)divisor64;
    uint32 carry;

    ASSERT(divisor64 <= UINT_MAX && "divisor is larger than uint32 can hold");

    quot.v64 = dividend;
    /* Divide out the high half first; what is left over (carry) is smaller
     * than the divisor and becomes the high word of the second division.
     */
    carry = quot.hi % divisor;
    quot.hi = quot.hi / divisor;

    /* Use the unsigned div instruction, which takes EDX:EAX as a 64-bit
     * dividend.  We only get a 32-bit quotient out, which is why we divide out
     * the high bits first.  The quotient will fit in EAX.
     *
     * DIV r/m32    F7 /6  Unsigned divide EDX:EAX by r/m32, with result stored
     *                     in EAX <- Quotient, EDX <- Remainder.
     * inputs:
     *   EAX = quot.lo
     *   EDX = carry
     *   rm = divisor
     * outputs:
     *   quot.lo = EAX
     *   *remainder = EDX
     * The outputs precede the inputs in gcc inline asm syntax, and so to put
     * inputs in EAX and EDX we use "0" and "1".
     */
    asm("divl %2"
        : "=a"(quot.lo), "=d"(*remainder)
        : "rm"(divisor), "0"(quot.lo), "1"(carry));
    return quot.v64;
}
/* Returns the quotient computed by uint64_divmod(dividend, divisor); the
 * remainder is discarded.  Inherits uint64_divmod's restriction that the
 * divisor must fit in 32 bits.
 * NOTE(review): the name matches libgcc's unsigned 64-bit division routine,
 * presumably so compiler-generated divisions resolve here -- confirm.
 */
uint64
__udivdi3(uint64 dividend, uint64 divisor)
{
uint32 remainder;
return uint64_divmod(dividend, divisor, &remainder);
}
/* Returns the remainder computed by uint64_divmod(dividend, divisor), widened
 * to uint64; the quotient is discarded.  Inherits uint64_divmod's restriction
 * that the divisor must fit in 32 bits.
 * NOTE(review): the name matches libgcc's unsigned 64-bit modulo routine,
 * presumably so compiler-generated modulo operations resolve here -- confirm.
 */
uint64
__umoddi3(uint64 dividend, uint64 divisor)
{
uint32 remainder;
uint64_divmod(dividend, divisor, &remainder);
return (uint64)remainder;
}
/* Replaces the 64-bit value held in *lo (low half) and *hi (high half) with its
 * two's complement negation.
 */
static void
int64_divmod_negate(unsigned int *lo, unsigned int *hi)
{
    *lo = 0U - *lo;
    /* ~x + 1 carries into the high half exactly when the low half is zero. */
    *hi = ~*hi + (*lo == 0 ? 1U : 0U);
}

/* Signed 64-bit by 32-bit division, truncating toward zero.
 *
 * dividend:  value to divide.
 * divisor64: divisor; asserted to fit in an int (64-bit divisors are not
 *            handled).
 * remainder: receives the remainder, which takes the sign of the dividend.
 *
 * Returns the quotient.
 *
 * The division is carried out on unsigned magnitudes with the unsigned div
 * instruction, in the same two steps as uint64_divmod, and the signs are
 * applied afterwards.  Running the signed idiv instruction directly on the
 * split halves is incorrect: the low half is not a signed quantity, and the
 * quotient of the second step needs up to 32 unsigned bits, so idiv raises a
 * divide error for inputs such as 2^31 / 1 and produces a wrong sign for
 * inputs such as -2^32 / 2.
 */
static int64
int64_divmod(int64 dividend, int64 divisor64, int *remainder)
{
    union {
        int64 v64;
        struct {
            unsigned int lo;
            unsigned int hi;
        };
    } res;
    int divisor = (int)divisor64;
    const int dividend_neg = dividend < 0;
    const int quotient_neg = dividend_neg != (divisor < 0);
    unsigned int udivisor;
    unsigned int upper;
    unsigned int urem;

    ASSERT(divisor64 <= INT_MAX && divisor64 >= INT_MIN && "divisor too large for int");

    /* Magnitude of the divisor.  Negating in unsigned arithmetic is
     * well-defined even for INT_MIN.
     */
    udivisor = divisor < 0 ? 0U - (unsigned int)divisor : (unsigned int)divisor;

    /* Magnitude of the dividend, negated half-by-half to avoid signed overflow. */
    res.v64 = dividend;
    if (dividend_neg)
        int64_divmod_negate(&res.lo, &res.hi);

    /* Divide out the high half first; the leftover is smaller than the
     * divisor, which guarantees the second quotient fits in 32 bits.
     */
    upper = res.hi % udivisor;
    res.hi = res.hi / udivisor;

    /* div: unsigned divide
     * on edx:eax by r/m32 => quotient in eax, remainder in edx.
     */
    asm("divl %2"
        : "=a"(res.lo), "=d"(urem)
        : "rm"(udivisor), "0"(res.lo), "1"(upper));

    if (quotient_neg)
        int64_divmod_negate(&res.lo, &res.hi);
    /* urem < udivisor <= 2^31, so it always fits in an int. */
    *remainder = dividend_neg ? -(int)urem : (int)urem;
    return res.v64;
}
/* Returns the quotient computed by int64_divmod(dividend, divisor); the
 * remainder is discarded.  int64_divmod asserts that the divisor fits in an
 * int.
 * NOTE(review): the name matches libgcc's signed 64-bit division routine,
 * presumably so compiler-generated divisions resolve here -- confirm.
 */
int64
__divdi3(int64 dividend, int64 divisor)
{
int remainder;
return int64_divmod(dividend, divisor, &remainder);
}
#elif defined(ARM)
/* For ARM, no division emulation routines are defined here.
 * We link with __aeabi routines from libgcc via third_party/libgcc.
 */
#endif
/****************************************************************************
 * Tests
 */
#if defined(STANDALONE_UNIT_TEST)
/* Unit test for uint64_divmod and for compiler-generated 64-bit division and
 * modulo.  Does nothing unless X86_32 is defined.
 */
void
test_uint64_divmod(void)
{
#    ifdef X86_32
    /* Each row: dividend, divisor, expected quotient, expected remainder. */
    static const struct {
        uint64 dividend;
        uint64 divisor;
        uint64 quotient;
        uint32 remainder;
    } cases[] = {
        { 9, 3, 3, 0 },
        { 10, 3, 3, 1 },
        { 45ULL << 31, 1U << 31, 45, 0 },
        { 45ULL << 32, 15, 3ULL << 32, 0 },
        { (45ULL << 32) + 13, 15, 3ULL << 32, 13 },
    };
    uint64 quotient;
    uint32 remainder;
    unsigned int idx;

    for (idx = 0; idx < sizeof(cases) / sizeof(cases[0]); idx++) {
        quotient = uint64_divmod(cases[idx].dividend, cases[idx].divisor, &remainder);
        EXPECT(quotient == cases[idx].quotient, true);
        EXPECT(remainder == cases[idx].remainder, true);
    }

    /* Also exercise the plain C operators.  Don't divide by powers of two, gcc
     * will lower that to a shift.
     */
    quotient = (45ULL << 32);
    quotient /= 15;
    EXPECT(quotient == (3ULL << 32), true);
    quotient = (45ULL << 32) + 13;
    remainder = quotient % 15;
    EXPECT(remainder == 13, true);
#    endif
}
/* Entry point for this file's standalone unit tests: runs
 * test_uint64_divmod().
 */
void
unit_test_os(void)
{
test_uint64_divmod();
}
#endif