* Copyright (c) 2013-2023 Google, Inc. All rights reserved.
* *******************************************************************************/
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
* tls_macos.c - tls support from the kernel
*
* FIXME i#58: NYI (see comments below as well):
* + not at all implemented, though 32-bit seems straightforward
* + don't have a good story for 64-bit
* + longer-term i#1291: use raw syscalls instead of libSystem wrappers
*/
#include "../globals.h"
#include "tls.h"
#include <architecture/i386/table.h>
#include <i386/user_ldt.h>
#include <pthread.h>
#ifndef MACOS
# error Mac-only
#endif
#define SYS_thread_set_tsd_base 3
#define SYS_thread_set_user_ldt 4
#define SYS_i386_set_ldt 5
#define SYS_i386_get_ldt 6
* XXX: a 32-bit Mac kernel will return 0x3f?
* If so, update GDT_NUM_TLS_SLOTS in tls.h.
*/
#define TLS_DR_SELECTOR 0x1f
#define TLS_DR_INDEX 0x3
static uint tls_app_index;
#ifdef X64
static pthread_key_t keys_start = 0;
static pthread_key_t
tls_alloc_key(void)
{
pthread_key_t key;
if (pthread_key_create(&key, NULL) != 0) {
REPORT_FATAL_ERROR_AND_EXIT(FAILED_TO_ALLOCATE_TLS, 3, get_application_name(),
get_application_pid(),
"System is out of slots or out of memory.");
ASSERT_NOT_REACHED();
}
return key;
}
void
tls_process_init(void)
{
* slots using pthread_key_create(). Our initial implementation allocates
* enough to fit our entire os_local_state_t struct, to make Mac64 behave
* like Linux. If this proves to be too many slots taken from the app,
* we'll want to shift to a strategy like Windows where we only put
* local_state_extended_t in slots and have a separate DR allocation for our
* other data, pointed at by a TLS slot (one of these, or slot 6).
*/
int num_slots_needed = sizeof(os_local_state_t) / sizeof(void *);
byte *seg_base = get_segment_base(TLS_REG_LIB);
uint alignment;
if (DYNAMO_OPTION(tls_align) == 0) {
IF_X64(ASSERT_TRUNCATE(alignment, uint, proc_get_cache_line_size()));
alignment = (uint)proc_get_cache_line_size();
} else {
alignment = DYNAMO_OPTION(tls_align);
}
int i;
pthread_key_t delete_start = 0, delete_end = 0;
for (i = 0; i < alignment / sizeof(void *); i++) {
pthread_key_t key = tls_alloc_key();
if (ALIGNED(seg_base + key * sizeof(void *), alignment)) {
keys_start = key;
break;
}
if (i == 0)
delete_start = key;
delete_end = key;
}
if (keys_start == 0) {
REPORT_FATAL_ERROR_AND_EXIT(FAILED_TO_ALLOCATE_TLS, 3, get_application_name(),
get_application_pid(),
"Failed to find aligned slot.");
ASSERT_NOT_REACHED();
}
for (i = 1; i < num_slots_needed; i++) {
pthread_key_t key = tls_alloc_key();
if (key != keys_start + i) {
* contiguous range elsewhere in the TLS space, like we do on Windows,
* instead of assuming the first free set is big enough.
*/
REPORT_FATAL_ERROR_AND_EXIT(FAILED_TO_ALLOCATE_TLS, 3, get_application_name(),
get_application_pid(),
"Slots are not contiguous.");
ASSERT_NOT_REACHED();
}
pthread_setspecific(key, NULL);
}
if (delete_start > 0) {
for (pthread_key_t key = delete_start; key <= delete_end; key++) {
DEBUG_DECLARE(int res =)
pthread_key_delete(key);
ASSERT(res == 0);
}
}
LOG(GLOBAL, LOG_THREADS, 1, "Reserved TLS keys %d-%d from base " PFX "\n", keys_start,
keys_start + num_slots_needed - 1, get_segment_base(TLS_REG_LIB));
DODEBUG({
int seg_offs = keys_start * sizeof(void *);
ASSERT((ptr_int_t)pthread_getspecific(keys_start) == 0);
ASSERT(*(ptr_int_t *)(seg_base + seg_offs) == 0);
# define MAGIC_VALUE 0xdeadbeef12345678UL
int res = pthread_setspecific(keys_start, (void *)MAGIC_VALUE);
ASSERT(res == 0);
ASSERT((ptr_int_t)pthread_getspecific(keys_start) == MAGIC_VALUE);
ASSERT(*(ptr_int_t *)(seg_base + seg_offs) == MAGIC_VALUE);
});
}
void
tls_process_exit(void)
{
int num_slots_needed = sizeof(os_local_state_t) / sizeof(void *);
for (int i = 0; i < num_slots_needed; i++) {
DEBUG_DECLARE(int res =)
pthread_key_delete(keys_start + i);
ASSERT(res == 0);
}
}
int
tls_get_dr_offs(void)
{
return keys_start * sizeof(void *);
}
byte *
tls_get_dr_addr(void)
{
if (keys_start == 0) {
return NULL;
}
byte *seg_base = get_segment_base(TLS_REG_LIB);
return seg_base + keys_start * sizeof(void *);
}
byte **
get_app_tls_swap_slot_addr(void)
{
byte *app_tls_base = (byte *)read_thread_register(TLS_REG_LIB);
if (app_tls_base == NULL) {
ASSERT_NOT_IMPLEMENTED(false);
}
return (byte **)(app_tls_base + DR_TLS_BASE_OFFSET);
}
#endif
#ifdef AARCH64
byte **
get_dr_tls_base_addr(void)
{
return get_app_tls_swap_slot_addr();
}
#endif
void
tls_thread_init(os_local_state_t *os_tls, byte *segment)
{
#ifdef X64
* it in slot 6. If we settle on always doing the full os_local_state_t in slots,
* we would probably get rid of the use of slot 6 on x86 (on aarch64 the
* os_local_state_t slots are not directly addressible; we rely on the stolen
* register, whose value is populated from the pointer in slot 6 -- which could
* be moved to a slot right before os_local_state_t or something I suppose, or
* we could move the whole os_local_state_t to our own mmap since we access
* through a pointer anyway).
*/
byte **tls_swap_slot;
ASSERT((byte *)(os_tls->self) == segment);
tls_swap_slot = get_app_tls_swap_slot_addr();
ASSERT_NOT_IMPLEMENTED(*tls_swap_slot == NULL);
*tls_swap_slot = segment;
os_tls->tls_type = TLS_TYPE_SLOT;
#else
* support which swaps it on thread switches.
* However, when I invoke it, while the call succeeds and returns the
* expected 0x1f, I get a fault when I load that selector value into %fs.
* Thus we fall back to i386_set_ldt.
*/
ldt_t ldt;
int res;
ldt.data.base00 = (ushort)(ptr_uint_t)segment;
ldt.data.base16 = (byte)((ptr_uint_t)segment >> 16);
ldt.data.base24 = (byte)((ptr_uint_t)segment >> 24);
ldt.data.limit00 = (ushort)PAGE_SIZE;
ldt.data.limit16 = 0;
ldt.data.type = DESC_DATA_WRITE;
ldt.data.dpl = USER_PRIVILEGE;
ldt.data.present = 1;
ldt.data.stksz = DESC_DATA_32B;
ldt.data.granular = 0;
res = dynamorio_mach_dep_syscall(SYS_i386_set_ldt, 3, LDT_AUTO_ALLOC, &ldt, 1);
if (res < 0) {
LOG(THREAD_GET, LOG_THREADS, 4, "%s failed with code %d\n", __FUNCTION__, res);
ASSERT_NOT_REACHED();
} else {
uint index = (uint)res;
uint selector = LDT_SELECTOR(index);
* but later ones seem to need new slots (originally I thought
* the kernel would swap our one slot for us). We leave
* GDT_NUM_TLS_SLOTS as just 3 under the assumption the app
* won't use more than that.
*/
ASSERT(dynamo_initialized || selector == TLS_DR_SELECTOR);
LOG(THREAD_GET, LOG_THREADS, 2, "%s: LDT index %d\n", __FUNCTION__, res);
os_tls->tls_type = TLS_TYPE_LDT;
os_tls->ldt_index = selector;
WRITE_DR_SEG(selector);
}
#endif
}
bool
tls_thread_preinit()
{
return true;
}
#ifndef X64
void
tls_reinstate_selector(uint selector)
{
WRITE_DR_SEG(selector);
}
#endif
void
tls_thread_free(tls_type_t tls_type, int index)
{
#ifdef X64
byte **tls_swap_slot;
ASSERT(tls_type == TLS_TYPE_SLOT);
tls_swap_slot = get_app_tls_swap_slot_addr();
ASSERT(tls_swap_slot != NULL);
DEBUG_DECLARE(os_local_state_t *os_tls = (os_local_state_t *)*tls_swap_slot;)
ASSERT(os_tls->self == os_tls);
*tls_swap_slot = TLS_SLOT_VAL_EXITED;
#else
int res = dynamorio_mach_dep_syscall(SYS_thread_set_user_ldt, 3, NULL, 0, 0);
if (res < 0) {
LOG(THREAD_GET, LOG_THREADS, 4, "%s failed with code %d\n", __FUNCTION__, res);
ASSERT_NOT_REACHED();
}
#endif
}
#ifndef AARCHXX
* Returns POINTER_MAX on failure.
*/
byte *
tls_get_fs_gs_segment_base(uint seg)
{
uint selector;
uint index;
ldt_t ldt;
byte *base;
int res;
IF_X64(ASSERT_NOT_REACHED());
# ifdef X86
if (seg != SEG_FS && seg != SEG_GS)
return (byte *)POINTER_MAX;
selector = read_thread_register(seg);
index = SELECTOR_INDEX(selector);
LOG(THREAD_GET, LOG_THREADS, 4, "%s selector %x index %d ldt %d\n", __FUNCTION__,
selector, index, TEST(SELECTOR_IS_LDT, selector));
if (!TEST(SELECTOR_IS_LDT, selector) && selector != 0) {
ASSERT_NOT_IMPLEMENTED(false);
return (byte *)POINTER_MAX;
}
* not a selector value.
*/
res = dynamorio_mach_dep_syscall(SYS_i386_get_ldt, 3, index, &ldt, 1);
if (res < 0) {
LOG(THREAD_GET, LOG_THREADS, 4, "%s failed with code %d\n", __FUNCTION__, res);
ASSERT_NOT_REACHED();
return (byte *)POINTER_MAX;
}
base = (byte *)(((ptr_uint_t)ldt.data.base24 << 24) |
((ptr_uint_t)ldt.data.base16 << 16) | (ptr_uint_t)ldt.data.base00);
LOG(THREAD_GET, LOG_THREADS, 4, "%s => base " PFX "\n", __FUNCTION__, base);
return base;
# endif
}
* Sets only the base: does not change the segment selector register.
*/
bool
tls_set_fs_gs_segment_base(tls_type_t tls_type, uint seg,
* else, desc is used.
*/
byte *base, our_modify_ldt_t *desc)
{
ASSERT_NOT_IMPLEMENTED(false);
return false;
}
#endif
void
tls_init_descriptor(our_modify_ldt_t *desc OUT, void *base, size_t size, uint index)
{
ASSERT_NOT_IMPLEMENTED(false);
}
bool
tls_get_descriptor(int index, our_modify_ldt_t *desc OUT)
{
* on MacOS. For now we implement the handful of such interactions we
* need to get the initial port running.
*/
ldt_t ldt;
int res = dynamorio_mach_dep_syscall(SYS_i386_get_ldt, 3, index, &ldt, 1);
if (res < 0) {
memset(desc, 0, sizeof(*desc));
return false;
}
desc->entry_number = index;
desc->base_addr = (((uint)ldt.data.base24 << 24) | ((uint)ldt.data.base16 << 16) |
(uint)ldt.data.base00);
desc->limit = ((uint)ldt.data.limit16 << 16) | (uint)ldt.data.limit00;
desc->seg_32bit = ldt.data.stksz;
desc->contents = ldt.data.type >> 2;
desc->read_exec_only = !(ldt.data.type & 2);
desc->limit_in_pages = ldt.data.granular;
desc->seg_not_present = (ldt.data.present ? 0 : 1);
desc->useable = 1;
return true;
}
bool
tls_clear_descriptor(int index)
{
ASSERT_NOT_IMPLEMENTED(false);
return false;
}
int
tls_dr_index(void)
{
#ifdef X64
ASSERT_NOT_IMPLEMENTED(false);
#else
return TLS_DR_INDEX;
#endif
return 0;
}
int
tls_priv_lib_index(void)
{
ASSERT_NOT_IMPLEMENTED(false);
return 0;
}
bool
tls_dr_using_msr(void)
{
#ifdef X64
ASSERT_NOT_IMPLEMENTED(false);
#endif
return false;
}
void
tls_initialize_indices(os_local_state_t *os_tls)
{
#ifdef X64
ASSERT_NOT_IMPLEMENTED(false);
#else
uint selector = read_thread_register(SEG_GS);
tls_app_index = SELECTOR_INDEX(selector);
ASSERT(tls_app_index == 1);
#endif
}
int
tls_min_index(void)
{
return tls_app_index;
}
void
tls_early_init(void)
{
}