* Copyright (c) 2011-2022 Google, Inc. All rights reserved.
* Copyright (c) 2000-2010 VMware, Inc. All rights reserved.
* **********************************************************/
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/*
 * Note that ATOMIC_*_READ/WRITE() macros follow acquire-release semantics.
 */
#ifndef _ATOMIC_EXPORTS_H_
#define _ATOMIC_EXPORTS_H_ 1
/* ... to a thread in the cache, this is needed for our current flushing
* implementation. Note that linking and unlinking are only atomic with
* respect to a thread in the cache not with respect to a thread in dynamorio
* (which can see linking flags etc.)
*/
/* ... there is no way to do a locked mov, have to use an xchg or similar which is
* a larger performance penalty (not really an issue), note that xchg implies
* lock, so no need for the lock prefix below
*
* further Intel's documentation is a little weird on the issue of
* cross-modifying code (see IA32 volume 3 7-2 through 7-7), "Locked
* instructions should not be used to insure that data written can be fetched
* as instructions" and "Locked operations are atomic with respect to all other
* memory operations and all externally visible events. Only instruction fetch
* and page table access can pass locked instructions", (pass?) however it does
* note that the current versions of P6 family, pentium 4, xeon, pentium and
* 486 allow data written by locked instructions to be fetched as instructions.
* In the cross-modifying code section, however, it gives a (horrible for us)
* algorithm to ensure cross-modifying code is compliant with current and
* future versions of IA-32 then says that "the use of this option is not
* required for programs intended to run on the 486, but is recommended to
* insure compatibility with pentium 4, xeon, P6 family and pentium
* processors", so my take home is that it works now, but don't have any
* expectations for the future - FIXME - */
/* ... entire offset being written is within a cache line, so we can't use a locked
* instruction to ensure atomicity */
/* Alignment boundary used for jmp padding: the pad_jmps_set_alignment
 * internal option when it is non-zero, otherwise the value returned by
 * proc_get_cache_line_size().
 */
#define PAD_JMPS_ALIGNMENT                        \
    (INTERNAL_OPTION(pad_jmps_set_alignment) == 0 \
         ? proc_get_cache_line_size()             \
         : INTERNAL_OPTION(pad_jmps_set_alignment))
#ifndef DEBUG
/* Non-DEBUG builds: the check expands to nothing and evaluates no argument. */
#    define CHECK_JMP_TARGET_ALIGNMENT(target, size, hot_patch)
#else
/* DEBUG builds: when hot_patch is set and [target, target+size) crosses a
 * PAD_JMPS_ALIGNMENT boundary, bump the unaligned_patches statistic and
 * assert that the pad_jmps option is off.
 */
#    define CHECK_JMP_TARGET_ALIGNMENT(target, size, hot_patch)                     \
        do {                                                                        \
            if (hot_patch && CROSSES_ALIGNMENT(target, size, PAD_JMPS_ALIGNMENT)) { \
                STATS_INC(unaligned_patches);                                       \
                ASSERT(!DYNAMO_OPTION(pad_jmps));                                   \
            }                                                                       \
        } while (0)
#endif
#ifdef WINDOWS
/* ... blocks that touch those registers, so don't need to worry about clobbering
* eax and ebx */
/* Copies one byte from addr_src to addr_res; the source is read through a
 * volatile BYTE pointer.
 */
# define ATOMIC_1BYTE_READ(addr_src, addr_res) \
do { \
*(BYTE *)(addr_res) = *(volatile BYTE *)(addr_src); \
} while (0)
/* Atomically stores the one-byte value at target with an interlocked
 * exchange.  Asserts that target points at a 1-byte object.  hot_patch is
 * accepted for signature parity with the wider writes and is not used here.
 */
#    define ATOMIC_1BYTE_WRITE(target, value, hot_patch)                     \
        do {                                                                 \
            ASSERT(sizeof(*target) == 1);                                    \
            /* An interlocked exchange is used, as in ATOMIC_4BYTE_WRITE,    \
             * for consistency.                                              \
             */                                                              \
            _InterlockedExchange8((volatile CHAR *)(target), (CHAR)(value)); \
        } while (0)
/* Atomically stores the 4-byte value at target via _InterlockedExchange.
 * Asserts that value is 4 bytes wide and runs CHECK_JMP_TARGET_ALIGNMENT on
 * the target (that check is empty unless DEBUG is defined).
 */
# define ATOMIC_4BYTE_WRITE(target, value, hot_patch) \
do { \
ASSERT(sizeof(value) == 4); \
\
CHECK_JMP_TARGET_ALIGNMENT(target, 4, hot_patch); \
\
_InterlockedExchange((volatile LONG *)(target), (LONG)(value)); \
} while (0)
/* Stores the 4-byte value at target with a plain volatile store rather than
 * an interlocked intrinsic.  Asserts that value is 4 bytes wide and that
 * target is 4-byte aligned.  hot_patch is not used.
 */
# define ATOMIC_4BYTE_ALIGNED_WRITE(target, value, hot_patch) \
do { \
ASSERT(sizeof(value) == 4); \
ASSERT(ALIGNED(target, 4)); \
*(volatile LONG *)(target) = (LONG)(value); \
} while (0)
/* Copies 4 bytes from addr_src (volatile read) to addr_res after asserting
 * that addr_src is 4-byte aligned.
 */
# define ATOMIC_4BYTE_ALIGNED_READ(addr_src, addr_res) \
do { \
ASSERT(ALIGNED(addr_src, 4)); \
*(LONG *)(addr_res) = *(volatile LONG *)(addr_src); \
} while (0)
# ifdef X64
/* Atomically stores the 8-byte value at target via _InterlockedExchange64.
 * Asserts that value is 8 bytes wide, raises a curiosity assert if hot_patch
 * is set, and runs CHECK_JMP_TARGET_ALIGNMENT on the target.
 */
#        define ATOMIC_8BYTE_WRITE(target, value, hot_patch)                        \
            do {                                                                    \
                ASSERT(sizeof(value) == 8);                                         \
                ASSERT_CURIOSITY(!hot_patch);                                       \
                CHECK_JMP_TARGET_ALIGNMENT(target, 8, hot_patch);                   \
                /* Parenthesize the arguments so the casts apply to the whole       \
                 * expression, e.g. when target is written as base + offset.        \
                 */                                                                 \
                _InterlockedExchange64((volatile __int64 *)(target),                \
                                       (__int64)(value));                           \
            } while (0)
/* Stores the 8-byte value at target with a plain volatile store.  Asserts
 * that value is 8 bytes wide and that target is 8-byte aligned, and raises a
 * curiosity assert if hot_patch is set.
 */
# define ATOMIC_8BYTE_ALIGNED_WRITE(target, value, hot_patch) \
do { \
ASSERT(sizeof(value) == 8); \
ASSERT(ALIGNED(target, 8)); \
\
ASSERT_CURIOSITY(!hot_patch); \
*(volatile __int64 *)(target) = (__int64)(value); \
} while (0)
/* Copies 8 bytes from addr_src (volatile read) to addr_res after asserting
 * that addr_src is 8-byte aligned.
 */
# define ATOMIC_8BYTE_ALIGNED_READ(addr_src, addr_res) \
do { \
ASSERT(ALIGNED(addr_src, 8)); \
*(__int64 *)(addr_res) = *(volatile __int64 *)(addr_src); \
} while (0)
# endif
/* FIXME: these intrinsics all use xadd even when no return value is needed!
* We assume these aren't performance-critical enough to care.
* If we do change to not have return value, need to change static inlines below.
* Even if these turn into callouts, they should be reentrant.
*/
/* Atomic increment/decrement of the lvalue var via the _InterlockedIncrement
 * and _InterlockedDecrement intrinsics (64-bit variants only under X64).
 * ATOMIC_INC/ATOMIC_DEC select the variant by pasting the type name, so type
 * must be spelled exactly "int" or "int64".
 */
# define ATOMIC_INC_int(var) _InterlockedIncrement((volatile LONG *)&(var))
# ifdef X64
# define ATOMIC_INC_int64(var) _InterlockedIncrement64((volatile __int64 *)&(var))
# endif
# define ATOMIC_INC(type, var) ATOMIC_INC_##type(var)
# define ATOMIC_DEC_int(var) _InterlockedDecrement((volatile LONG *)&(var))
# ifdef X64
# define ATOMIC_DEC_int64(var) _InterlockedDecrement64((volatile __int64 *)&(var))
# endif
# define ATOMIC_DEC(type, var) ATOMIC_DEC_##type(var)
/* Atomic add and compare-exchange on the lvalue var, built on the
 * _InterlockedExchangeAdd* and _InterlockedCompareExchange* intrinsics.
 * Note the argument order: the macros take (var, compare, exchange) but pass
 * (exchange, compare) to the intrinsic.
 * The *_PTR forms pick the 64-bit variant under X64 and the 32-bit one
 * otherwise; their type parameter is ignored.
 */
# define ATOMIC_ADD_int(var, value) \
_InterlockedExchangeAdd((volatile LONG *)&(var), value)
# ifdef X64
# define ATOMIC_ADD_int64(var, value) \
_InterlockedExchangeAdd64((volatile __int64 *)&(var), value)
# endif
# define ATOMIC_COMPARE_EXCHANGE_int(var, compare, exchange) \
_InterlockedCompareExchange((volatile LONG *)&(var), exchange, compare)
# define ATOMIC_COMPARE_EXCHANGE_int64(var, compare, exchange) \
_InterlockedCompareExchange64((volatile __int64 *)&(var), exchange, compare)
# define ATOMIC_COMPARE_EXCHANGE ATOMIC_COMPARE_EXCHANGE_int
# define ATOMIC_ADD(type, var, val) ATOMIC_ADD_##type(var, val)
# ifdef X64
# define ATOMIC_ADD_PTR(type, var, val) ATOMIC_ADD_int64(var, val)
# define ATOMIC_COMPARE_EXCHANGE_PTR ATOMIC_COMPARE_EXCHANGE_int64
# else
# define ATOMIC_ADD_PTR(type, var, val) ATOMIC_ADD_int(var, val)
# define ATOMIC_COMPARE_EXCHANGE_PTR ATOMIC_COMPARE_EXCHANGE_int
# endif
/* MEMORY_STORE_BARRIER expands to nothing in this configuration.
 * SPINLOCK_PAUSE issues the _mm_pause() intrinsic.
 */
# define MEMORY_STORE_BARRIER()
# define SPINLOCK_PAUSE() _mm_pause()
/* Executes cpuid (leaf 0) through the __cpuid intrinsic and discards the
 * result.  Wrapped in do { } while (0) so that the expansion is a single
 * statement that takes a trailing semicolon and is safe in an unbraced
 * if/else.
 */
#    define SERIALIZE_INSTRUCTIONS()     \
        do {                             \
            int cpuid_res_local[4];      \
            __cpuid(cpuid_res_local, 0); \
        } while (0)
/* Atomically increments *var.
 * Returns true when the value produced by ATOMIC_INC is zero.
 */
static inline bool
atomic_inc_and_test(volatile int *var)
{
    const int updated = ATOMIC_INC(int, *var);
    return updated == 0;
}
/* Atomically decrements *var.
 * Returns true when the value produced by ATOMIC_DEC is -1.
 */
static inline bool
atomic_dec_and_test(volatile int *var)
{
    const int updated = ATOMIC_DEC(int, *var);
    return updated == -1;
}
/* Atomically decrements *var.
 * Returns true when the value produced by ATOMIC_DEC is zero.
 */
static inline bool
atomic_dec_becomes_zero(volatile int *var)
{
    const int updated = ATOMIC_DEC(int, *var);
    return updated == 0;
}
/* Atomic compare-and-exchange on *var.
 * Returns true when the value handed back by ATOMIC_COMPARE_EXCHANGE_int
 * equals compare.
 */
static inline bool
atomic_compare_exchange_int(volatile int *var, int compare, int exchange)
{
    const int observed = ATOMIC_COMPARE_EXCHANGE_int(*var, compare, exchange);
    return observed == compare;
}
/* 64-bit atomic compare-and-exchange on *var.
 * Returns true when the value handed back by ATOMIC_COMPARE_EXCHANGE_int64,
 * converted to ptr_int_t, equals compare.
 */
static inline bool
atomic_compare_exchange_int64(volatile int64 *var, int64 compare, int64 exchange)
{
    const ptr_int_t observed =
        (ptr_int_t)ATOMIC_COMPARE_EXCHANGE_int64(*var, compare, exchange);
    return observed == compare;
}
/* Atomically adds value to *var.
 * Returns value plus whatever ATOMIC_ADD hands back.
 */
static inline int
atomic_add_exchange_int(volatile int *var, int value)
{
    const int before = ATOMIC_ADD(int, *var, value);
    return value + before;
}
# ifdef X64
/* Atomically adds value to the 64-bit *var.
 * Returns value plus whatever ATOMIC_ADD hands back.
 */
static inline int64
atomic_add_exchange_int64(volatile int64 *var, int64 value)
{
    const int64 before = ATOMIC_ADD(int64, *var, value);
    return value + before;
}
# endif
/* Unsuffixed name for the int-sized add-exchange. */
# define atomic_add_exchange atomic_add_exchange_int
#else
# ifdef DR_HOST_X86
/* ... if target is within cache line.
*/
/* Copies one byte from addr_src to addr_res by way of %al, using two movb
 * instructions in a single asm statement ("al" is declared clobbered).
 */
# define ATOMIC_1BYTE_READ(addr_src, addr_res) \
do { \
__asm__ __volatile__("movb %1, %%al; movb %%al, %0" \
: "=m"(*(byte *)(addr_res)) \
: "m"(*(byte *)(addr_src)) \
: "al"); \
} while (0)
/* Atomically stores the one-byte value at target with xchgb.  Asserts that
 * target points at a 1-byte object.  hot_patch is not used here.
 */
#        define ATOMIC_1BYTE_WRITE(target, value, hot_patch)                 \
            do {                                                             \
                /* Copy into a local so the asm has a register lvalue. */    \
                char _myval = value;                                         \
                ASSERT(sizeof(*target) == 1);                                \
                /* An exchange is used, as in ATOMIC_4BYTE_WRITE, for        \
                 * consistency.                                              \
                 */                                                          \
                __asm__ __volatile__("xchgb %0, %1"                          \
                                     : "+m"(*(char *)(target)), "+r"(_myval) \
                                     :);                                     \
            } while (0)
/* Atomically stores the 4-byte value at target with xchgl.  Asserts that
 * value is 4 bytes wide and runs CHECK_JMP_TARGET_ALIGNMENT on the target
 * (that check is empty unless DEBUG is defined).
 */
#        define ATOMIC_4BYTE_WRITE(target, value, hot_patch)                \
            do {                                                            \
                /* Copy into a local so the asm has a register lvalue. */   \
                int _myval = value;                                         \
                ASSERT(sizeof(value) == 4);                                 \
                CHECK_JMP_TARGET_ALIGNMENT(target, 4, hot_patch);           \
                /* xchg with a memory operand implies lock, so no lock      \
                 * prefix is needed.                                        \
                 */                                                         \
                __asm__ __volatile__("xchgl %0, %1"                         \
                                     : "+m"(*(int *)(target)), "+r"(_myval) \
                                     :);                                    \
            } while (0)
/* Stores the 4-byte value at target with a single movl from a register.
 * Asserts that value is 4 bytes wide and that target is 4-byte aligned.
 * value is first copied into a local so the asm gets a register operand.
 * hot_patch is not used.
 */
# define ATOMIC_4BYTE_ALIGNED_WRITE(target, value, hot_patch) \
do { \
\
int _myval = value; \
ASSERT(sizeof(value) == 4); \
ASSERT(ALIGNED(target, 4)); \
__asm__ __volatile__("movl %1, %0" \
: "=m"(*(int *)(target)) \
: "r"(_myval)); \
} while (0)
/* Copies 4 bytes from addr_src to addr_res by way of %eax (declared
 * clobbered), after asserting that addr_src is 4-byte aligned.
 */
# define ATOMIC_4BYTE_ALIGNED_READ(addr_src, addr_res) \
do { \
ASSERT(ALIGNED(addr_src, 4)); \
__asm__ __volatile__("movl %1, %%eax; movl %%eax, %0" \
: "=m"(*(int *)(addr_res)) \
: "m"(*(int *)(addr_src)) \
: "eax"); \
} while (0)
# ifdef X64
/* Atomically stores the 8-byte value at target with xchgq.  Asserts that
 * value is 8 bytes wide, raises a curiosity assert if hot_patch is set, and
 * runs CHECK_JMP_TARGET_ALIGNMENT on the target.
 */
#            define ATOMIC_8BYTE_WRITE(target, value, hot_patch)                  \
                do {                                                              \
                    /* Copy into a local so the asm has a register lvalue. */     \
                    int64 _myval = value;                                         \
                    ASSERT(sizeof(value) == 8);                                   \
                    ASSERT_CURIOSITY(!hot_patch);                                 \
                    CHECK_JMP_TARGET_ALIGNMENT(target, 8, hot_patch);             \
                    /* xchg with a memory operand implies lock, so no lock        \
                     * prefix is needed.                                          \
                     */                                                           \
                    __asm__ __volatile__("xchgq %0, %1"                           \
                                         : "+m"(*(int64 *)(target)), "+r"(_myval) \
                                         :);                                      \
                } while (0)
/* Stores the 8-byte value at target with a single movq from a register.
 * Asserts that value is 8 bytes wide and that target is 8-byte aligned, and
 * raises a curiosity assert if hot_patch is set.
 */
# define ATOMIC_8BYTE_ALIGNED_WRITE(target, value, hot_patch) \
do { \
\
int64 _myval = value; \
ASSERT(sizeof(value) == 8); \
\
ASSERT_CURIOSITY(!hot_patch); \
ASSERT(ALIGNED(target, 8)); \
__asm__ __volatile__("movq %1, %0" \
: "=m"(*(int64 *)(target)) \
: "r"(_myval)); \
} while (0)
/* Copies 8 bytes from addr_src to addr_res by way of %rax (declared
 * clobbered), after asserting that addr_src is 8-byte aligned.
 */
#            define ATOMIC_8BYTE_ALIGNED_READ(addr_src, addr_res)               \
                do {                                                            \
                    ASSERT(ALIGNED(addr_src, 8));                               \
                    /* addr_src is parenthesized so the cast and dereference    \
                     * apply to the whole argument expression.                  \
                     */                                                         \
                    __asm__ __volatile__("movq %1, %%rax; movq %%rax, %0"       \
                                         : "=m"(*(int64 *)(addr_res))           \
                                         : "m"(*(int64 *)(addr_src))            \
                                         : "rax");                              \
                } while (0)
# endif
/* Atomic increment/decrement of the lvalue var with "lock inc"/"lock dec".
 * suffix is the operand-size letter appended to the mnemonic ("l" for int,
 * "q" for int64).  ATOMIC_INC/ATOMIC_DEC select the variant by pasting the
 * type name.  These expand to asm statements and yield no value; helpers
 * elsewhere in this file read the resulting condition flags with SET_FLAG.
 */
# define ATOMIC_INC_suffix(suffix, var) \
__asm__ __volatile__("lock inc" suffix " %0" : "=m"(var) : : "cc", "memory")
# define ATOMIC_INC_int(var) ATOMIC_INC_suffix("l", var)
# define ATOMIC_INC_int64(var) ATOMIC_INC_suffix("q", var)
# define ATOMIC_INC(type, var) ATOMIC_INC_##type(var)
# define ATOMIC_DEC_suffix(suffix, var) \
__asm__ __volatile__("lock dec" suffix " %0" : "=m"(var) : : "cc", "memory")
# define ATOMIC_DEC_int(var) ATOMIC_DEC_suffix("l", var)
# define ATOMIC_DEC_int64(var) ATOMIC_DEC_suffix("q", var)
# define ATOMIC_DEC(type, var) ATOMIC_DEC_##type(var)
/* ... and then complain that "lock addq" can't take %eax!
* so we use "ri":
*/
/* Atomically adds value to the lvalue var with "lock add".  The value operand
 * uses the "ri" constraint (register or immediate).  suffix is "l" for int
 * and "q" for int64; ATOMIC_ADD selects the variant by pasting the type name.
 */
# define ATOMIC_ADD_suffix(suffix, var, value) \
__asm__ __volatile__("lock add" suffix " %1, %0" \
: "=m"(var) \
: "ri"(value) \
: "cc", "memory")
# define ATOMIC_ADD_int(var, val) ATOMIC_ADD_suffix("l", var, val)
# define ATOMIC_ADD_int64(var, val) ATOMIC_ADD_suffix("q", var, val)
# define ATOMIC_ADD(type, var, val) ATOMIC_ADD_##type(var, val)
/* Atomic exchange-and-add on *var with "lock xadd".  var is a pointer here
 * (the macro dereferences it).  value is loaded into the same register as
 * result (matching constraint "1"); after the asm, result holds what xadd
 * left in that register, and callers in this file add value to it to obtain
 * the sum.
 */
# define ATOMIC_ADD_EXCHANGE_suffix(suffix, var, value, result) \
__asm__ __volatile__("lock xadd" suffix " %1, %0" \
: "=m"(*var), "=r"(result) \
: "1"(value) \
: "cc", "memory")
# define ATOMIC_ADD_EXCHANGE_int(var, val, res) \
ATOMIC_ADD_EXCHANGE_suffix("l", var, val, res)
# define ATOMIC_ADD_EXCHANGE_int64(var, val, res) \
ATOMIC_ADD_EXCHANGE_suffix("q", var, val, res)
/* Atomic compare-and-exchange on the lvalue var with "lock cmpxchg".
 * compare is placed in the accumulator ("a" constraint) and exchange in a
 * register.  The macro yields no value; helpers elsewhere in this file read
 * the outcome from the condition flags with SET_IF_NOT_ZERO.
 */
# define ATOMIC_COMPARE_EXCHANGE_suffix(suffix, var, compare, exchange) \
__asm__ __volatile__("lock cmpxchg" suffix " %2,%0" \
: "=m"(var) \
: "a"(compare), "r"(exchange) \
: "cc", "memory")
# define ATOMIC_COMPARE_EXCHANGE_int(var, compare, exchange) \
ATOMIC_COMPARE_EXCHANGE_suffix("l", var, compare, exchange)
# define ATOMIC_COMPARE_EXCHANGE_int64(var, compare, exchange) \
ATOMIC_COMPARE_EXCHANGE_suffix("q", var, compare, exchange)
/* Atomically swaps newval into the 4-byte lvalue var with xchgl; result
 * receives the register operand after the swap.  No lock prefix is written;
 * xchg with a memory operand implies lock.
 */
# define ATOMIC_EXCHANGE(var, newval, result) \
__asm __volatile("xchgl %0, %1" \
: "=r"(result), "=m"(var) \
: "0"(newval), "m"(var))
/* MEMORY_STORE_BARRIER expands to nothing on this host.
 * SPINLOCK_PAUSE issues the "pause" instruction.
 * SERIALIZE_INSTRUCTIONS zeroes eax and executes cpuid, declaring
 * eax/ebx/ecx/edx clobbered.
 * NOTE(review): its expansion already ends in ';', so a call site's own ';'
 * adds an empty statement.
 * SET_FLAG(cc, flag) stores condition cc into the byte lvalue flag with
 * set<cc>; SET_IF_NOT_ZERO and SET_IF_NOT_LESS use conditions nz and nl.
 */
# define MEMORY_STORE_BARRIER()
# define SPINLOCK_PAUSE() __asm__ __volatile__("pause")
# define SERIALIZE_INSTRUCTIONS() \
__asm__ __volatile__("xor %%eax, %%eax; cpuid" \
: \
: \
: "eax", "ebx", "ecx", "edx");
# define SET_FLAG(cc, flag) \
__asm__ __volatile__("set" #cc " %0" : "=qm"(flag) : : "cc", "memory")
# define SET_IF_NOT_ZERO(flag) SET_FLAG(nz, flag)
# define SET_IF_NOT_LESS(flag) SET_FLAG(nl, flag)
# elif defined(DR_HOST_AARCH64)
/* Loads one byte from addr_src with ldarb (load-acquire) into w0 and stores
 * it to addr_res with strb.  w0 and memory are declared clobbered.
 */
# define ATOMIC_1BYTE_READ(addr_src, addr_res) \
do { \
\
__asm__ __volatile__("ldarb w0, [%0]\n\t" \
"strb w0, [%1]" \
: \
: "r"(addr_src), "r"(addr_res) \
: "w0", "memory"); \
} while (0)
/* Stores the one-byte value at target with stlrb (store-release).  Asserts
 * that target points at a 1-byte object and raises a curiosity assert if
 * hot_patch is set.
 */
# define ATOMIC_1BYTE_WRITE(target, value, hot_patch) \
do { \
ASSERT(sizeof(*target) == 1); \
\
ASSERT_CURIOSITY(!hot_patch); \
__asm__ __volatile__("stlrb %w0, [%1]" \
: \
: "r"(value), "r"(target) \
: "memory"); \
} while (0)
/* Stores the 4-byte value at target with stlr (store-release) on the w view
 * of the value register.  Asserts that value is 4 bytes wide and that target
 * is 4-byte aligned, and raises a curiosity assert if hot_patch is set.
 * ATOMIC_4BYTE_ALIGNED_WRITE is the same macro, since alignment is always
 * required here.
 */
# define ATOMIC_4BYTE_WRITE(target, value, hot_patch) \
do { \
ASSERT(sizeof(value) == 4); \
\
ASSERT_CURIOSITY(!hot_patch); \
\
\
\
ASSERT(ALIGNED(target, 4)); \
__asm__ __volatile__("stlr %w0, [%1]" \
: \
: "r"(value), "r"(target) \
: "memory"); \
} while (0)
# define ATOMIC_4BYTE_ALIGNED_WRITE ATOMIC_4BYTE_WRITE
/* Loads 4 bytes from addr_src with ldar (load-acquire) into w0 and stores
 * them to addr_res, after asserting that addr_src is 4-byte aligned.
 */
# define ATOMIC_4BYTE_ALIGNED_READ(addr_src, addr_res) \
do { \
ASSERT(ALIGNED(addr_src, 4)); \
\
__asm__ __volatile__("ldar w0, [%0]\n\t" \
"str w0, [%1]" \
: \
: "r"(addr_src), "r"(addr_res) \
: "w0", "memory"); \
} while (0)
/* Stores the 8-byte value at target with stlr (store-release).  Asserts that
 * value is 8 bytes wide and that target is 8-byte aligned, and raises a
 * curiosity assert if hot_patch is set.  ATOMIC_8BYTE_ALIGNED_WRITE is the
 * same macro.
 */
# define ATOMIC_8BYTE_WRITE(target, value, hot_patch) \
do { \
ASSERT(sizeof(value) == 8); \
\
ASSERT_CURIOSITY(!hot_patch); \
ASSERT(ALIGNED(target, 8)); \
__asm__ __volatile__("stlr %0, [%1]" \
: \
: "r"(value), "r"(target) \
: "memory"); \
} while (0)
# define ATOMIC_8BYTE_ALIGNED_WRITE ATOMIC_8BYTE_WRITE
/* Loads 8 bytes from addr_src with ldar (load-acquire) into x0 and stores
 * them to addr_res, after asserting that addr_src is 8-byte aligned.
 */
# define ATOMIC_8BYTE_ALIGNED_READ(addr_src, addr_res) \
do { \
ASSERT(ALIGNED(addr_src, 8)); \
\
__asm__ __volatile__("ldar x0, [%0]\n\t" \
"str x0, [%1]" \
: \
: "r"(addr_src), "r"(addr_res) \
: "x0", "memory"); \
} while (0)
/* Generator for the atomic increment/decrement functions.  Each generated
 * function takes a pointer and runs a load-exclusive/store-exclusive loop:
 * ldaxr loads *var, op ("add" or "sub") adjusts it by 1, stlxr tries to store
 * it back, and cbnz retries while the store-exclusive status is non-zero.
 * r selects the register view ("w" for int, "x" for int64).
 * ATOMIC_INC/ATOMIC_DEC take an lvalue and pass its address.
 */
# define DEF_ATOMIC_incdec(fname, type, r, op) \
static inline void fname(volatile type *var) \
{ \
type tmp1; \
int tmp2; \
__asm__ __volatile__("1: ldaxr %" r "0, [%x2] \n\t" \
" " op " %" r "0, %" r "0, #1 \n\t" \
" stlxr %w1, %" r "0, [%x2] \n\t" \
" cbnz %w1, 1b \n\t" \
: "=&r"(tmp1), "=&r"(tmp2) \
: "r"(var)); \
}
DEF_ATOMIC_incdec(ATOMIC_INC_int, int, "w", "add") DEF_ATOMIC_incdec(ATOMIC_INC_int64,
int64, "x", "add")
DEF_ATOMIC_incdec(ATOMIC_DEC_int, int, "w", "sub")
DEF_ATOMIC_incdec(ATOMIC_DEC_int64, int64, "x", "sub")
# undef DEF_ATOMIC_incdec
# define ATOMIC_INC(type, var) ATOMIC_INC_##type(&var)
# define ATOMIC_DEC(type, var) ATOMIC_DEC_##type(&var)
/* Generator for the atomic add functions: the same ldaxr/stlxr retry loop as
 * the inc/dec functions, adding val instead of 1.  Nothing is returned.
 * ATOMIC_ADD takes an lvalue and passes its address.
 */
# define DEF_ATOMIC_ADD(fname, type, r) \
static inline void fname(volatile type *var, type val) \
{ \
type tmp1; \
int tmp2; \
__asm__ __volatile__("1: ldaxr %" r "0, [%x2] \n\t" \
" add %" r "0, %" r "0, %" r "3 \n\t" \
" stlxr %w1, %" r "0, [%x2] \n\t" \
" cbnz %w1, 1b \n\t" \
: "=&r"(tmp1), "=&r"(tmp2) \
: "r"(var), "r"(val)); \
}
DEF_ATOMIC_ADD(ATOMIC_ADD_int, int, "w") DEF_ATOMIC_ADD(ATOMIC_ADD_int64,
int64, "x")
# undef DEF_ATOMIC_ADD
# define ATOMIC_ADD(type, var, val) ATOMIC_ADD_##type(&var, val)
/* Generator for atomic_add_exchange_int/int64: an ldaxr/stlxr retry loop that
 * adds val to *var.  The returned register is the one that was stored back,
 * i.e. the sum.  atomic_add_exchange is the int-sized variant.
 */
# define DEF_ATOMIC_ADD_EXCHANGE(fname, type, reg) \
static inline type fname(volatile type *var, type val) \
{ \
type ret; \
int tmp; \
__asm__ __volatile__("1: ldaxr %" reg "1, [%x2] \n\t" \
" add %" reg "1, %" reg "1, %" reg "3 \n\t" \
" stlxr %w0, %" reg "1, [%x2] \n\t" \
" cbnz %w0, 1b \n\t" \
: "=&r"(tmp), "=&r"(ret) \
: "r"(var), "r"(val)); \
return ret; \
}
DEF_ATOMIC_ADD_EXCHANGE(atomic_add_exchange_int, int, "w")
DEF_ATOMIC_ADD_EXCHANGE(atomic_add_exchange_int64, int64, "x")
# undef DEF_ATOMIC_ADD_EXCHANGE
# define atomic_add_exchange atomic_add_exchange_int
/* Generator for atomic_compare_exchange_int/int64.  ldaxr loads *var; if it
 * differs from compare the code branches to label 2, otherwise stlxr tries to
 * store exchange and the loop retries while the store-exclusive status is
 * non-zero.  After a successful store the comparison is repeated to set the
 * flags; label 2 issues clrex and cset turns the eq condition into the bool
 * result.  Returns true when the exchange was performed.
 */
# define DEF_atomic_compare_exchange(fname, type, r) \
static inline bool fname(volatile type *var, type compare, type exchange) \
{ \
type tmp1; \
int tmp2; \
bool ret; \
__asm__ __volatile__("1: ldaxr %" r "0, [%x3] \n\t" \
" cmp %" r "0, %" r "4 \n\t" \
" b.ne 2f \n\t" \
" stlxr %w1, %" r "5, [%x3] \n\t" \
" cbnz %w1, 1b \n\t" \
" cmp %" r "0, %" r "4 \n\t" \
"2: clrex \n\t" \
" cset %w2, eq \n\t" \
: "=&r"(tmp1), "=&r"(tmp2), "=r"(ret) \
: "r"(var), "r"(compare), "r"(exchange)); \
return ret; \
}
DEF_atomic_compare_exchange(atomic_compare_exchange_int, int, "w")
DEF_atomic_compare_exchange(atomic_compare_exchange_int64,
int64, "x")
# undef DEF_atomic_compare_exchange
/* Atomically replaces *var with newval and returns the value that ldaxr
 * loaded before the successful store.  The ldaxr/stlxr pair is retried while
 * the store-exclusive status (tmp) is non-zero.
 */
static inline int atomic_exchange_int(volatile int *var,
int newval)
{
int tmp, ret;
__asm__ __volatile__("1: ldaxr %w0, [%x2] \n\t"
" stlxr %w1, %w3, [%x2] \n\t"
" cbnz %w1, 1b \n\t"
: "=&r"(ret), "=&r"(tmp)
: "r"(var), "r"(newval));
return ret;
}
/* Store barrier: "dmb st". */
#        define MEMORY_STORE_BARRIER() __asm__ __volatile__("dmb st")
/* Spin-wait hint: issues "wfe".
 * XXX: Consider issuing "sev" on lock release?
 */
#        define SPINLOCK_PAUSE()             \
            do {                             \
                __asm__ __volatile__("wfe"); \
            } while (0)
/* Atomically adds 1 to *var.
 * Returns true when atomic_add_exchange_int reports zero.
 */
static inline bool
atomic_inc_and_test(volatile int *var)
{
    const int updated = atomic_add_exchange_int(var, 1);
    return updated == 0;
}
/* Atomically adds -1 to *var.
 * Returns true when atomic_add_exchange_int reports -1.
 */
static inline bool
atomic_dec_and_test(volatile int *var)
{
    const int updated = atomic_add_exchange_int(var, -1);
    return updated == -1;
}
/* Atomically adds -1 to *var.
 * Returns true when atomic_add_exchange_int reports zero.
 */
static inline bool
atomic_dec_becomes_zero(volatile int *var)
{
    const int updated = atomic_add_exchange_int(var, -1);
    return updated == 0;
}
# elif defined(DR_HOST_ARM)
/* Loads one byte from addr_src into r0, issues "dmb ish", then stores the
 * byte to addr_res.  r0 and memory are declared clobbered.
 */
# define ATOMIC_1BYTE_READ(addr_src, addr_res) \
do { \
__asm__ __volatile__("ldrb r0, [%0]; dmb ish; strb r0, [%1]" \
: \
: "r"(addr_src), "r"(addr_res) \
: "r0", "memory"); \
} while (0)
/* Issues "dmb ish" and then stores the one-byte value at target with strb.
 * Asserts that target points at a 1-byte object.  hot_patch is not used.
 */
# define ATOMIC_1BYTE_WRITE(target, value, hot_patch) \
do { \
ASSERT(sizeof(*target) == 1); \
__asm__ __volatile__("dmb ish; strb %0, [%1]" \
: \
: "r"(value), "r"(target) \
: "memory"); \
} while (0)
/* Issues "dmb ish" and then stores the 4-byte value at target with str.
 * Asserts that value is 4 bytes wide and that target is 4-byte aligned.
 * hot_patch is not used.  ATOMIC_4BYTE_ALIGNED_WRITE is the same macro.
 */
# define ATOMIC_4BYTE_WRITE(target, value, hot_patch) \
do { \
ASSERT(sizeof(value) == 4); \
\
\
ASSERT(ALIGNED(target, 4)); \
__asm__ __volatile__("dmb ish; str %0, [%1]" \
: \
: "r"(value), "r"(target) \
: "memory"); \
} while (0)
# define ATOMIC_4BYTE_ALIGNED_WRITE ATOMIC_4BYTE_WRITE
/* Loads 4 bytes from addr_src into r0, issues "dmb ish", then stores them to
 * addr_res.  Asserts that addr_src is 4-byte aligned.
 */
# define ATOMIC_4BYTE_ALIGNED_READ(addr_src, addr_res) \
do { \
ASSERT(ALIGNED(addr_src, 4)); \
__asm__ __volatile__("ldr r0, [%0]; dmb ish; str r0, [%1]" \
: \
: "r"(addr_src), "r"(addr_res) \
: "r0", "memory"); \
} while (0)
/* ... ARMv6 for ARM synchronization primitives
 */
/* ... a non word-aligned memory address causes UNPREDICTABLE behavior.",
 * so we require alignment here.
 */
/* Atomic increment of the lvalue var: "dmb ish", then an ldrex/strex loop
 * that adds 1 in r2 and retries while the strex status in r3 is non-zero.
 * The trailing "cmp r2, #0" leaves the condition flags describing the new
 * value for a following SET_FLAG.  suffix is "" for int and "d" for int64.
 * NOTE(review): the expansion already ends in ';'.
 */
# define ATOMIC_INC_suffix(suffix, var) \
__asm__ __volatile__(" dmb ish \n\t" \
"1: ldrex" suffix " r2, %0 \n\t" \
" add" suffix " r2, r2, #1 \n\t" \
" strex" suffix " r3, r2, %0 \n\t" \
" cmp r3, #0 \n\t" \
" bne 1b \n\t" \
" cmp r2, #0" \
: "=Q"(var) \
: \
: "cc", "memory", "r2", "r3");
# define ATOMIC_INC_int(var) ATOMIC_INC_suffix("", var)
# define ATOMIC_INC_int64(var) ATOMIC_INC_suffix("d", var)
# define ATOMIC_INC(type, var) ATOMIC_INC_##type(var)
/* Atomic decrement of the lvalue var: "dmb ish", then an ldrex/strex loop
 * that subtracts 1 in r2 and retries while the strex status in r3 is
 * non-zero.  The trailing "cmp r2, #0" leaves the condition flags describing
 * the new value.  suffix is "" for int and "d" for int64.
 * NOTE(review): the expansion already ends in ';'.
 */
# define ATOMIC_DEC_suffix(suffix, var) \
__asm__ __volatile__(" dmb ish \n\t" \
"1: ldrex" suffix " r2, %0 \n\t" \
" sub" suffix " r2, r2, #1 \n\t" \
" strex" suffix " r3, r2, %0 \n\t" \
" cmp r3, #0 \n\t" \
" bne 1b \n\t" \
" cmp r2, #0" \
: "=Q"(var) \
: \
: "cc", "memory", "r2", "r3");
# define ATOMIC_DEC_int(var) ATOMIC_DEC_suffix("", var)
# define ATOMIC_DEC_int64(var) ATOMIC_DEC_suffix("d", var)
# define ATOMIC_DEC(type, var) ATOMIC_DEC_##type(var)
/* Atomic add of value to the lvalue var: "dmb ish", then an ldrex/strex loop
 * that adds value in r2 and retries while the strex status in r3 is
 * non-zero.  The trailing "cmp r2, #0" leaves the condition flags describing
 * the new value.  The expansion keeps its terminating ';'.
 */
#        define ATOMIC_ADD_suffix(suffix, var, value)                        \
            __asm__ __volatile__("   dmb ish                         \n\t"   \
                                 "1: ldrex" suffix " r2, %0          \n\t"   \
                                 "   add" suffix "   r2, r2, %1      \n\t"   \
                                 "   strex" suffix " r3, r2, %0      \n\t"   \
                                 "   cmp   r3, #0                    \n\t"   \
                                 "   bne   1b                        \n\t"   \
                                 "   cmp   r2, #0"                           \
                                 : "=Q"(var)                                 \
                                 : "r"(value)                                \
                                 : "cc", "memory", "r2", "r3");
#        define ATOMIC_ADD_int(var, val) ATOMIC_ADD_suffix("", var, val)
/* The doubleword suffix on ARM is "d" (ldrexd/strexd), as used by the other
 * int64 variants in this section; "q" is the x86 quadword suffix.
 */
#        define ATOMIC_ADD_int64(var, val) ATOMIC_ADD_suffix("d", var, val)
#        define ATOMIC_ADD(type, var, val) ATOMIC_ADD_##type(var, val)
/* Atomic add of value to *var (var is a pointer; the macro dereferences it)
 * using "dmb ish" and an ldrex/strex retry loop.  After the store succeeds,
 * value is subtracted from r2 again and the difference, i.e. the value that
 * was loaded, is stored to result.  Callers in this file add value back to
 * obtain the sum.
 * NOTE(review): the expansion already ends in ';'.
 */
# define ATOMIC_ADD_EXCHANGE_suffix(suffix, var, value, result) \
__asm__ __volatile__(" dmb ish \n\t" \
"1: ldrex" suffix " r2, %0 \n\t" \
" add" suffix " r2, r2, %2 \n\t" \
" strex" suffix " r3, r2, %0 \n\t" \
" cmp r3, #0 \n\t" \
" bne 1b \n\t" \
" sub" suffix " r2, r2, %2 \n\t" \
" str" suffix " r2, %1" \
: "=Q"(*var), "=m"(result) \
: "r"(value) \
: "cc", "memory", "r2", "r3");
# define ATOMIC_ADD_EXCHANGE_int(var, val, res) \
ATOMIC_ADD_EXCHANGE_suffix("", var, val, res)
# define ATOMIC_ADD_EXCHANGE_int64(var, val, res) \
ATOMIC_ADD_EXCHANGE_suffix("d", var, val, res)
/* Atomic compare-and-exchange on the lvalue var: "dmb ish", ldrex into r2,
 * compare against compare and branch to label 1 on mismatch; otherwise strex
 * exchange and retry from label 2 while the status in r3 is non-zero.  The
 * comparison is repeated after a successful store so the flags describe the
 * outcome, and label 1 issues clrex.  The macro yields no value; helpers
 * elsewhere in this file read the flags with SET_IF_NOT_ZERO.
 * NOTE(review): the expansion already ends in ';'.
 */
# define ATOMIC_COMPARE_EXCHANGE_suffix(suffix, var, compare, exchange) \
__asm__ __volatile__(" dmb ish \n\t" \
"2: ldrex" suffix " r2, %0 \n\t" \
" cmp" suffix " r2, %1 \n\t" \
" bne 1f \n\t" \
" strex" suffix " r3, %2, %0 \n\t" \
" cmp r3, #0 \n\t" \
" bne 2b \n\t" \
" cmp" suffix " r2, %1 \n\t" \
"1: clrex \n\t" \
: "=Q"(var) \
: "r"(compare), "r"(exchange) \
: "cc", "memory", "r2", "r3");
# define ATOMIC_COMPARE_EXCHANGE_int(var, compare, exchange) \
ATOMIC_COMPARE_EXCHANGE_suffix("", var, compare, exchange)
# define ATOMIC_COMPARE_EXCHANGE_int64(var, compare, exchange) \
ATOMIC_COMPARE_EXCHANGE_suffix("d", var, compare, exchange)
/* Atomically replaces the 4-byte lvalue var with newval: "dmb ish", then an
 * ldrex/strex loop retried while the status in r3 is non-zero.  The value
 * that ldrex loaded (r2) is stored to result.
 * NOTE(review): the expansion already ends in ';'.
 */
# define ATOMIC_EXCHANGE(var, newval, result) \
__asm__ __volatile__(" dmb ish \n\t" \
"1: ldrex r2, %0 \n\t" \
" strex r3, %2, %0 \n\t" \
" cmp r3, #0 \n\t" \
" bne 1b \n\t" \
" str r2, %1" \
: "=Q"(var), "=m"(result) \
: "r"(newval) \
: "cc", "memory", "r2", "r3");
/* Store barrier: "dmb st". */
#        define MEMORY_STORE_BARRIER() __asm__ __volatile__("dmb st")
/* Spin-wait hint: issues "wfe".
 * XXX: Consider issuing "sev" on lock release?
 */
#        define SPINLOCK_PAUSE() __asm__ __volatile__("wfe")
/* Issues "clrex".  The expansion keeps its terminating ';'. */
#        define SERIALIZE_INSTRUCTIONS() __asm__ __volatile__("clrex");
/* Stores 1 into the byte lvalue flag when condition cc holds and 0 otherwise:
 * r2 is preset to 1, a conditional branch on cc skips the "mov r2, #0", and
 * strb writes r2 to flag.  SET_IF_NOT_ZERO uses condition ne and
 * SET_IF_NOT_LESS uses ge.
 */
# define SET_FLAG(cc, flag) \
__asm__ __volatile__(" mov r2, #1 \n\t" \
" b" #cc " 1f \n\t" \
" mov r2, #0 \n\t" \
"1: strb r2, %0" \
: "=m"(flag) \
: \
: "r2")
# define SET_IF_NOT_ZERO(flag) SET_FLAG(ne, flag)
# define SET_IF_NOT_LESS(flag) SET_FLAG(ge, flag)
# elif defined(DR_HOST_RISCV64)
/* Copies one byte from addr_src to addr_res through scratch register t0 and
 * then issues "fence iorw,iorw".
 */
#        define ATOMIC_1BYTE_READ(addr_src, addr_res)               \
            do {                                                    \
                /* t0 is overwritten by the asm, so it is listed as \
                 * a clobber; otherwise the compiler may keep a     \
                 * live value (or an input operand) in it.          \
                 */                                                 \
                __asm__ __volatile__("lbu t0, 0(%0)\n\t"            \
                                     "sb t0, 0(%1)\n\t"             \
                                     "fence iorw,iorw"              \
                                     :                              \
                                     : "r"(addr_src), "r"(addr_res) \
                                     : "t0", "memory");             \
            } while (0)
/* Issues "fence iorw,iorw" and then stores the one-byte value at target with
 * sb.  Raises a curiosity assert if hot_patch is set.
 */
# define ATOMIC_1BYTE_WRITE(target, value, hot_patch) \
do { \
\
ASSERT_CURIOSITY(!hot_patch); \
\
__asm__ __volatile__("fence iorw,iorw\n\t" \
"sb %0, 0(%1)\n\t" \
: \
: "r"(value), "r"(target) \
: "memory"); \
} while (0)
/* Issues "fence iorw,iorw" and then stores the 4-byte value at target with
 * sw.  Asserts that target is 4-byte aligned and raises a curiosity assert if
 * hot_patch is set.  ATOMIC_4BYTE_WRITE is the same macro.
 */
#        define ATOMIC_4BYTE_ALIGNED_WRITE(target, value, hot_patch) \
            do {                                                     \
                /* The RISC-V ISA manual                             \
                 * V20191213 states that aligned loads/stores are    \
                 * atomic. Alternatively we could use                \
                 * amoswap.w zero %0, 0(%1) but that would incur     \
                 * an extra read we don't need and also requires     \
                 * alignment.                                        \
                 */                                                  \
                ASSERT(ALIGNED(target, 4));                          \
                ASSERT_CURIOSITY(!hot_patch);                        \
                /* Fence ahead of the store. */                      \
                __asm__ __volatile__("fence iorw,iorw\n\t"           \
                                     "sw %0, 0(%1)\n\t"              \
                                     :                               \
                                     : "r"(value), "r"(target)       \
                                     : "memory");                    \
            } while (0)
#        define ATOMIC_4BYTE_WRITE ATOMIC_4BYTE_ALIGNED_WRITE
/* Copies 4 bytes from addr_src to addr_res through scratch register t0 and
 * then issues "fence iorw,iorw".  Asserts that both addresses are 4-byte
 * aligned.
 */
#        define ATOMIC_4BYTE_ALIGNED_READ(addr_src, addr_res)       \
            do {                                                    \
                /* The RISC-V ISA manual                            \
                 * V20191213 states that aligned loads/stores are   \
                 * atomic.                                          \
                 */                                                 \
                ASSERT(ALIGNED(addr_src, 4));                       \
                ASSERT(ALIGNED(addr_res, 4));                       \
                /* t0 is overwritten by the asm, so it is listed as \
                 * a clobber.                                       \
                 */                                                 \
                __asm__ __volatile__("lw t0, 0(%0)\n\t"             \
                                     "sw t0, 0(%1) \n\t"            \
                                     "fence iorw,iorw"              \
                                     :                              \
                                     : "r"(addr_src), "r"(addr_res) \
                                     : "t0", "memory");             \
            } while (0)
/* Issues "fence iorw,iorw" and then stores the 8-byte value at target with
 * sd.  Asserts that target is 8-byte aligned and raises a curiosity assert if
 * hot_patch is set.  ATOMIC_8BYTE_WRITE is the same macro.
 */
#        define ATOMIC_8BYTE_ALIGNED_WRITE(target, value, hot_patch) \
            do {                                                     \
                /* The RISC-V ISA manual                             \
                 * V20191213 states that aligned loads/stores are    \
                 * atomic. Alternatively we could use an             \
                 * amoswap.d with a zero destination but that would  \
                 * incur an extra read we don't need and also        \
                 * requires alignment.                               \
                 */                                                  \
                ASSERT(ALIGNED(target, 8));                          \
                ASSERT_CURIOSITY(!hot_patch);                        \
                /* Fence ahead of the store. */                      \
                __asm__ __volatile__("fence iorw,iorw\n\t"           \
                                     "sd %1, 0(%0)\n\t"              \
                                     :                               \
                                     : "r"(target), "r"(value)       \
                                     : "memory");                    \
            } while (0)
#        define ATOMIC_8BYTE_WRITE ATOMIC_8BYTE_ALIGNED_WRITE
/* Copies 8 bytes from addr_src to addr_res through scratch register t0 and
 * then issues "fence iorw,iorw".  Asserts that both addresses are 8-byte
 * aligned.
 */
#        define ATOMIC_8BYTE_ALIGNED_READ(addr_src, addr_res)       \
            do {                                                    \
                /* The RISC-V ISA manual                            \
                 * V20191213 states that aligned loads/stores are   \
                 * atomic.                                          \
                 */                                                 \
                ASSERT(ALIGNED(addr_src, 8));                       \
                ASSERT(ALIGNED(addr_res, 8));                       \
                /* t0 is overwritten by the asm, so it is listed as \
                 * a clobber.                                       \
                 */                                                 \
                __asm__ __volatile__("ld t0, 0(%0)\n\t"             \
                                     "sd t0, 0(%1)\n\t"             \
                                     "fence iorw,iorw"              \
                                     :                              \
                                     : "r"(addr_src), "r"(addr_res) \
                                     : "t0", "memory");             \
            } while (0)
/* Helper for the increment/decrement functions below: loads the literal val
 * into t0 with li and atomically adds it to *var with amoadd.<sfx>.aqrl,
 * discarding the old value (destination register zero).  sfx is "w" or "d"
 * and align is the matching natural alignment in bytes.  Undefined again
 * once the functions have been emitted.
 */
#        define ADDI_suffix(sfx, var, val, align)                               \
            do {                                                                \
                /* The atomic memory operation                                  \
                 * requires that var address is naturally aligned.              \
                 */                                                             \
                ASSERT(ALIGNED(var, align));                                    \
                __asm__ __volatile__("li t0, " #val "\n\t"                      \
                                     "amoadd." sfx ".aqrl zero, t0, (%0)\n\t"   \
                                     :                                          \
                                     : "r"(var)                                 \
                                     : "t0", "memory");                         \
            } while (0)

/* Atomically adds 1 to the 4-byte *var. */
static inline void
ATOMIC_INC_int(volatile int *var)
{
    ADDI_suffix("w", var, 1, 4);
}

/* Atomically adds 1 to the 8-byte *var. */
static inline void
ATOMIC_INC_int64(volatile int64 *var)
{
    ADDI_suffix("d", var, 1, 8);
}

/* Atomically adds -1 to the 4-byte *var. */
static inline void
ATOMIC_DEC_int(volatile int *var)
{
    ADDI_suffix("w", var, -1, 4);
}

/* Atomically adds -1 to the 8-byte *var. */
static inline void
ATOMIC_DEC_int64(volatile int64 *var)
{
    ADDI_suffix("d", var, -1, 8);
}

/* ATOMIC_INC/ATOMIC_DEC take an lvalue and pass its address. */
#        define ATOMIC_INC(type, var) ATOMIC_INC_##type(&var)
#        define ATOMIC_DEC(type, var) ATOMIC_DEC_##type(&var)
#        undef ADDI_suffix
/* Atomically adds val to the 4-byte *var with amoadd.w.aqrl, discarding the
 * old value (destination register zero).
 */
static inline void
ATOMIC_ADD_int(volatile int *var, int val)
{
    /* The atomic memory operation requires that var address is
     * naturally aligned.
     */
    ASSERT(ALIGNED(var, 4));
    __asm__ __volatile__("amoadd.w.aqrl zero, %1, (%0)\n\t"
                         :
                         : "r"(var), "r"(val)
                         : "memory");
}

/* Atomically adds val to the 8-byte *var with amoadd.d.aqrl, discarding the
 * old value (destination register zero).
 */
static inline void
ATOMIC_ADD_int64(volatile int64 *var, int64 val)
{
    /* The atomic memory operation requires that var address is
     * naturally aligned.
     */
    ASSERT(ALIGNED(var, 8));
    __asm__ __volatile__("amoadd.d.aqrl zero, %1, (%0)\n\t"
                         :
                         : "r"(var), "r"(val)
                         : "memory");
}

/* ATOMIC_ADD takes an lvalue and passes its address. */
#        define ATOMIC_ADD(type, var, val) ATOMIC_ADD_##type(&var, val)
/* Atomically adds val to the 4-byte *var with amoadd.w.aqrl.  The instruction
 * writes the previous memory value to res; the function returns res + val,
 * i.e. the sum.
 */
static inline int
atomic_add_exchange_int(volatile int *var, int val)
{
    int res = 0;
    /* The atomic memory operation requires that var address is
     * naturally aligned.
     */
    ASSERT(ALIGNED(var, 4));
    __asm__ __volatile__("amoadd.w.aqrl %0, %2, (%1)\n\t"
                         : "=r"(res)
                         : "r"(var), "r"(val)
                         : "memory");
    return res + val;
}
/* Atomically adds val to the 8-byte *var with amoadd.d.aqrl.  The instruction
 * writes the previous memory value to res; the function returns res + val,
 * i.e. the sum.
 */
static inline int64
atomic_add_exchange_int64(volatile int64 *var, int64 val)
{
    int64 res = 0;
    /* The atomic memory operation requires that var address is
     * naturally aligned.
     */
    ASSERT(ALIGNED(var, 8));
    __asm__ __volatile__("amoadd.d.aqrl %0, %2, (%1)\n\t"
                         : "=r"(res)
                         : "r"(var), "r"(val)
                         : "memory");
    return res + val;
}
/* Unsuffixed name for the int-sized add-exchange. */
#        define atomic_add_exchange atomic_add_exchange_int
/* Atomic 4-byte compare-and-exchange with an lr.w/sc.w loop.
 * Returns true if *var equalled compare and exchange was stored, otherwise
 * returns false.
 */
static inline bool
atomic_compare_exchange_int(volatile int *var, int compare, int exchange)
{
    /* Stays 1 when the comparison fails; sc.w writes 0 on a successful store. */
    bool res = 1;
    /* The lr/sc sequence requires that var address is
     * naturally aligned.
     */
    ASSERT(ALIGNED(var, 4));
    /* This code is based on GCC's atomic_compare_exchange_strong().
     * The fence ensures no re-ordering beyond the start of the lr/sc loop.
     * Provides acquire/release semantics for LR/SC sequences by setting the
     * aq and rl bits on lr and sc respectively.
     */
    __asm__ __volatile__("fence iorw,ow\n\t"
                         "1: lr.w.aq t0, (%1)\n\t"
                         "   bne t0, %2, 2f\n\t"
                         "   sc.w.rl %0, %3, (%1)\n\t"
                         "   bnez %0, 1b\n\t"
                         "2:\n\t"
                         : "+r"(res)
                         : "r"(var), "r"(compare), "r"(exchange)
                         : "t0", "memory");
    return res == 0;
}
/* Atomic 8-byte compare-and-exchange with an lr.d/sc.d loop.
 * Returns true if *var equalled compare and exchange was stored, otherwise
 * returns false.
 */
static inline bool
atomic_compare_exchange_int64(volatile int64 *var, int64 compare, int64 exchange)
{
    /* Stays 1 when the comparison fails; sc.d writes 0 on a successful store. */
    bool res = 1;
    /* The lr/sc sequence requires that var address is
     * naturally aligned.
     */
    ASSERT(ALIGNED(var, 8));
    /* This code is based on GCC's atomic_compare_exchange_strong().
     * The fence ensures no re-ordering beyond the start of the lr/sc loop.
     * Provides acquire/release semantics for LR/SC sequences by setting the
     * aq and rl bits on lr and sc respectively.
     */
    __asm__ __volatile__("fence iorw,ow\n\t"
                         "1: lr.d.aq t0, (%1)\n\t"
                         "   bne t0, %2, 2f\n\t"
                         "   sc.d.rl %0, %3, (%1)\n\t"
                         "   bnez %0, 1b\n\t"
                         "2:\n\t"
                         : "+r"(res)
                         : "r"(var), "r"(compare), "r"(exchange)
                         : "t0", "memory");
    return res == 0;
}
/* Macro spellings that forward directly to the inline functions of the same
 * (lower-case) name.  Here var is passed through unchanged, so it must
 * already be a pointer.
 */
# define ATOMIC_COMPARE_EXCHANGE_int(var, compare, exchange) \
atomic_compare_exchange_int(var, compare, exchange)
# define ATOMIC_COMPARE_EXCHANGE_int64(var, compare, exchange) \
atomic_compare_exchange_int64(var, compare, exchange)
/* Atomically replaces the 4-byte *var with newval using amoswap.w.aqrl and
 * returns the value the instruction wrote to its destination register (the
 * previous contents of *var).
 */
static inline int
atomic_exchange_int(volatile int *var, int newval)
{
    int res;
    /* The atomic memory operation requires that var address
     * is naturally aligned.
     */
    ASSERT(ALIGNED(var, 4));
    __asm__ __volatile__("amoswap.w.aqrl %0, %2, (%1)\n\t"
                         : "=r"(res)
                         : "r"(var), "r"(newval)
                         : "memory");
    return res;
}
/* Store barrier: "fence w,w". */
#        define MEMORY_STORE_BARRIER() __asm__ __volatile__("fence w,w")
/* Spin-wait hint.  0x0100000F is emitted as a raw word; this
 * will work even on platforms without Zihintpause extension because this
 * is a FENCE hint instruction which evaluates to NOP then.
 * The expansion keeps its terminating ';'.
 */
#        define SPINLOCK_PAUSE() __asm__ __volatile__(".int 0x0100000F");
/* Atomically adds 1 to *var.
 * Returns true when atomic_add_exchange_int reports zero.
 */
static inline bool
atomic_inc_and_test(volatile int *var)
{
    const int sum = atomic_add_exchange_int(var, 1);
    return sum == 0;
}
/* Atomically adds -1 to *var.
 * Returns true when atomic_add_exchange_int reports -1.
 */
static inline bool
atomic_dec_and_test(volatile int *var)
{
    const int sum = atomic_add_exchange_int(var, -1);
    return sum == -1;
}
/* Atomically adds -1 to *var.
 * Returns true when atomic_add_exchange_int reports zero.
 */
static inline bool
atomic_dec_becomes_zero(volatile int *var)
{
    const int sum = atomic_add_exchange_int(var, -1);
    return sum == 0;
}
# endif
/* Pointer-sized and default-sized spellings shared by the non-Windows hosts.
 * ATOMIC_ADD_PTR and ATOMIC_COMPARE_EXCHANGE_PTR map to the int64 variants
 * under X64 and to the int variants otherwise; ATOMIC_ADD_PTR's type
 * parameter is ignored.  ATOMIC_COMPARE_EXCHANGE is the int variant.
 */
# ifdef X64
# define ATOMIC_ADD_PTR(type, var, val) ATOMIC_ADD_int64(var, val)
# else
# define ATOMIC_ADD_PTR(type, var, val) ATOMIC_ADD_int(var, val)
# endif
# define ATOMIC_COMPARE_EXCHANGE ATOMIC_COMPARE_EXCHANGE_int
# ifdef X64
# define ATOMIC_COMPARE_EXCHANGE_PTR ATOMIC_COMPARE_EXCHANGE_int64
# else
# define ATOMIC_COMPARE_EXCHANGE_PTR ATOMIC_COMPARE_EXCHANGE
# endif
# if !defined(DR_HOST_AARCH64) && !defined(DR_HOST_RISCV64)
* Returns true if the resulting value is zero, otherwise returns false
*/
static inline bool
atomic_inc_and_test(volatile int *var)
{
unsigned char c;
ATOMIC_INC(int, *var);
SET_IF_NOT_ZERO(c);
although we could put the return value in EAX ourselves */
return c == 0;
}
* Returns true if the initial value was zero, otherwise returns false
*/
static inline bool
atomic_dec_and_test(volatile int *var)
{
unsigned char c;
ATOMIC_DEC(int, *var);
* convert that back to C.
*/
SET_IF_NOT_LESS(c);
although we could put the return value in EAX ourselves */
return c == 0;
}
* Returns true if the resulting value is zero, otherwise returns false
*/
static inline bool
atomic_dec_becomes_zero(volatile int *var)
{
unsigned char c;
ATOMIC_DEC(int, *var);
* convert that back to C.
*/
SET_IF_NOT_ZERO(c);
although we could put the return value in EAX ourselves */
return c == 0;
}
otherwise returns false
*/
static inline bool
atomic_compare_exchange_int(volatile int *var, int compare, int exchange)
{
unsigned char c;
ATOMIC_COMPARE_EXCHANGE(*var, compare, exchange);
SET_IF_NOT_ZERO(c);
although we could put the return value in EAX ourselves */
return c == 0;
}
/* Atomically swaps newval into *var via ATOMIC_EXCHANGE and returns the
 * value that macro delivers in its result operand.
 */
static inline int
atomic_exchange_int(volatile int *var, int newval)
{
    int swapped_out;

    ATOMIC_EXCHANGE(*var, newval, swapped_out);
    return swapped_out;
}
# ifdef X64
otherwise returns false
*/
static inline bool
atomic_compare_exchange_int64(volatile int64 *var, int64 compare, int64 exchange)
{
unsigned char c;
ATOMIC_COMPARE_EXCHANGE_int64(*var, compare, exchange);
SET_IF_NOT_ZERO(c);
although we could put the return value in EAX ourselves */
return c == 0;
}
# endif
/* Atomically adds value to *var.
 * Returns value plus whatever ATOMIC_ADD_EXCHANGE_int delivers in its result
 * operand.
 */
static inline int
atomic_add_exchange_int(volatile int *var, int value)
{
    int fetched;

    ATOMIC_ADD_EXCHANGE_int(var, value, fetched);
    return fetched + value;
}
/* Atomically adds value to the 8-byte *var.
 * Returns value plus whatever ATOMIC_ADD_EXCHANGE_int64 delivers in its
 * result operand.
 */
static inline int64
atomic_add_exchange_int64(volatile int64 *var, int64 value)
{
    int64 fetched;

    ATOMIC_ADD_EXCHANGE_int64(var, value, fetched);
    return fetched + value;
}
/* Unsuffixed name for the int-sized add-exchange.  The ATOMIC_ADD_EXCHANGE_*
 * helper macros are undefined here, after the inline functions above have
 * used them.
 */
# define atomic_add_exchange atomic_add_exchange_int
# undef ATOMIC_ADD_EXCHANGE_suffix
# undef ATOMIC_ADD_EXCHANGE_int
# undef ATOMIC_ADD_EXCHANGE_int64
# endif
#endif
/* Reads the 4-byte *var through ATOMIC_4BYTE_ALIGNED_READ and returns the
 * value read.
 */
static inline int
atomic_aligned_read_int(volatile int *var)
{
    int snapshot = 0;

    ATOMIC_4BYTE_ALIGNED_READ(var, &snapshot);
    return snapshot;
}
/* Reads the one-byte *var through ATOMIC_1BYTE_READ and returns the value
 * read.
 */
static inline bool
atomic_read_bool(volatile bool *var)
{
    bool snapshot = 0;

    ATOMIC_1BYTE_READ(var, &snapshot);
    return snapshot;
}
#ifdef X64
/* Reads the 8-byte *var through ATOMIC_8BYTE_ALIGNED_READ and returns the
 * value read.
 */
static inline int64
atomic_aligned_read_int64(volatile int64 *var)
{
    int64 snapshot = 0;

    ATOMIC_8BYTE_ALIGNED_READ(var, &snapshot);
    return snapshot;
}
#endif
/* atomic_compare_exchange is the int-sized compare-exchange.
 * atomic_compare_exchange_ptr casts its arguments and forwards to the int64
 * variant under X64 and to the int variant otherwise.  ATOMIC_ADDR_WRITE is
 * the 8-byte write under X64 and the 4-byte write otherwise.
 */
#define atomic_compare_exchange atomic_compare_exchange_int
#ifdef X64
# define atomic_compare_exchange_ptr(v, c, e) \
atomic_compare_exchange_int64((volatile int64 *)(v), (int64)(c), (int64)(e))
# define ATOMIC_ADDR_WRITE ATOMIC_8BYTE_WRITE
#else
# define atomic_compare_exchange_ptr(v, c, e) \
atomic_compare_exchange_int((volatile int *)(v), (int)(c), (int)(e))
# define ATOMIC_ADDR_WRITE ATOMIC_4BYTE_WRITE
#endif
/* Atomically raises the int-sized lvalue maxvar to curvar when curvar is
 * larger.  curvar is evaluated once.  The loop re-reads maxvar and stops
 * either when the value read is not below curvar or when the
 * compare-exchange installing curvar succeeds.  Asserts that int, type,
 * maxvar and curvar all have the same size.
 */
#define ATOMIC_MAX_int(type, maxvar, curvar)                                   \
    do {                                                                       \
        type atomic_max__seen;                                                 \
        type atomic_max__wanted = (curvar);                                    \
        ASSERT(sizeof(int) == sizeof(maxvar));                                 \
        ASSERT(sizeof(type) == sizeof(maxvar));                                \
        ASSERT(sizeof(type) == sizeof(curvar));                                \
        for (;;) {                                                             \
            atomic_max__seen = atomic_aligned_read_int((int *)&(maxvar));      \
            if (!(atomic_max__seen < atomic_max__wanted))                      \
                break;                                                         \
            if (atomic_compare_exchange_int((int *)&(maxvar), atomic_max__seen, \
                                            atomic_max__wanted))               \
                break;                                                         \
        }                                                                      \
    } while (0)
#ifdef X64
/* Atomically raises the int64-sized lvalue maxvar to curvar when curvar is
 * larger.  curvar is evaluated once.  The loop re-reads maxvar and stops
 * either when the value read is not below curvar or when the
 * compare-exchange installing curvar succeeds.  Asserts that int64, type,
 * maxvar and curvar all have the same size.
 */
#    define ATOMIC_MAX_int64(type, maxvar, curvar)                                 \
        do {                                                                       \
            type atomic_max__seen;                                                 \
            type atomic_max__wanted = (curvar);                                    \
            ASSERT(sizeof(int64) == sizeof(maxvar));                               \
            ASSERT(sizeof(type) == sizeof(maxvar));                                \
            ASSERT(sizeof(type) == sizeof(curvar));                                \
            for (;;) {                                                             \
                atomic_max__seen = atomic_aligned_read_int64((int64 *)&(maxvar));  \
                if (!(atomic_max__seen < atomic_max__wanted))                      \
                    break;                                                         \
                if (atomic_compare_exchange_int64(                                 \
                        (int64 *)&(maxvar), atomic_max__seen, atomic_max__wanted)) \
                    break;                                                         \
            }                                                                      \
        } while (0)
#endif
/* Pointer-sized aligned atomic write: forwards to the 8-byte aligned write
 * under X64 and to the 4-byte aligned write otherwise.  The write
 * is a Store-Release and ensures prior stores in program order in this thread
 * are not observed by another thread after this store.
 */
#ifdef X64
#    define ATOMIC_PTRSZ_ALIGNED_WRITE(target, value, hot_patch) \
        ATOMIC_8BYTE_ALIGNED_WRITE(target, value, hot_patch)
#else
#    define ATOMIC_PTRSZ_ALIGNED_WRITE(target, value, hot_patch) \
        ATOMIC_4BYTE_ALIGNED_WRITE(target, value, hot_patch)
#endif
/* Dispatches to ATOMIC_MAX_<type> by pasting the type name, so type must be
 * spelled exactly "int" or "int64".
 */
#define ATOMIC_MAX(type, maxvar, curvar) ATOMIC_MAX_##type(type, maxvar, curvar)
#endif