/* **********************************************************
 * Copyright (c) 2011-2021 Google, Inc.  All rights reserved.
 * Copyright (c) 2001-2010 VMware, Inc.  All rights reserved.
 * ********************************************************** */

/*
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * * Neither the name of VMware, Inc. nor the names of its contributors may be
 *   used to endorse or promote products derived from this software without
 *   specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

/* Copyright (c) 2003-2007 Determina Corp. */
/* Copyright (c) 2001-2003 Massachusetts Institute of Technology */
/* Copyright (c) 2001 Hewlett-Packard Company */

/*
 * x86_asm_defines.asm - shared assembly defines
 */

#ifndef _X86_ASM_DEFINES_ASM_
#define _X86_ASM_DEFINES_ASM_ 1

/* XXX: We should give everything in asm_defines.asm a unique name so that
 * globals.h can be included here, which would avoid all of this duplication.
 */
/* Number of SIMD register slots reserved in the machine context, selected
 * by target: 6 on 64-bit Windows, 32 on other 64-bit targets, 8 on 32-bit.
 */
#if defined(X64) && defined(WINDOWS)
# define MCXT_NUM_SIMD_SLOTS 6 /* [xy]mm0-5 */
#elif defined(X64)
# define MCXT_NUM_SIMD_SLOTS 32 /* [xyz]mm0-31 */
#else
# define MCXT_NUM_SIMD_SLOTS 8 /* [xyz]mm0-7 */
#endif

/* Padding term that is summed into PRIV_MCXT_SIZE together with the SIMD and
 * opmask slot areas.  Going by its name it sits ahead of the SIMD slots.
 */
#ifdef X64
# define PRE_XMM_PADDING 48
#else
# define PRE_XMM_PADDING 24
#endif

/* Every SIMD slot is sized to hold a full zmm register (512 bits = 64 bytes),
 * regardless of which register width is actually in use.
 */
#define ZMM_REG_SIZE 64
#define MCXT_SIMD_SLOT_SIZE ZMM_REG_SIZE
/* xmm0-5/7/15 for PR 264138/i#139/PR 302107.
 * NOTE(review): that register range looks stale next to the [xyz]mm0-31
 * slot count above -- confirm.
 */
#define MCXT_TOTAL_SIMD_SLOTS_SIZE ((MCXT_NUM_SIMD_SLOTS)*(MCXT_SIMD_SLOT_SIZE))

/* AVX-512 opmask slots.  The total is computed with the larger (AVX512BW,
 * 8-byte) register size rather than the 2-byte AVX512F size.
 */
#define MCXT_NUM_OPMASK_SLOTS 8
#define OPMASK_AVX512F_REG_SIZE 2
#define OPMASK_AVX512BW_REG_SIZE 8
#define MCXT_TOTAL_OPMASK_SLOTS_SIZE ((MCXT_NUM_OPMASK_SLOTS)*(OPMASK_AVX512BW_REG_SIZE))

#ifdef X64
/* PUSHGPR (x64): push the 16 general-purpose registers, r15 first and rdi
 * last, so that after the macro rdi sits at [rsp] and each earlier push sits
 * one pointer-sized slot higher (rsi, rbp, rsp, rbx, rdx, rcx, rax, r8..r15).
 *
 * push GPR registers in priv_mcontext_t order.  does NOT make xsp have a
 * pre-push value as no callers need that (they all use PUSH_PRIV_MCXT).
 * The value stored by "push rsp" is rsp as it stands after the twelve pushes
 * that precede it, not the value on entry to the macro.
 * Leaves space for, but does NOT fill in, the xmm0-5 slots (PR 264138),
 * since it's hard to dynamically figure out during bootstrapping whether
 * movdqu or movups are legal instructions.  The caller is expected
 * to fill in the xmm values prior to any calls that may clobber them.
 * NOTE(review): nothing in PUSHGPR itself reserves SIMD slot space; the
 * sentence above presumably describes the enclosing PUSH_PRIV_MCXT sequence
 * defined elsewhere -- confirm.
 */
# define PUSHGPR \
        push     r15 @N@\
        push     r14 @N@\
        push     r13 @N@\
        push     r12 @N@\
        push     r11 @N@\
        push     r10 @N@\
        push     r9  @N@\
        push     r8  @N@\
        push     rax @N@\
        push     rcx @N@\
        push     rdx @N@\
        push     rbx @N@\
        /* not the pusha pre-push rsp value but see above */ @N@\
        push     rsp @N@\
        push     rbp @N@\
        push     rsi @N@\
        push     rdi
/* POPGPR (x64): exact inverse of PUSHGPR, popping rdi first and r15 last.
 * The saved-rsp slot is not loaded into rsp: it is popped into rbx, which is
 * dead at that point because the very next pop reloads rbx with its own
 * saved value.  rsp therefore just ends up 16 slots above where it started.
 * Unlike PUSHGPR, the expansion ends with a trailing @N@.
 */
# define POPGPR        \
        pop      rdi @N@\
        pop      rsi @N@\
        pop      rbp @N@\
        pop      rbx /* rsp into dead rbx */ @N@\
        pop      rbx @N@\
        pop      rdx @N@\
        pop      rcx @N@\
        pop      rax @N@\
        pop      r8  @N@\
        pop      r9  @N@\
        pop      r10 @N@\
        pop      r11 @N@\
        pop      r12 @N@\
        pop      r13 @N@\
        pop      r14 @N@\
        pop      r15 @N@
/* Size of priv_mcontext_t: 18 ARG_SZ-sized slots plus the pre-SIMD padding,
 * the SIMD slot area and the opmask slot area.  16 of the 18 slots are the
 * GPRs pushed by PUSHGPR.
 * NOTE(review): the remaining two are presumably xflags and pc, given that
 * MCONTEXT_PC_OFFS below is slot 17 -- confirm against priv_mcontext_t.
 */
# define PRIV_MCXT_SIZE (18*ARG_SZ + PRE_XMM_PADDING + MCXT_TOTAL_SIMD_SLOTS_SIZE + \
                         MCXT_TOTAL_OPMASK_SLOTS_SIZE)
/* NOTE(review): presumably offsetof(dcontext_t, dstack), by analogy with
 * is_exiting_OFFSET which is documented as a dcontext_t offset -- confirm.
 * UPCXT_EXTRA and ARG_SZ are defined elsewhere.
 */
# define dstack_OFFSET     (PRIV_MCXT_SIZE+UPCXT_EXTRA+3*ARG_SZ)
/* Offset of the pc field: the slot right after the 17 slots numbered 0-16. */
# define MCONTEXT_PC_OFFS  (17*ARG_SZ)
#else
/* PUSHGPR (32-bit): a single pusha, which pushes eax, ecx, edx, ebx, the
 * pre-pusha esp, ebp, esi and edi.  That leaves edi at [esp] and gives the
 * same relative order for these eight registers as the x64 sequence above.
 */
# define PUSHGPR \
        pusha
/* POPGPR (32-bit): popa restores the registers pushed by pusha; the saved
 * esp slot is skipped rather than loaded.
 */
# define POPGPR  \
        popa
/* Size of priv_mcontext_t: 10 ARG_SZ-sized slots (the 8 pusha registers plus
 * two more) plus the pre-SIMD padding, SIMD slot area and opmask slot area.
 * NOTE(review): the extra two slots are presumably xflags and pc, given that
 * MCONTEXT_PC_OFFS below is slot 9 -- confirm against priv_mcontext_t.
 */
# define PRIV_MCXT_SIZE (10*ARG_SZ + PRE_XMM_PADDING + MCXT_TOTAL_SIMD_SLOTS_SIZE + \
                         MCXT_TOTAL_OPMASK_SLOTS_SIZE)
/* Same formula as the x64 branch; see the review note there. */
# define dstack_OFFSET     (PRIV_MCXT_SIZE+UPCXT_EXTRA+3*ARG_SZ)
/* Offset of the pc field: the slot right after the 9 slots numbered 0-8. */
# define MCONTEXT_PC_OFFS  (9*ARG_SZ)
#endif
/* Slot offsets within the register block that PUSHGPR lays down, counted
 * upward from the stack pointer once the pushes are done: xsp lives in
 * slot 3 and xax in slot 7.
 */
#define PUSHGPR_XSP_OFFS  (3*ARG_SZ)
#define PUSHGPR_XAX_OFFS  (7*ARG_SZ)

/* The same slots expressed as mcontext offsets, derived from the xsp slot:
 * xcx is 3 slots and xax is 4 slots beyond xsp.
 */
#define MCONTEXT_XSP_OFFS (PUSHGPR_XSP_OFFS)
#define MCONTEXT_XAX_OFFS (MCONTEXT_XSP_OFFS + 4*ARG_SZ)
#define MCONTEXT_XCX_OFFS (MCONTEXT_XSP_OFFS + 3*ARG_SZ)

/* offsetof(dcontext_t, is_exiting): one ARG_SZ beyond dstack_OFFSET. */
#define is_exiting_OFFSET (dstack_OFFSET + 1*ARG_SZ)

/* Negative displacement equal to the combined size of the SIMD slot area,
 * the opmask slot area and the pre-SIMD padding.
 * NOTE(review): going by the name, this is the stack adjustment that
 * PUSH_PRIV_MCXT applies before the pc is stored -- confirm at the use site.
 */
#define PUSH_PRIV_MCXT_PRE_PC_SHIFT \
        (-(MCXT_TOTAL_SIMD_SLOTS_SIZE + MCXT_TOTAL_OPMASK_SLOTS_SIZE + PRE_XMM_PADDING))

#if !defined(X64) && defined(WINDOWS)
/* Code segment selector values for 32-bit and 64-bit code, needed only by
 * 32-bit Windows builds.
 * FIXME: these values were taken from XPSP2 and still need to be checked on
 * all other platforms.
 * Keep in sync with the matching defines in arch.h.
 */
# define CS32_SELECTOR HEX(23)
# define CS64_SELECTOR HEX(33)
#endif

/* Defines shared between safe_read_asm() and the memcpy() and memset()
 * implementations in x86_asm_shared.asm.
 *
 * PTRSZ_SHIFT_BITS      - shift count used to turn a byte count into a count
 *                         of pointer-sized units (3 for 8 bytes, 2 for 4).
 * PTRSZ_SUFFIXED(op)    - token-pastes the pointer-size suffix onto a string
 *                         op name: q on x64, d on 32-bit.
 * ARGS_TO_XDI_XSI_XDX() - places the first three arguments into xdi, xsi and
 *                         xdx, saving xdi/xsi first where they must be
 *                         preserved.
 * RESTORE_XDI_XSI()     - undoes the saves made by ARGS_TO_XDI_XSI_XDX().
 */
#ifdef X64
# define PTRSZ_SHIFT_BITS 3
# define PTRSZ_SUFFIXED(string_op) string_op##q
# ifdef UNIX
/* Both macros expand to nothing here: the first three integer arguments
 * already arrive in rdi, rsi and rdx under the System V AMD64 convention.
 */
#  define ARGS_TO_XDI_XSI_XDX()         /* ABI handles this. */
#  define RESTORE_XDI_XSI()             /* Not needed. */
# else /* WINDOWS */
/* Get args 1, 2, 3 into rdi, rsi, and rdx.
 * rdi and rsi are pushed first so that RESTORE_XDI_XSI() can put them back.
 * ARG2 is copied into rsi before rdx is overwritten with ARG3; this ordering
 * matters if ARG2 is rdx, as in the Microsoft x64 convention (the ARGn
 * macros are defined elsewhere).
 */
#  define ARGS_TO_XDI_XSI_XDX() \
        push     rdi                            @N@\
        push     rsi                            @N@\
        mov      rdi, ARG1                      @N@\
        mov      rsi, ARG2                      @N@\
        mov      rdx, ARG3
/* Pops in the reverse order of the pushes above. */
#  define RESTORE_XDI_XSI() \
        pop      rsi                            @N@\
        pop      rdi
# endif /* WINDOWS */
#else
# define PTRSZ_SHIFT_BITS 2
# define PTRSZ_SUFFIXED(string_op) string_op##d

/* Get args 1, 2, 3 into edi, esi, and edx to match Linux x64 ABI.  Need to save
 * edi and esi since they are callee-saved.  The ARGN macros can't handle
 * stack adjustments, so use the scratch regs eax and ecx to hold the args
 * before the pushes.
 * All three ARGn reads therefore happen before the first push; eax and ecx
 * are clobbered as a side effect.
 */
# define ARGS_TO_XDI_XSI_XDX() \
        mov     eax, ARG1                       @N@\
        mov     ecx, ARG2                       @N@\
        mov     edx, ARG3                       @N@\
        push    edi                             @N@\
        push    esi                             @N@\
        mov     edi, eax                        @N@\
        mov     esi, ecx
/* Pops in the reverse order of the pushes above. */
# define RESTORE_XDI_XSI() \
        pop esi                                 @N@\
        pop edi
#endif

/* Repeats string_op for XDX bytes using aligned pointer-sized operations when
 * possible.  Assumes that string_op works by counting down until XCX reaches
 * zero.  The pointer-sized string ops are aligned based on ptr_to_align.
 * For string ops that have both a src and dst, aligning based on src is
 * preferred, subject to micro-architectural differences.
 *
 * Parameters:
 *   funcname     - prefix for the four labels emitted by the expansion:
 *                  funcname_pre, funcname_aligned, funcname_mid and
 *                  funcname_post.  It must be unique per expansion so the
 *                  labels do not collide.
 *   ptr_to_align - operand holding the pointer whose low bits decide how
 *                  many leading bytes are handled one at a time.
 *   string_op    - unsuffixed string instruction name.  The macro appends b
 *                  for the byte loops and uses PTRSZ_SUFFIXED for the
 *                  pointer-sized loop.
 *
 * Register usage: takes the byte count n in REG_XDX.  Clobbers REG_XCX and
 * the arithmetic flags.  When ptr_to_align is not aligned, REG_XDX is reduced
 * by the number of leading bytes handled in the first stage.  Whatever
 * pointer or data registers string_op implicitly uses must already be set up
 * by the caller.  The direction flag is not touched here.
 *
 * Stages:
 *   1. pre:  if (ptr_to_align & (ARG_SZ - 1)) != 0, set xcx to
 *            ARG_SZ - (ptr_to_align & (ARG_SZ - 1)), clamp it to n with an
 *            unsigned compare, subtract it from n, and run the byte op xcx
 *            times.  If the pointer is already aligned this stage is skipped
 *            via the jump to funcname_aligned.
 *   2. mid:  run the pointer-sized op (n >> PTRSZ_SHIFT_BITS) times.
 *   3. post: run the byte op (n & (ARG_SZ - 1)) times for the remainder.
 * Stages 2 and 3 rely on ARG_SZ being equal to 1 << PTRSZ_SHIFT_BITS (ARG_SZ
 * is defined elsewhere).
 *
 * The labels on the three rep instructions are wrapped in ADDRTAKEN_LABEL
 * (defined elsewhere).
 * NOTE(review): presumably this is so their addresses can be referenced from
 * other code, e.g. to recognize the potentially faulting PCs mentioned
 * below -- confirm.
 *
 * XXX: glibc memcpy uses SSE instructions to copy, which is 10% faster on x64
 * and ~2x faster for 20kb copies on plain x86.  Using SSE is quite complicated,
 * because it means doing cpuid checks and loop unrolling.  Many of our string
 * operations are short anyway.  For safe_read, it also increases the number of
 * potentially faulting PCs.
 */
#define REP_STRING_OP(funcname, ptr_to_align, string_op) \
        mov     REG_XCX, ptr_to_align                           @N@\
        and     REG_XCX, (ARG_SZ - 1)                           @N@\
        jz      funcname##_aligned                              @N@\
        neg     REG_XCX                                         @N@\
        add     REG_XCX, ARG_SZ                                 @N@\
        cmp     REG_XDX, REG_XCX  /* if (n < xcx) */            @N@\
        cmovb   REG_XCX, REG_XDX  /*     xcx = n; */            @N@\
        sub     REG_XDX, REG_XCX                                @N@\
ADDRTAKEN_LABEL(funcname##_pre:)                                @N@\
        rep string_op##b                                        @N@\
funcname##_aligned:                                             @N@\
        /* Aligned word-size ops. */                            @N@\
        mov     REG_XCX, REG_XDX                                @N@\
        shr     REG_XCX, PTRSZ_SHIFT_BITS                       @N@\
ADDRTAKEN_LABEL(funcname##_mid:)                                @N@\
        rep PTRSZ_SUFFIXED(string_op)                           @N@\
        /* Handle trailing bytes. */                            @N@\
        mov     REG_XCX, REG_XDX                                @N@\
        and     REG_XCX, (ARG_SZ - 1)                           @N@\
ADDRTAKEN_LABEL(funcname##_post:)                               @N@\
        rep string_op##b


#endif /* _X86_ASM_DEFINES_ASM_ */