/* **********************************************************
 * Copyright (c) 2011-2022 Google, Inc. All rights reserved.
 * Copyright (c) 2000-2010 VMware, Inc. All rights reserved.
 * **********************************************************/
/*
 * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
#ifndef DECODE_PRIVATE_H
#define DECODE_PRIVATE_H
/* The PREFIX_* constants below are used only in the decoding tables. We decode the
* information into the operands.
* For encoding these properties are specified in the operands,
* with our encoder auto-adding the appropriate prefixes.
*/
/* Prefix bit flags. Every value in this block is a single distinct bit,
 * except for the two composite masks built with '|'.
 * NOTE(review): bits below 0x0080 are not assigned here; presumably they
 * belong to prefix flags declared elsewhere -- confirm before reusing them.
 */
#define PREFIX_DATA 0x0080
#define PREFIX_ADDR 0x0100
#define PREFIX_REX_W 0x0200
#define PREFIX_REX_R 0x0400
#define PREFIX_REX_X 0x0800
#define PREFIX_REX_B 0x1000
#define PREFIX_REX_GENERAL 0x2000
/* All five REX-related flags combined (0x3e00). */
#define PREFIX_REX_ALL \
    (PREFIX_REX_W | PREFIX_REX_R | PREFIX_REX_X | PREFIX_REX_B | PREFIX_REX_GENERAL)
/* PREFIX_DATA, PREFIX_ADDR and every REX flag combined (0x3f80). */
#define PREFIX_SIZE_SPECIFIERS (PREFIX_DATA | PREFIX_ADDR | PREFIX_REX_ALL)
#define PREFIX_REP 0x4000
#define PREFIX_REPNE 0x8000

/* NOTE(review): the opening line of the following comment is missing from
 * this copy of the file; the surviving text is kept after the "[...]" marker.
 *
 * [...] space could replace w/ byte value compare.
 */
#define PREFIX_VEX_2B 0x000010000
#define PREFIX_VEX_3B 0x000020000
#define PREFIX_VEX_L 0x000040000
#define PREFIX_XOP 0x000080000
/* NOTE(review): 0x000100000 is not assigned in this block; presumably it is
 * taken by a flag defined elsewhere -- confirm before reusing it.
 */
#define PREFIX_EVEX_RR 0x000200000
#define PREFIX_EVEX_LL 0x000400000
#define PREFIX_EVEX_z 0x000800000
#define PREFIX_EVEX_b 0x001000000
#define PREFIX_EVEX_VV 0x002000000

/* The x86 branch-hint prefixes share their byte values with the CS (not
 * taken) and DS (taken) segment-override prefixes, so the hints are simply
 * aliases of those segment constants (SEG_CS / SEG_DS are defined elsewhere).
 */
#define SEG_JCC_NOT_TAKEN SEG_CS
#define SEG_JCC_TAKEN SEG_DS
/* o get rid of sI? I sign-extend all the Ib's on decoding, many are
* actually sign-extended that aren't marked sIb
* o had to add size to immed_int and base_disp, don't have it for indir_reg!
* that's fine for encoding, aren't duplicate instrs differing on size of
* indir reg data, but should set it properly for later IR passes...
* o rewrite emit.c so does emit_inst only once (instr_length() does it!)
*/
/* Single-bit OPCODE_* flags, listed in ascending bit order. All five fit in
 * one byte: OPCODE_THREEBYTES is bit 3 of the low nibble and the other four
 * occupy the high nibble.
 */
enum {
    OPCODE_THREEBYTES = 1 << 3, /* 0x08 */
    OPCODE_TWOBYTES = 1 << 4,   /* 0x10 */
    OPCODE_REG = 1 << 5,        /* 0x20 */
    OPCODE_MODRM = 1 << 6,      /* 0x40 */
    OPCODE_SUFFIX = 1 << 7,     /* 0x80 */
};
/* For reading all bytes of instruction, only need to know:
* 1) prefixes + opcode boundary
* 2) whether to read modrm byte, from modrm get sib and disp
* 3) whether to read immed bytes (types A, I, sI, J, and O)
* The rest of the types are for interpretation only
*
* We have room for 2 destinations and 3 sources
* Instrs with 2 dests include push, pop, edx:eax, xchg, rep lods
* Exceptions:
* pusha = 8 sources
* popa = 8 destinations
* enter = 3 dests, 4 srcs (incl. 2 immeds)
* cpuid = 4 dests
* cmpxchg8b = 3 dests, 5 srcs
* movs = 3 dests, rest of string instrs w/ rep/repne have 3+ dests
* Represent exceptions as sequence of instrs?
* All 5 exceptions have only one form, can use list field to
* point into separate table that holds types of extra operands.
*
* Separate immed field and only 2 srcs?
* enter has 2 immeds, all other instrs have <=1
* there are a number of instrs that need 3 srcs even ignoring immeds!
* (pusha,shld,shrd,cmpxchg,cmpxchg8b,rep outs,rep cmps, rep scas)
* => no separate immed field
*
* FIXMEs:
* lea = computes addr, doesn't touch mem! how encode?
* in & out, ins & outs: are I/O ports in memory?!?
* Should we model fp stack changes?!?
* All i_eSP really read 0x4(esp) or some other offset, depending on
* if take esp value before or after instrs, etc.
* punpck*, pshuf*: say "reads entire x-bits, only uses half of them"
* table is inconsistent in this: for punpck* it gives a memory size of x/2
* (b/c Intel's table gives that), for pshuf* (there are others too I think)
* gives size x (not present in Intel's table)
*/
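/* Byte-by-byte layout of the encoded opcode value.
 * NOTE(review): the original lead-in line is missing from this copy; confirm
 * which field this layout describes.
 * 0th (ms) = prefix byte, if byte 3's 1st nibble's bit 3 and bit 4 are both NOT set;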
* modrm byte, if byte 3's 1st nibble's bit 3 IS set.
* suffix byte, if byte 3's 1st nibble's bit 4 IS set.
* 1st = 1st byte of opcode
* 2nd = 2nd byte of opcode (if there are 2)
* 3rd (ls) = split into nibbles
* 1st nibble (ms) = if bit 1 (OPCODE_TWOBYTES) set, opcode has 2 bytes
* if REQUIRES_EVEX then this bit instead means that this
* instruction must have evex.b set
* if bit 2 (OPCODE_REG) set, opcode has /n
* if bit 3 (OPCODE_MODRM) set, opcode based on entire modrm
* that modrm is stored as the byte 0.
* if REQUIRES_VEX or REQUIRES_EVEX then this bit instead means
* that this instruction must have vex.W or evex.W set.
* if bit 4 (OPCODE_SUFFIX) set, opcode based on suffix byte
* that byte is stored as the byte 0
* if REQUIRES_VEX or REQUIRES_EVEX then this bit instead means
* that this instruction must have vex.L or evex.L set.
* XXX i#1312: Possibly a case for EVEX_LL (L') needs to be
* supported at some point.
* XXX: so we do not support an instr that has an opcode
* dependent on both a prefix and the entire modrm or suffix!
* XXX: perhaps we should use the flags rather than cramming all
* of this information into the opcode byte, especially with the
* VEX/EVEX dependent behavior.
* 2nd nibble (ls) = bits 1-3 hold /n for OPCODE_REG
* if bit 4 (OPCODE_THREEBYTES) is set, the opcode has
* 3 bytes, with the first being an implied 0x0f (so
* the 2nd byte is stored as "1st" and 3rd as "2nd").
*/
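/* NOTE(review): the lead-in naming the field these bullets describe is
 * missing from this copy of the file.
 * + for PREFIX: one of the PREFIX_ constants, or SEG_ constant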
* + for EXTENSION and *_EXT: index into extensions table
* + for OP_: pointer to next entry of that opcode
* + may also point to extra operand table
*/
* these constants are used for instr_info_t.type field
*/
enum {
INVALID = OP_LAST + 1,
PREFIX,
ESCAPE,
FLOAT_EXT,
EXTENSION,
PREFIX_EXT,
REP_EXT,
REPNE_EXT,
MOD_EXT,
RM_EXT,
SUFFIX_EXT,
X64_EXT,
ESCAPE_3BYTE_38,
ESCAPE_3BYTE_3a,
REX_B_EXT,
REX_W_EXT,
VEX_PREFIX_EXT,
E_VEX_EXT,
VEX_L_EXT,
VEX_W_EXT,
XOP_PREFIX_EXT,
XOP_8_EXT,
XOP_9_EXT,
XOP_A_EXT,
EVEX_PREFIX_EXT,
EVEX_Wb_EXT,
* of any instruction's opcode. Remove this comment when this has been finalized.
*/
};
/* Flag bits: each constant below is a distinct single bit, together covering
 * 0x01 through 0x400000 with no gaps.
 *
 * NOTE(review): most comments in this block lost their opening line in this
 * copy of the file. The surviving text is kept verbatim after a "[...]"
 * marker; restore the full wording from the upstream source if available.
 */
#define HAS_MODRM 0x01
#define HAS_EXTRA_OPERANDS 0x02
#define EXTRAS_IN_CODE_FIELD 0x04
/* Flags to indicate opcodes that are invalid in particular modes: */
#define X86_INVALID 0x08
#define X64_INVALID 0x10
/* [...] when decoding. This is never needed for encoding. */
#define REQUIRES_PREFIX 0x20
/* [...] is invalid if encoded using vex. */
#define REQUIRES_VEX 0x40
/* [...] include other rex flags by combining with OPCODE_* flags, like
 * REQUIRES_VEX does today.
 */
#define REQUIRES_REX 0x80
/* [...] This helps us avoid creating a ton of vex_L_extensions entries. */
#define REQUIRES_VEX_L_0 0x0100
/* [...] This helps us avoid creating a ton of vex_L_extensions entries.
 * OPCODE_SUFFIX for REQUIRES_VEX means the same thing for encoding.
 */
#define REQUIRES_VEX_L_1 0x0200
#define HAS_PRED_CC 0x0400
#define HAS_PRED_COMPLEX 0x0800
/* [...] is invalid if encoded using evex. */
#define REQUIRES_EVEX 0x01000
/* [...] (the text of this comment is entirely missing from this copy) */
#define REQUIRES_EVEX_LL_0 0x02000
/* [...] to constrain the VSIB's index register's size. */
#define REQUIRES_VSIB_YMM 0x04000
#define REQUIRES_VSIB_ZMM 0x08000
#define REQUIRES_NOT_K0 0x10000
#define DR_EVEX_INPUT_OPSZ_1 0x20000
#define DR_EVEX_INPUT_OPSZ_2 0x40000
#define DR_EVEX_INPUT_OPSZ_4 0x80000
#define DR_EVEX_INPUT_OPSZ_8 0x100000
#define EVEX_b_IS_SAE 0x200000
#define EVEX_L_LL_IS_ER 0x400000
struct _decode_info_t {
uint opcode;
* that are shared as-is with instr_t (PREFIX_SIGNIFICANT).
* We assume we're in the default mode (32-bit or 64-bit,
* depending on our build) and that the address and data size
* prefixes can be treated as absolute.
*/
uint prefixes;
reg_id_t seg_override;
byte modrm;
byte mod;
byte reg;
byte rm;
bool has_sib;
byte scale;
byte index;
byte base;
bool has_disp;
int disp;
opnd_size_t size_immed;
opnd_size_t size_immed2;
bool immed_pc_relativize : 1;
bool immed_subtract_length : 1;
bool immed_pc_rel_offs : 1;
ushort immed_shift;
ptr_int_t immed;
ptr_int_t immed2;
byte *start_pc;
byte *final_pc;
uint len;
* re-relativizing level 1-3 relative jumps. To save space we could make it a
* union with disp.
*/
byte *disp_abs;
#ifdef X64
* per-dcontext we have our own field here instead of passing dcontext around.
* It's up to the caller to set this field to match either the instr_t
* or the dcontext_t field.
*/
bool x86_mode;
#endif
byte *orig_pc;
bool data_prefix;
bool rep_prefix;
bool repne_prefix;
union {
byte vex_vvvv;
byte evex_vvvv;
};
bool vex_encoded;
bool evex_encoded;
byte evex_aaa;
ptr_int_t cur_offs;
bool has_instr_opnds;
dr_tuple_type_t tuple_type;
opnd_size_t input_size;
};
/* Operand type codes.
 *
 * NOTE(review): this lead comment and most comments inside the enum lost
 * their opening line in this copy of the file. The surviving text is kept
 * verbatim after a "[...]" marker. Fragments that followed an enumerator in
 * this copy are attached to it as trailing comments; presumably that is
 * where they originally belonged -- confirm against the upstream source.
 *
 * [...] them, kept in encode.c
 *
 * The TYPE_x enums are listed in 'Appendix A Opcode Map (Intel SDM Volume 2)'
 * specifically A.2.1 Codes for Addressing Method
 */
enum {
    TYPE_NONE,
    TYPE_A,
    TYPE_B,
    TYPE_C,
    TYPE_D,
    TYPE_E,
    TYPE_G,
    TYPE_H,
    TYPE_I,
    TYPE_J,
    TYPE_L,
    TYPE_M,
    TYPE_O,
    TYPE_P,
    TYPE_Q,
    TYPE_R,
    TYPE_S,
    TYPE_V,
    TYPE_W,
    TYPE_X,
    TYPE_Y,
    TYPE_P_MODRM,
    TYPE_V_MODRM,
    TYPE_1,
    TYPE_FLOATCONST,
    TYPE_XLAT,
    TYPE_MASKMOVQ,
    TYPE_FLOATMEM,
    TYPE_VSIB,
    TYPE_REG,
    TYPE_XREG,
    TYPE_VAR_REG,       /* [...]
                         * 16 w/ data prefix or 64 w/ rex.w: equivalent of Intel 'v'
                         * == like OPSZ_4_rex8_short2 */
    TYPE_VARZ_REG,      /* [...]
                         * 16 w/ data prefix: equivalent of Intel 'z'
                         * == like OPSZ_4_short2 */
    TYPE_VAR_XREG,      /* [...]
                         * but can be 16 w/ data prefix: equivalent of Intel 'd64'
                         * == like OPSZ_4x8_short2 */
    TYPE_VAR_REGX,      /* [...]
                         * equivalent of Intel 'y' == like OPSZ_4_rex8 */
    TYPE_VAR_ADDR_XREG, /* [...]
                         * but can be 16/32 w/ addr prefix: equivalent of Intel 'd64' */
    /* [...] register (r8-r15): we could try to add a flag that modifies the above
     * register types, but we'd have to stick it inside some stolen bits. For
     * simplicity, we just make each combination a separate type:
     */
    TYPE_REG_EX,
    TYPE_VAR_REG_EX,  /* [...]
                       * used for xchg and mov_imm 'v' immed. */
    TYPE_VAR_XREG_EX, /* [...]
                       * used for pop and push. */
    TYPE_VAR_REGX_EX,
    TYPE_INDIR_E,
    TYPE_INDIR_REG,
    TYPE_INDIR_VAR_XREG,         /* [...]
                                  * with a base of 32/64 depending on the mode;
                                  * indirected size varies with data prefix */
    TYPE_INDIR_VAR_REG,          /* [...]
                                  * with a base of 32/64;
                                  * indirected size varies with data and rex prefixes */
    TYPE_INDIR_VAR_XIREG,        /* [...]
                                  * with a base of 32/64 depending on the mode;
                                  * indirected size varies w/ data prefix, except
                                  * 64-bit Intel */
    TYPE_INDIR_VAR_XREG_OFFS_1,  /* [...]
                                  * -1 * size */
    TYPE_INDIR_VAR_XREG_OFFS_8,  /* [...]
                                  * -8 * size and a size of 8 stack slots */
    TYPE_INDIR_VAR_XREG_OFFS_N,  /* [...]
                                  * -N * size and a size to match: i.e., it
                                  * varies based on other operands */
    TYPE_INDIR_VAR_XIREG_OFFS_1, /* [...]
                                  * -1 * size */
    TYPE_INDIR_VAR_REG_OFFS_2,   /* [...]
                                  * -2 * size and a size of 2 stack slots */
    /* [...] to store the base reg: but since most base regs are xsp we could
     * encode that into the type and store the size in the size field
     */
    TYPE_INDIR_VAR_XREG_SIZEx8,  /* [...]
                                  * 8 * regular size */
    TYPE_INDIR_VAR_REG_SIZEx2,   /* [...]
                                  * 2 * regular size */
    TYPE_INDIR_VAR_REG_SIZEx3x5, /* [...]
                                  * 3 * regular size for 32-bit, 5 * regular
                                  * size for 64-bit */
    TYPE_K_MODRM,
    TYPE_K_MODRM_R,
    TYPE_K_REG,
    TYPE_K_VEX,
    TYPE_K_EVEX,
    TYPE_T_REG,
    TYPE_T_MODRM, /* [...]
                   * memory in 32-bit mode, or 16 bytes memory in 64-bit
                   * mode.
                   */
    /* Sentinel: one past the last real type code. */
    TYPE_BEYOND_LAST_ENUM,
};
/* Packs three values into one byte: rm is OR'd in unshifted, reg is shifted
 * left by 3 and mod by 6; the result is cast to byte.
 */
#define MODRM_BYTE(mod, reg, rm) \
    ((byte)((rm) | ((reg) << 3) | ((mod) << 6)))

/* Passes the negation of di->x86_mode and the constant false to IF_X64_ELSE
 * (defined elsewhere; presumably it selects the first argument in X64 builds
 * and the second otherwise -- confirm).
 */
#define X64_MODE(di) IF_X64_ELSE(!(di)->x86_mode, false)
/* Prototypes only: the definitions live outside this header. Judging by
 * their names these helpers classify operand types and resolve variable
 * operand sizes and registers (NOTE(review): confirm against the definitions).
 */

/* Predicate over an operand type code. */
bool optype_is_indir_reg(int optype);

opnd_size_t resolve_var_reg_size(opnd_size_t sz, bool is_reg);

opnd_size_t resolve_variable_size(decode_info_t *di, opnd_size_t sz, bool is_reg);

/* Takes a dcontext and an explicit prefixes value in place of the
 * decode_info_t taken by resolve_variable_size().
 */
opnd_size_t resolve_variable_size_dc(dcontext_t *dcontext, uint prefixes,
                                     opnd_size_t sz, bool is_reg);

/* The last three parameters are wrapped in _IF_X64(), so they presumably
 * exist only in X64 builds -- confirm against the macro definition.
 */
reg_id_t resolve_var_reg(decode_info_t *di, reg_id_t reg32, bool addr,
                         bool can_shrink _IF_X64(bool default_64)
                             _IF_X64(bool can_grow) _IF_X64(bool extendable));

opnd_size_t resolve_addr_size(decode_info_t *di);

opnd_size_t indir_var_reg_size(decode_info_t *di, int optype);

int indir_var_reg_offs_factor(int optype);

opnd_size_t expand_subreg_size(opnd_size_t sz);

dr_pred_type_t decode_predicate_from_instr_info(uint opcode, const instr_info_t *info);
/* NOTE(review): the opening line(s) of this comment are missing from this
 * copy of the file; the surviving text is kept after the "[...]" marker.
 *
 * [...] Intel's Vol.2A 2.6.5 "Compressed Displacement (disp8*N) Support in EVEX".
 *
 * Given the name and the cited section, the returned int is presumably the
 * disp8*N scale factor for the instruction described by di -- confirm
 * against the definition.
 */
int
decode_get_compressed_disp_scale(decode_info_t *di);

/* Takes a read-only table entry and a writable decode_info_t. Presumably it
 * fills in di->tuple_type and di->input_size from info -- confirm against
 * the definition.
 */
void
decode_get_tuple_type_input_size(const instr_info_t *info, decode_info_t *di);

/* Predicate over an opcode value. */
bool
opc_is_cbr_arch(int opc);
/* Decoding tables, declared here and defined elsewhere. They are grouped by
 * shape below; declaration order carries no meaning.
 */

/* One-dimensional instr_info_t tables of unspecified length. */
extern const instr_info_t first_byte[];
extern const instr_info_t second_byte[];
extern const instr_info_t third_byte_38[];
extern const instr_info_t third_byte_3a[];
extern const instr_info_t float_low_modrm[];
extern const instr_info_t suffix_extensions[];
extern const instr_info_t extra_operands[];
extern const instr_info_t xop_extensions[];

/* 256-entry byte tables (one entry per possible byte value). */
extern const byte third_byte_38_index[256];
extern const byte third_byte_3a_index[256];
extern const byte suffix_index[256];
extern const byte xop_8_index[256];
extern const byte xop_9_index[256];
extern const byte xop_a_index[256];

/* Two-dimensional instr_info_t tables, ordered by row width. */
extern const instr_info_t mod_extensions[][2];
extern const instr_info_t x64_extensions[][2];
extern const instr_info_t rex_b_extensions[][2];
extern const instr_info_t rex_w_extensions[][2];
extern const instr_info_t vex_prefix_extensions[][2];
extern const instr_info_t vex_W_extensions[][2];
extern const instr_info_t xop_prefix_extensions[][2];
extern const instr_info_t evex_prefix_extensions[][2];
extern const instr_info_t e_vex_extensions[][3];
extern const instr_info_t vex_L_extensions[][3];
extern const instr_info_t rep_extensions[][4];
extern const instr_info_t evex_Wb_extensions[][4];
extern const instr_info_t repne_extensions[][6];
extern const instr_info_t base_extensions[][8];
extern const instr_info_t rm_extensions[][8];
extern const instr_info_t prefix_extensions[][12];
extern const instr_info_t float_high_modrm[][64];

/* Array of const pointers to const instr_info_t entries. */
extern const instr_info_t *const op_instr[];
#endif