* Copyright (c) 2010-2020 Google, Inc. All rights reserved.
* Copyright (c) 2010 Massachusetts Institute of Technology All rights reserved.
* Copyright (c) 2000-2010 VMware, Inc. All rights reserved.
* ******************************************************************************/
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
* and drdecode.
*/
#include "../globals.h"
#include "decode.h"
#include "instr_create_shared.h"
#include "instrument.h"
/* Shorthand for the instruction-list insertion routines used in this file.
 * We mark everything we add as non-app instr.
 */
#define POST instrlist_meta_postinsert
#define PRE instrlist_meta_preinsert
* encode_estimate to determine whether > 32 bits or not: so if unsure where
* it will be encoded, pass a high address) as the immediate; else
* uses val.
* Keep this in sync with patch_mov_immed_arch().
*/
void
insert_mov_immed_arch(dcontext_t *dcontext, instr_t *src_inst, byte *encode_estimate,
ptr_int_t val, opnd_t dst, instrlist_t *ilist, instr_t *instr,
OUT instr_t **first, OUT instr_t **last)
{
instr_t *mov1, *mov2;
if (src_inst != NULL)
val = (ptr_int_t)encode_estimate;
#ifdef X64
if (X64_MODE_DC(dcontext) && !opnd_is_reg(dst)) {
if (val <= INT_MAX && val >= INT_MIN) {
* 0 or 1 in top 33 bits
*/
mov1 = INSTR_CREATE_mov_imm(dcontext, dst,
(src_inst == NULL)
? OPND_CREATE_INT32((int)val)
: opnd_create_instr_ex(src_inst, OPSZ_4, 0));
PRE(ilist, instr, mov1);
mov2 = NULL;
} else {
* access to [dst] if this thread is suspended in between or another
* thread is trying to read [dst], but o/w we have to spill and
* restore a register.
*/
CLIENT_ASSERT(opnd_is_memory_reference(dst), "invalid dst opnd");
opnd_set_size(&dst, OPSZ_4);
mov1 = INSTR_CREATE_mov_st(dcontext, dst,
(src_inst == NULL)
? OPND_CREATE_INT32((int)val)
: opnd_create_instr_ex(src_inst, OPSZ_4, 0));
PRE(ilist, instr, mov1);
if (opnd_is_base_disp(dst)) {
int disp = opnd_get_disp(dst);
CLIENT_ASSERT(disp + 4 > disp, "disp overflow");
opnd_set_disp(&dst, disp + 4);
} else {
byte *addr = opnd_get_addr(dst);
CLIENT_ASSERT(!POINTER_OVERFLOW_ON_ADD(addr, 4), "addr overflow");
dst = OPND_CREATE_ABSMEM(addr + 4, OPSZ_4);
}
mov2 = INSTR_CREATE_mov_st(dcontext, dst,
(src_inst == NULL)
? OPND_CREATE_INT32((int)(val >> 32))
: opnd_create_instr_ex(src_inst, OPSZ_4, 32));
PRE(ilist, instr, mov2);
}
} else {
#endif
mov1 = INSTR_CREATE_mov_imm(dcontext, dst,
(src_inst == NULL)
? OPND_CREATE_INTPTR(val)
: opnd_create_instr_ex(src_inst, OPSZ_PTR, 0));
PRE(ilist, instr, mov1);
mov2 = NULL;
#ifdef X64
}
#endif
if (first != NULL)
*first = mov1;
if (last != NULL)
*last = mov2;
}
* encode_estimate to determine whether > 32 bits or not: so if unsure where
* it will be encoded, pass a high address) as the immediate; else
* uses val.
*/
void
insert_push_immed_arch(dcontext_t *dcontext, instr_t *src_inst, byte *encode_estimate,
ptr_int_t val, instrlist_t *ilist, instr_t *instr,
OUT instr_t **first, OUT instr_t **last)
{
instr_t *push, *mov;
if (src_inst != NULL)
val = (ptr_int_t)encode_estimate;
#ifdef X64
if (X64_MODE_DC(dcontext)) {
* access to TOS if this thread is suspended in between or another
* thread is trying to read its stack, but o/w we have to spill and
* restore a register.
*/
push = INSTR_CREATE_push_imm(dcontext,
(src_inst == NULL)
? OPND_CREATE_INT32((int)val)
: opnd_create_instr_ex(src_inst, OPSZ_4, 0));
PRE(ilist, instr, push);
* in top 33 bits
*/
if (val <= INT_MAX && val >= INT_MIN) {
mov = NULL;
} else {
mov = INSTR_CREATE_mov_st(dcontext, OPND_CREATE_MEM32(REG_XSP, 4),
(src_inst == NULL)
? OPND_CREATE_INT32((int)(val >> 32))
: opnd_create_instr_ex(src_inst, OPSZ_4, 32));
PRE(ilist, instr, mov);
}
} else {
#endif
push = INSTR_CREATE_push_imm(dcontext,
(src_inst == NULL)
? OPND_CREATE_INT32(val)
: opnd_create_instr_ex(src_inst, OPSZ_4, 0));
PRE(ilist, instr, push);
mov = NULL;
#ifdef X64
}
#endif
if (first != NULL)
*first = push;
if (last != NULL)
*last = mov;
}
/* Converts a short-format CTI into an equivalent one using
 * near-rel-format.
 * Remember, the target is kept in the 0th src array position,
 * and has already been converted from an 8-bit offset to an
 * absolute PC, so we can just pretend instructions are longer
 * than they really are.
 *
 * Returns instr itself in every case except one: when ilist is non-NULL and
 * the opcode lies in the OP_loopne..OP_jecxz range, separate instructions are
 * inserted after instr and the newly created near jmp is returned.
 */
instr_t *
convert_to_near_rel_arch(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr)
{
int opcode = instr_get_opcode(instr);
app_pc target = NULL;
/* Short unconditional jmp: just switch to the near opcode. */
if (opcode == OP_jmp_short) {
instr_set_opcode(instr, OP_jmp);
return instr;
}
/* Short conditional jumps: map to the near opcode at the same offset from
 * OP_jo as this opcode is from OP_jo_short.
 */
if (OP_jo_short <= opcode && opcode <= OP_jnle_short) {
instr_set_opcode(instr, opcode - OP_jo_short + OP_jo);
return instr;
}
if (OP_loopne <= opcode && opcode <= OP_jecxz) {
uint mangled_sz;
uint offs;
/*
* from "info as" on GNU/linux system:
Note that the `jcxz', `jecxz', `loop', `loopz', `loope', `loopnz'
and `loopne' instructions only come in byte displacements, so that if
you use these instructions (`gcc' does not use them) you may get an
error message (and incorrect code). The AT&T 80386 assembler tries to
get around this problem by expanding `jcxz foo' to
jcxz cx_zero
jmp cx_nonzero
cx_zero: jmp foo
cx_nonzero:
*
* We use that same expansion, but we want to treat the entire
* three-instruction sequence as a single conditional branch.
* Thus we use a special instruction that stores the entire
* instruction sequence as mangled bytes, yet w/ a valid target operand
* (xref PR 251646).
* patch_branch and instr_invert_cbr
* know how to find the target pc (final 4 of 9 bytes).
* When decoding anything we've written we know the only jcxz or
* loop* instructions are part of these rewritten packages, and
* we use remangle_short_rewrite to read back in the instr.
* (have to do this everywhere call decode() except original
* interp, plus in input_trace())
*
* An alternative is to change 'jcxz foo' to:
<save eflags>
cmpb %cx,$0
je foo_restore
<restore eflags>
...
foo_restore: <restore eflags>
foo:
* However the added complications of restoring the eflags on
* the taken-branch path made me choose the former solution.
*/
/* SUMMARY:
* expand 'shortjump foo' to:
shortjump taken
jmp-short nottaken
taken: jmp foo
nottaken:
*/
if (ilist != NULL) {
/* An instruction list was supplied: insert separate instrs rather than
 * packing raw bytes. Each one is post-inserted directly after instr, so
 * they are added in reverse of their final order:
 * instr, jmp-short nottaken, taken, nottaken.
 */
opnd_t tgt = instr_get_target(instr);
instr_t *nottaken = INSTR_CREATE_label(dcontext);
instr_t *taken = INSTR_CREATE_jmp(dcontext, tgt);
ASSERT(instr_is_meta(instr));
instrlist_meta_postinsert(ilist, instr, nottaken);
instrlist_meta_postinsert(ilist, instr, taken);
instrlist_meta_postinsert(
ilist, instr,
INSTR_CREATE_jmp_short(dcontext, opnd_create_instr(nottaken)));
/* The short branch now targets the near jmp that carries the real target. */
instr_set_target(instr, opnd_create_instr(taken));
return taken;
}
/* No list: resolve the target to an absolute pc for the raw-byte encoding. */
if (opnd_is_near_pc(instr_get_target(instr)))
target = opnd_get_pc(instr_get_target(instr));
else if (opnd_is_near_instr(instr_get_target(instr))) {
instr_t *tgt = opnd_get_instr(instr_get_target(instr));
/* NOTE(review): the start of the original comment here is missing; the
 * surviving text refers to mangle_shared.c, presumably explaining why a
 * helper from that file is not used to obtain the pc -- confirm.
 * The translation is used, falling back to the raw bits when there is no
 * translation and the raw bits are valid.
 */
target = instr_get_translation(tgt);
if (target == NULL && instr_raw_bits_valid(tgt))
target = instr_get_raw_bits(tgt);
ASSERT(target != NULL);
} else
ASSERT_NOT_REACHED();
/* PR 251646: cti_short_rewrite: target is in src0, so operands are
* valid, but raw bits must also be valid, since they hide the multiple
* instrs. For x64, it is marked for re-relativization, but it's
* special since the target must be obtained from src0 and not
* from the raw bits (since that might not reach).
*/
/* Need CTI_SHORT_REWRITE_LENGTH bytes, plus one for an address-size prefix
 * when the register in src 1 is not pointer-sized.
 */
mangled_sz = CTI_SHORT_REWRITE_LENGTH;
if (!reg_is_pointer_sized(opnd_get_reg(instr_get_src(instr, 1))))
mangled_sz++;
instr_allocate_raw_bits(dcontext, instr, mangled_sz);
offs = 0;
if (mangled_sz > CTI_SHORT_REWRITE_LENGTH) {
instr_set_raw_byte(instr, offs, ADDR_PREFIX_OPCODE);
offs++;
}
/* first 2 bytes: the original short branch with an 8-bit offset */
instr_set_raw_byte(instr, offs, decode_first_opcode_byte(opcode));
offs++;
/* pc-relative offsets are from the start of the next instr: 2 skips over
 * the 2-byte jmp-short that follows, landing on the near jmp.
 */
instr_set_raw_byte(instr, offs, (byte)2);
offs++;
/* next 2 bytes: jmp-short with an 8-bit offset of 5, skipping the 5-byte
 * near jmp (the not-taken path).
 */
instr_set_raw_byte(instr, offs, decode_first_opcode_byte(OP_jmp_short));
offs++;
instr_set_raw_byte(instr, offs, (byte)5);
offs++;
/* final 5 bytes: near jmp opcode plus a 32-bit offset to the real target */
instr_set_raw_byte(instr, offs, decode_first_opcode_byte(OP_jmp));
offs++;
/* The offset is relative to the end of the sequence and is truncated to an
 * int, so a target more than 32 bits away will not be reached.
 */
instr_set_raw_word(instr, offs, (int)(target - (instr->bytes + mangled_sz)));
offs += sizeof(int);
ASSERT(offs == mangled_sz);
LOG(THREAD, LOG_INTERP, 2, "convert_to_near_rel: jecxz/loop* opcode\n");
/* Re-mark the operands valid: the target operand in src0 is unchanged. */
instr_set_operands_valid(instr, true);
return instr;
}
/* Any other opcode is unexpected here: log it and assert. */
LOG(THREAD, LOG_INTERP, 1, "convert_to_near_rel: unknown opcode: %d %s\n", opcode,
decode_opcode_name(opcode));
ASSERT_NOT_REACHED();
return instr;
}
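/* NOTE(review): the opening of this comment is missing from the file; the
 * surviving text reads: "... core/arch/, but untangling them all will take
 * some work, so for now it lives here."
 */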
* instr that we treat like a single conditional branch.
* On re-decoding our own output we need to recreate that instr.
* This routine assumes that the instructions encoded at pc
* are indeed a mangled cti short.
* Assumes that the first instr has already been decoded into instr,
* that pc points to the start of that instr.
* Converts instr into a new 3-raw-byte-instr with a private copy of the
* original raw bits.
* Optionally modifies the target to "target" if "target" is non-null.
* Returns the pc of the instruction after the remangled sequence.
*/
byte *
remangle_short_rewrite(dcontext_t *dcontext, instr_t *instr, byte *pc, app_pc target)
{
uint mangled_sz = CTI_SHORT_REWRITE_LENGTH;
ASSERT(instr_is_cti_short_rewrite(instr, pc));
if (*pc == ADDR_PREFIX_OPCODE)
mangled_sz++;
if (target == NULL) {
int rel_target = *((int *)(pc + mangled_sz - 4));
target = pc + mangled_sz + rel_target;
}
instr_set_target(instr, opnd_create_pc(target));
* we've already read the first 2-byte instruction, jecxz/loop*
* they all take up mangled_sz bytes
*/
instr_allocate_raw_bits(dcontext, instr, mangled_sz);
instr_set_raw_bytes(instr, pc, mangled_sz);
instr_set_raw_word(instr, mangled_sz - 4, (int)(target - (pc + mangled_sz)));
instr_set_operands_valid(instr, true);
return (pc + mangled_sz);
}