//===- bolt/Target/X86/X86MCSymbolizer.cpp --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "X86MCSymbolizer.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/MCPlusBuilder.h"
#include "bolt/Core/Relocation.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"

#define DEBUG_TYPE "bolt-symbolizer"

namespace llvm {
namespace bolt {

/// Out-of-line destructor definition; it performs no work of its own beyond
/// the implicit destruction of members and bases.
X86MCSymbolizer::~X86MCSymbolizer() = default;

/// Try to append a symbolic expression operand (symbol plus optional addend)
/// to \p Inst in place of the numeric operand \p Value.
///
/// \p InstAddress and \p InstSize describe the instruction being processed;
/// \p ImmOffset is the operand's offset from the start of the instruction and
/// is used to look up a relocation recorded against the operand. Branch and
/// call instructions, and operands with \p ImmSize equal to zero, are never
/// symbolized here.
///
/// \returns true if an expression operand was added to \p Inst, false
/// otherwise.
bool X86MCSymbolizer::tryAddingSymbolicOperand(
    MCInst &Inst, raw_ostream &CStream, int64_t Value, uint64_t InstAddress,
    bool IsBranch, uint64_t ImmOffset, uint64_t ImmSize, uint64_t InstSize) {
  // Branch operands are not handled here, and neither are implicit operands
  // (those reported with a zero size).
  if (IsBranch || ImmSize == 0)
    return false;

  BinaryContext &BC = Function.getBinaryContext();

  if (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst))
    return false;

  /// Append "Sym" or "Sym + Offset" to the instruction as an expression
  /// operand. A zero offset is left out of the expression.
  auto appendSymbolRef = [&](const MCSymbol *Sym, uint64_t Offset) {
    MCContext &Context = *BC.Ctx;
    const MCExpr *Ref = MCSymbolRefExpr::create(Sym, Context);
    if (Offset != 0) {
      const MCExpr *OffsetExpr = MCConstantExpr::create(Offset, Context);
      Ref = MCBinaryExpr::createAdd(Ref, OffsetExpr, Context);
    }
    Inst.addOperand(MCOperand::createExpr(Ref));
  };

  // Symbolize the operand without consulting relocations, provided it is the
  // displacement of a PC-relative compound memory operand. Yields true when an
  // operand was appended and false when the operand does not qualify or no
  // suitable symbol is available.
  auto symbolizePCRelDisp = [&]() {
    const int MemOpIdx = BC.MIB->getMemoryOperandNo(Inst);
    if (MemOpIdx == -1)
      return false;

    // The displacement must be the very next operand to be added.
    const unsigned DispIdx = MemOpIdx + X86::AddrDisp;
    if (Inst.getNumOperands() != DispIdx)
      return false;

    // Only memory operands based on the program counter are of interest.
    const MCOperand &BaseReg = Inst.getOperand(MemOpIdx + X86::AddrBaseReg);
    if (BaseReg.getReg() != BC.MRI->getProgramCounter())
      return false;

    const MCOperand &ScaleAmt = Inst.getOperand(MemOpIdx + X86::AddrScaleAmt);
    const MCOperand &IndexReg = Inst.getOperand(MemOpIdx + X86::AddrIndexReg);
    const bool NoScaledIndex =
        ScaleAmt.getImm() == 0 || IndexReg.getReg() == MCRegister::NoRegister;
    if (!NoScaledIndex)
      return false;

    if (CreateNewSymbols) {
      // Let the binary context pick (and register, if needed) the symbol for
      // the referenced address.
      auto [RefSymbol, RefOffset] =
          BC.handleAddressRef(Value, Function, /*IsPCRel=*/true);
      appendSymbolRef(RefSymbol, RefOffset);
      return true;
    }

    // Without permission to create symbols, only existing data objects that
    // contain the address can be referenced.
    BinaryData *Containing = BC.getBinaryDataContainingAddress(Value);
    if (!Containing)
      return false;

    const MCSymbol *DataSymbol = Containing->getSymbol();
    const uint64_t DataOffset = Value - Containing->getAddress();
    appendSymbolRef(DataSymbol, DataOffset);
    return true;
  };

  // GOTPCRELX relocations are looked up first. Since the linker is allowed to
  // rewrite an instruction carrying such a relocation, the search covers the
  // byte range of the whole instruction instead of just the operand offset.
  // A GOTPCRELX relocation is the only relocation in that range when present,
  // because it applies only to certain instruction types.
  const uint64_t InstOffset = InstAddress - Function.getAddress();
  const Relocation *Rel =
      Function.getRelocationInRange(InstOffset, InstOffset + InstSize);
  if (Rel && Relocation::isX86GOTPCRELX(Rel->Type)) {
    // A PC-relative operand is converted without the relocation information.
    // Using the absolute address rather than the relocation addend is safe
    // for GOTPCRELX: the linker conversion process rejects non-standard
    // forms, and the operand always references GOT, which is not rewritten.
    if (symbolizePCRelDisp())
      return true;

    // Otherwise the linker has turned the PC-relative address into an
    // absolute one; symbolize that address.
    if (CreateNewSymbols)
      BC.handleAddressRef(Value, Function, /*IsPCRel=*/false);

    if (const BinaryData *Target = BC.getBinaryDataAtAddress(Value)) {
      appendSymbolRef(Target->getSymbol(), /*Offset=*/0);
      return true;
    }

    assert(!CreateNewSymbols &&
           "BinaryData should exist at converted GOTPCRELX destination");
    return false;
  }

  // Look for a relocation placed exactly at the operand.
  const uint64_t OperandOffset = InstOffset + ImmOffset;
  if (!Rel || Rel->Offset != OperandOffset)
    Rel = Function.getRelocationAt(OperandOffset);

  // No relocation: fall back to relocation-free PC-relative symbolization.
  if (!Rel)
    return symbolizePCRelDisp();

  // GOTPC64 needs special treatment: the X86 assembler cannot emit the
  // PC-relative 8-byte fixup this would require, so the reference has to be
  // expressed through the symbol name _GLOBAL_OFFSET_TABLE_.
  if (Relocation::isX86GOTPC64(Rel->Type)) {
    auto GOTRefOrErr = handleGOTPC64(*Rel, InstAddress);
    if (auto Err = GOTRefOrErr.takeError()) {
      Function.setSimple(false);
      BC.logBOLTErrorsAndQuitOnFatal(std::move(Err));
      return false;
    }
    auto [GOTSymbol, GOTAddend] = *GOTRefOrErr;
    appendSymbolRef(GOTSymbol, GOTAddend);
    return true;
  }

  const bool IsPCRel = Rel->isPCRelative();

  // Recover the value of the referenced symbol from the relocation.
  uint64_t SymbolValue = Rel->Value - Rel->Addend;
  if (IsPCRel)
    SymbolValue += InstAddress + ImmOffset;

  // Register the reference to the symbol.
  if (CreateNewSymbols)
    BC.handleAddressRef(SymbolValue, Function, IsPCRel);

  // For PC-relative targets, the real addend is the relocation addend
  // adjusted by the distance from the relocation placement to the next
  // instruction.
  uint64_t Addend = Rel->Addend;
  if (IsPCRel)
    Addend += InstOffset + InstSize - Rel->Offset;

  appendSymbolRef(Rel->Symbol, Addend);

  return true;
}

/// Compute the symbolic form of an operand covered by the GOTPC64 relocation
/// \p R on the instruction located at \p InstrAddr.
///
/// \returns the _GLOBAL_OFFSET_TABLE_ symbol paired with the addend relative
/// to the GOT address, or a non-fatal error when the binary context has no
/// GOT symbol or that symbol has a zero address.
Expected<std::pair<MCSymbol *, uint64_t>>
X86MCSymbolizer::handleGOTPC64(const Relocation &R, uint64_t InstrAddr) {
  BinaryContext &BC = Function.getBinaryContext();
  const BinaryData *GOT = BC.getGOTSymbol();

  const bool HasValidGOT = GOT && GOT->getAddress() != 0;
  if (!HasValidGOT) {
    // Serious as this is, the disassembler must keep running, so the error is
    // reported as non-fatal: it gets logged and the user is warned.
    return createNonFatalBOLTError(
        "R_X86_GOTPC64 relocation is present but we did not detect "
        "a valid  _GLOBAL_OFFSET_TABLE_ in symbol table\n");
  }

  // R_X86_GOTPC64 is relative neither to the relocation location nor to the
  // end of the instruction; it is relative to the start of the MOVABSQ
  // instruction. The target address is therefore the value encoded in the
  // original operand at disassembly time (R.Value) plus the MOVABSQ address
  // (InstrAddr). Subtracting the real GOT address yields the intended addend.
  const uint64_t GOTAddress = GOT->getAddress();
  const int64_t Addend = R.Value + InstrAddr - GOTAddress;

  MCSymbol *GOTSymbol = BC.Ctx->getOrCreateSymbol("_GLOBAL_OFFSET_TABLE_");
  return std::make_pair(GOTSymbol, Addend);
}

/// This implementation has an empty body: nothing is written to \p CStream
/// and neither \p Value nor \p Address is inspected.
void X86MCSymbolizer::tryAddingPcLoadReferenceComment(
    raw_ostream &CStream, int64_t Value, uint64_t Address) {
  // No reference comment is produced.
}

} // namespace bolt
} // namespace llvm