aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Target/X86/Disassembler
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/X86/Disassembler')
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp1009
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h112
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp1909
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h675
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h503
5 files changed, 4208 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
new file mode 100644
index 000000000000..ce8fcf164668
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -0,0 +1,1009 @@
+//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the X86 Disassembler.
+// It contains code to translate the data produced by the decoder into
+// MCInsts.
+// Documentation for the disassembler can be found in X86Disassembler.h.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86Disassembler.h"
+#include "X86DisassemblerDecoder.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::X86Disassembler;
+
+#define DEBUG_TYPE "x86-disassembler"
+
+#define GET_REGINFO_ENUM
+#include "X86GenRegisterInfo.inc"
+#define GET_INSTRINFO_ENUM
+#include "X86GenInstrInfo.inc"
+#define GET_SUBTARGETINFO_ENUM
+#include "X86GenSubtargetInfo.inc"
+
+/// Debug - Writes a "file:line: message" record to the LLVM debug stream.
+///
+/// @param file - Name of the source file reporting the message.
+/// @param line - Line number within that file.
+/// @param s    - The message text; it is emitted verbatim.
+void llvm::X86Disassembler::Debug(const char *file, unsigned line,
+                                  const char *s) {
+  raw_ostream &Out = dbgs();
+  Out << file << ":" << line << ": " << s;
+}
+
+/// GetInstrName - Looks up the name of an opcode.
+///
+/// @param Opcode - The opcode to look up.
+/// @param mii    - Type-erased pointer that is cast to a const MCInstrInfo.
+/// @return       - Whatever MCInstrInfo::getName() reports for Opcode.
+const char *llvm::X86Disassembler::GetInstrName(unsigned Opcode,
+                                                const void *mii) {
+  return static_cast<const MCInstrInfo *>(mii)->getName(Opcode);
+}
+
+#define debug(s) DEBUG(Debug(__FILE__, __LINE__, s)); // NOTE: the expansion already ends in ';', so "debug(x);" leaves an extra empty statement - keep it out of unbraced if/else bodies.
+
+namespace llvm {
+
+// Placeholder enumerators. Nothing ever assigns these values; they exist only
+// so that the automatically-generated switch statements, which name X86::<x>
+// for every effective-address base, still compile.
+namespace X86 {
+  enum {
+    BX_SI = 500,
+    BX_DI,  // 501
+    BP_SI,  // 502
+    BP_DI,  // 503
+    sib,    // 504
+    sib64   // 505
+  };
+}
+
+extern Target TheX86_32Target;
+extern Target TheX86_64Target;
+
+}
+
+static bool translateInstruction(MCInst &target,
+ InternalInstruction &source,
+ const MCDisassembler *Dis); // Forward declaration so getInstruction() below can call it; a false result is treated there as success.
+
+/// Constructs the disassembler and derives the decoding mode (16-, 32- or
+/// 64-bit) from the subtarget's feature bits. Exactly one of the three mode
+/// bits is expected to be set; anything else is treated as unreachable.
+X86GenericDisassembler::X86GenericDisassembler(
+    const MCSubtargetInfo &STI,
+    MCContext &Ctx,
+    std::unique_ptr<const MCInstrInfo> MII)
+    : MCDisassembler(STI, Ctx), MII(std::move(MII)) {
+  const FeatureBitset &Features = STI.getFeatureBits();
+
+  // The checks are ordered 16 -> 32 -> 64; the first bit found wins.
+  if (Features[X86::Mode16Bit])
+    fMode = MODE_16BIT;
+  else if (Features[X86::Mode32Bit])
+    fMode = MODE_32BIT;
+  else if (Features[X86::Mode64Bit])
+    fMode = MODE_64BIT;
+  else
+    llvm_unreachable("Invalid CPU mode");
+}
+
+namespace {
+/// A window of raw bytes paired with the address that its first byte
+/// corresponds to. regionReader() uses Base to turn an address into an index.
+struct Region {
+  ArrayRef<uint8_t> Bytes;
+  uint64_t Base;
+
+  Region(ArrayRef<uint8_t> Data, uint64_t Start) : Bytes(Data), Base(Start) {}
+};
+} // end anonymous namespace
+
+/// A byte-reader callback that fetches a single byte out of a Region.
+///
+/// @param Arg     - The generic callback parameter. In this case, this should
+///                  be a pointer to a Region.
+/// @param Byte    - Out-parameter that receives the byte that was read.
+/// @param Address - The address to be read; it is converted to an index by
+///                  subtracting the Region's Base.
+/// @return        - 0 on success; -1 if Address lies outside the Region.
+static int regionReader(const void *Arg, uint8_t *Byte, uint64_t Address) {
+  const auto *R = static_cast<const Region *>(Arg);
+
+  // Keep the offset 64 bits wide. Narrowing it to 'unsigned' would let an
+  // address far outside the region (including one below Base, which wraps
+  // around) alias an in-range index after truncation.
+  const uint64_t Index = Address - R->Base;
+  if (Index >= R->Bytes.size())
+    return -1;
+
+  *Byte = R->Bytes[Index];
+  return 0;
+}
+
+/// logger - a callback function that forwards a message to a raw_ostream
+/// through its operator<<.
+///
+/// @param arg - The generic callback parameter. This should be a pointer
+///              to a raw_ostream; a null pointer makes the call a no-op.
+/// @param log - A string to be logged. logger() adds a newline.
+static void logger(void* arg, const char* log) {
+  if (auto *Out = static_cast<raw_ostream *>(arg))
+    *Out << log << "\n";
+}
+
+//
+// Public interface for the disassembler
+//
+
+/// Decodes one instruction from Bytes and translates it into Instr.
+///
+/// @param Instr   - Receives the translated instruction.
+/// @param Size    - Receives the decoded length on success, or the distance
+///                  from Address to the decoder's reader cursor on a decode
+///                  failure.
+/// @param Bytes   - The bytes to decode from.
+/// @param Address - The address that Bytes[0] corresponds to.
+/// @param VStream - Stream for decoder logging; logging is disabled when this
+///                  is nulls().
+/// @param CStream - Stream recorded as the comment stream.
+/// @return        - Success if both decoding and translation succeed;
+///                  Fail otherwise.
+MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
+    MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
+    raw_ostream &VStream, raw_ostream &CStream) const {
+  CommentStream = &CStream;
+
+  // Only hand the decoder a logging callback when the output is not being
+  // discarded anyway.
+  dlog_t LoggerFn = nullptr;
+  if (&VStream != &nulls())
+    LoggerFn = logger;
+
+  Region Window(Bytes, Address);
+  InternalInstruction Decoded;
+
+  const int DecodeStatus =
+      decodeInstruction(&Decoded, regionReader,
+                        static_cast<const void *>(&Window), LoggerFn,
+                        static_cast<void *>(&VStream),
+                        static_cast<const void *>(MII.get()), Address, fMode);
+
+  if (DecodeStatus != 0) {
+    Size = Decoded.readerCursor - Address;
+    return Fail;
+  }
+
+  Size = Decoded.length;
+  // translateInstruction() reports failure by returning true.
+  const bool TranslationFailed = translateInstruction(Instr, Decoded, this);
+  return TranslationFailed ? Fail : Success;
+}
+
+//
+// Private code that translates from struct InternalInstructions to MCInsts.
+//
+
+/// translateRegister - Translates an internal register to the appropriate LLVM
+/// register, and appends it as an operand to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param reg - The Reg to append; it is used directly as an index into the
+///              table generated from ALL_REGS.
+static void translateRegister(MCInst &mcInst, Reg reg) {
+  // The table is immutable, so build it once instead of re-materialising it on
+  // the stack for every call. Entries are 16 bits wide to leave headroom for
+  // register enum values that do not fit in a byte.
+#define ENTRY(x) X86::x,
+  static const uint16_t llvmRegnums[] = {
+    ALL_REGS
+    0
+  };
+#undef ENTRY
+
+  mcInst.addOperand(MCOperand::createReg(llvmRegnums[reg]));
+}
+
+/// tryAddingSymbolicOperand - tries to add a symbolic operand in place of the
+/// immediate Value in the MCInst.
+///
+/// @param Value    - The immediate Value, has had any PC adjustment made by
+///                   the caller.
+/// @param isBranch - If the instruction is a branch instruction
+/// @param Address  - The starting address of the instruction
+/// @param Offset   - The byte offset to this immediate in the instruction
+/// @param Width    - The byte width of this immediate in the instruction
+/// @param MI       - The MCInst that may receive the operand
+/// @param Dis      - The disassembler whose hook performs the actual work
+///
+/// This is a thin adapter: it only reorders the arguments and forwards them to
+/// MCDisassembler::tryAddingSymbolicOperand(). As described for that hook, if
+/// a getOpInfo() function was set when setupForSymbolicDisassembly() was
+/// called, it is asked for symbolic information about the immediate using the
+/// Address, Offset and Width. If that returns non-zero, the information is
+/// used to create an MCExpr which is added as an operand to the MCInst. If
+/// getOpInfo() returns zero and isBranch is true, a symbol look up for the
+/// immediate Value is done and, if a symbol is found, an MCExpr is created
+/// with that; otherwise an MCExpr with the immediate Value is created. This
+/// function returns true if it adds an operand to the MCInst and false
+/// otherwise.
+static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
+                                     uint64_t Address, uint64_t Offset,
+                                     uint64_t Width, MCInst &MI,
+                                     const MCDisassembler *Dis) {
+  const bool OperandAdded =
+      Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset,
+                                    Width);
+  return OperandAdded;
+}
+
+/// tryAddingPcLoadReferenceComment - tries to add a comment as to what is
+/// being referenced by a load instruction with the base register that is the
+/// rip. These can often be addresses in a literal pool. The Address of the
+/// instruction and its immediate Value are used to determine the address
+/// being referenced in the literal pool entry. The SymbolLookUp call back will
+/// return a pointer to a literal 'C' string if the referenced address is an
+/// address into a section with 'C' string literals.
+///
+/// This wrapper only casts Decoder to an MCDisassembler and forwards to its
+/// hook of the same name; note that the hook takes Value before Address.
+static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value,
+                                            const void *Decoder) {
+  static_cast<const MCDisassembler *>(Decoder)
+      ->tryAddingPcLoadReferenceComment(Value, Address);
+}
+
+static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { // LLVM register for each segment override; indexed with insn.segmentOverride.
+ 0, // SEG_OVERRIDE_NONE
+ X86::CS, // NOTE(review): the remaining entries presumably follow the order of the
+ X86::SS, // segment-override enum in the decoder header - confirm there.
+ X86::DS,
+ X86::ES,
+ X86::FS,
+ X86::GS
+};
+
+/// translateSrcIndex - Appends a source index operand to an MCInst: first the
+/// index register, then the segment register for the current override.
+///
+/// The register is chosen from the decoding mode and from whether a 0x67
+/// (address-size) prefix is present: RSI/ESI in 64-bit mode, ESI/SI in 32-bit
+/// mode and SI/ESI in 16-bit mode (without/with the prefix).
+///
+/// @param mcInst - The MCInst to append to.
+/// @param insn - The internal instruction.
+/// @return - Always false.
+static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {
+  const bool hasAddrSizePrefix = insn.prefixPresent[0x67];
+
+  unsigned indexRegNo;
+  switch (insn.mode) {
+  case MODE_64BIT:
+    indexRegNo = hasAddrSizePrefix ? X86::ESI : X86::RSI;
+    break;
+  case MODE_32BIT:
+    indexRegNo = hasAddrSizePrefix ? X86::SI : X86::ESI;
+    break;
+  default:
+    assert(insn.mode == MODE_16BIT);
+    indexRegNo = hasAddrSizePrefix ? X86::ESI : X86::SI;
+    break;
+  }
+
+  mcInst.addOperand(MCOperand::createReg(indexRegNo));
+  mcInst.addOperand(
+      MCOperand::createReg(segmentRegnums[insn.segmentOverride]));
+  return false;
+}
+
+/// translateDstIndex - Appends a destination index operand to an MCInst.
+/// Unlike translateSrcIndex, only the index register is appended; no segment
+/// register operand follows it.
+///
+/// The register is chosen from the decoding mode and from whether a 0x67
+/// (address-size) prefix is present: RDI/EDI in 64-bit mode, EDI/DI in 32-bit
+/// mode and DI/EDI in 16-bit mode (without/with the prefix).
+///
+/// @param mcInst - The MCInst to append to.
+/// @param insn - The internal instruction.
+/// @return - Always false.
+static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {
+  const bool hasAddrSizePrefix = insn.prefixPresent[0x67];
+
+  unsigned indexRegNo;
+  switch (insn.mode) {
+  case MODE_64BIT:
+    indexRegNo = hasAddrSizePrefix ? X86::EDI : X86::RDI;
+    break;
+  case MODE_32BIT:
+    indexRegNo = hasAddrSizePrefix ? X86::DI : X86::EDI;
+    break;
+  default:
+    assert(insn.mode == MODE_16BIT);
+    indexRegNo = hasAddrSizePrefix ? X86::EDI : X86::DI;
+    break;
+  }
+
+  mcInst.addOperand(MCOperand::createReg(indexRegNo));
+  return false;
+}
+
+/// translateImmediate - Appends an immediate operand to an MCInst.
+///
+/// Depending on the operand type this sign-extends the raw immediate, switches
+/// the opcode to its "_alt" twin when a condition-code immediate is out of the
+/// range the specialised printers handle, or - for the register-in-immediate
+/// types (XMM/YMM/ZMM/BND) - appends the register selected by
+/// (immediate >> 4) and returns without adding an immediate at all.
+///
+/// @param mcInst    - The MCInst to append to.
+/// @param immediate - The immediate value to append.
+/// @param operand   - The operand, as stored in the descriptor table.
+/// @param insn      - The internal instruction.
+/// @param Dis       - The disassembler; it is offered the chance to add a
+///                    symbolic operand before a plain immediate is used.
+static void translateImmediate(MCInst &mcInst, uint64_t immediate,
+                               const OperandSpecifier &operand,
+                               InternalInstruction &insn,
+                               const MCDisassembler *Dis) {
+  // Sign-extend the immediate if necessary.
+
+  OperandType type = (OperandType)operand.type;
+
+  bool isBranch = false;
+  uint64_t pcrel = 0;
+  if (type == TYPE_RELv) {
+    isBranch = true;
+    // PC-relative targets are relative to the end of the immediate field.
+    pcrel = insn.startLocation +
+            insn.immediateOffset + insn.immediateSize;
+    switch (insn.displacementSize) {
+    default:
+      break;
+    case 1:
+      if(immediate & 0x80)
+        immediate |= ~(0xffull);
+      break;
+    case 2:
+      if(immediate & 0x8000)
+        immediate |= ~(0xffffull);
+      break;
+    case 4:
+      if(immediate & 0x80000000)
+        immediate |= ~(0xffffffffull);
+      break;
+    case 8:
+      break;
+    }
+  }
+  // By default sign-extend all X86 immediates based on their encoding.
+  else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 ||
+           type == TYPE_IMM64 || type == TYPE_IMMv) {
+    switch (operand.encoding) {
+    default:
+      break;
+    case ENCODING_IB:
+      if(immediate & 0x80)
+        immediate |= ~(0xffull);
+      break;
+    case ENCODING_IW:
+      if(immediate & 0x8000)
+        immediate |= ~(0xffffull);
+      break;
+    case ENCODING_ID:
+      if(immediate & 0x80000000)
+        immediate |= ~(0xffffffffull);
+      break;
+    case ENCODING_IO:
+      break;
+    }
+  } else if (type == TYPE_IMM3) {
+    // Check for immediates that printSSECC can't handle.
+    if (immediate >= 8) {
+      unsigned NewOpc;
+      switch (mcInst.getOpcode()) {
+      default: llvm_unreachable("unexpected opcode");
+      case X86::CMPPDrmi: NewOpc = X86::CMPPDrmi_alt; break;
+      case X86::CMPPDrri: NewOpc = X86::CMPPDrri_alt; break;
+      case X86::CMPPSrmi: NewOpc = X86::CMPPSrmi_alt; break;
+      case X86::CMPPSrri: NewOpc = X86::CMPPSrri_alt; break;
+      case X86::CMPSDrm: NewOpc = X86::CMPSDrm_alt; break;
+      case X86::CMPSDrr: NewOpc = X86::CMPSDrr_alt; break;
+      case X86::CMPSSrm: NewOpc = X86::CMPSSrm_alt; break;
+      case X86::CMPSSrr: NewOpc = X86::CMPSSrr_alt; break;
+      case X86::VPCOMBri: NewOpc = X86::VPCOMBri_alt; break;
+      case X86::VPCOMBmi: NewOpc = X86::VPCOMBmi_alt; break;
+      case X86::VPCOMWri: NewOpc = X86::VPCOMWri_alt; break;
+      case X86::VPCOMWmi: NewOpc = X86::VPCOMWmi_alt; break;
+      case X86::VPCOMDri: NewOpc = X86::VPCOMDri_alt; break;
+      case X86::VPCOMDmi: NewOpc = X86::VPCOMDmi_alt; break;
+      case X86::VPCOMQri: NewOpc = X86::VPCOMQri_alt; break;
+      case X86::VPCOMQmi: NewOpc = X86::VPCOMQmi_alt; break;
+      case X86::VPCOMUBri: NewOpc = X86::VPCOMUBri_alt; break;
+      case X86::VPCOMUBmi: NewOpc = X86::VPCOMUBmi_alt; break;
+      case X86::VPCOMUWri: NewOpc = X86::VPCOMUWri_alt; break;
+      case X86::VPCOMUWmi: NewOpc = X86::VPCOMUWmi_alt; break;
+      case X86::VPCOMUDri: NewOpc = X86::VPCOMUDri_alt; break;
+      case X86::VPCOMUDmi: NewOpc = X86::VPCOMUDmi_alt; break;
+      case X86::VPCOMUQri: NewOpc = X86::VPCOMUQri_alt; break;
+      case X86::VPCOMUQmi: NewOpc = X86::VPCOMUQmi_alt; break;
+      }
+      // Switch opcode to the one that doesn't get special printing.
+      mcInst.setOpcode(NewOpc);
+    }
+  } else if (type == TYPE_IMM5) {
+    // Check for immediates that printAVXCC can't handle.
+    if (immediate >= 32) {
+      unsigned NewOpc;
+      switch (mcInst.getOpcode()) {
+      default: llvm_unreachable("unexpected opcode");
+      case X86::VCMPPDrmi: NewOpc = X86::VCMPPDrmi_alt; break;
+      case X86::VCMPPDrri: NewOpc = X86::VCMPPDrri_alt; break;
+      case X86::VCMPPSrmi: NewOpc = X86::VCMPPSrmi_alt; break;
+      case X86::VCMPPSrri: NewOpc = X86::VCMPPSrri_alt; break;
+      case X86::VCMPSDrm: NewOpc = X86::VCMPSDrm_alt; break;
+      case X86::VCMPSDrr: NewOpc = X86::VCMPSDrr_alt; break;
+      case X86::VCMPSSrm: NewOpc = X86::VCMPSSrm_alt; break;
+      case X86::VCMPSSrr: NewOpc = X86::VCMPSSrr_alt; break;
+      case X86::VCMPPDYrmi: NewOpc = X86::VCMPPDYrmi_alt; break;
+      case X86::VCMPPDYrri: NewOpc = X86::VCMPPDYrri_alt; break;
+      case X86::VCMPPSYrmi: NewOpc = X86::VCMPPSYrmi_alt; break;
+      case X86::VCMPPSYrri: NewOpc = X86::VCMPPSYrri_alt; break;
+      case X86::VCMPPDZrmi: NewOpc = X86::VCMPPDZrmi_alt; break;
+      case X86::VCMPPDZrri: NewOpc = X86::VCMPPDZrri_alt; break;
+      case X86::VCMPPDZrrib: NewOpc = X86::VCMPPDZrrib_alt; break;
+      case X86::VCMPPSZrmi: NewOpc = X86::VCMPPSZrmi_alt; break;
+      case X86::VCMPPSZrri: NewOpc = X86::VCMPPSZrri_alt; break;
+      case X86::VCMPPSZrrib: NewOpc = X86::VCMPPSZrrib_alt; break;
+      case X86::VCMPSDZrm: NewOpc = X86::VCMPSDZrmi_alt; break;
+      case X86::VCMPSDZrr: NewOpc = X86::VCMPSDZrri_alt; break;
+      case X86::VCMPSSZrm: NewOpc = X86::VCMPSSZrmi_alt; break;
+      case X86::VCMPSSZrr: NewOpc = X86::VCMPSSZrri_alt; break;
+      }
+      // Switch opcode to the one that doesn't get special printing.
+      mcInst.setOpcode(NewOpc);
+    }
+  } else if (type == TYPE_AVX512ICC) {
+    if (immediate >= 8 || ((immediate & 0x3) == 3)) {
+      unsigned NewOpc;
+      switch (mcInst.getOpcode()) {
+      default: llvm_unreachable("unexpected opcode");
+      case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPBZ128rmi_alt; break;
+      case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPBZ128rmik_alt; break;
+      case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPBZ128rri_alt; break;
+      case X86::VPCMPBZ128rrik: NewOpc = X86::VPCMPBZ128rrik_alt; break;
+      case X86::VPCMPBZ256rmi: NewOpc = X86::VPCMPBZ256rmi_alt; break;
+      case X86::VPCMPBZ256rmik: NewOpc = X86::VPCMPBZ256rmik_alt; break;
+      case X86::VPCMPBZ256rri: NewOpc = X86::VPCMPBZ256rri_alt; break;
+      case X86::VPCMPBZ256rrik: NewOpc = X86::VPCMPBZ256rrik_alt; break;
+      case X86::VPCMPBZrmi: NewOpc = X86::VPCMPBZrmi_alt; break;
+      case X86::VPCMPBZrmik: NewOpc = X86::VPCMPBZrmik_alt; break;
+      case X86::VPCMPBZrri: NewOpc = X86::VPCMPBZrri_alt; break;
+      case X86::VPCMPBZrrik: NewOpc = X86::VPCMPBZrrik_alt; break;
+      case X86::VPCMPDZ128rmi: NewOpc = X86::VPCMPDZ128rmi_alt; break;
+      case X86::VPCMPDZ128rmib: NewOpc = X86::VPCMPDZ128rmib_alt; break;
+      case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPDZ128rmibk_alt; break;
+      case X86::VPCMPDZ128rmik: NewOpc = X86::VPCMPDZ128rmik_alt; break;
+      case X86::VPCMPDZ128rri: NewOpc = X86::VPCMPDZ128rri_alt; break;
+      case X86::VPCMPDZ128rrik: NewOpc = X86::VPCMPDZ128rrik_alt; break;
+      case X86::VPCMPDZ256rmi: NewOpc = X86::VPCMPDZ256rmi_alt; break;
+      case X86::VPCMPDZ256rmib: NewOpc = X86::VPCMPDZ256rmib_alt; break;
+      case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPDZ256rmibk_alt; break;
+      case X86::VPCMPDZ256rmik: NewOpc = X86::VPCMPDZ256rmik_alt; break;
+      case X86::VPCMPDZ256rri: NewOpc = X86::VPCMPDZ256rri_alt; break;
+      case X86::VPCMPDZ256rrik: NewOpc = X86::VPCMPDZ256rrik_alt; break;
+      case X86::VPCMPDZrmi: NewOpc = X86::VPCMPDZrmi_alt; break;
+      case X86::VPCMPDZrmib: NewOpc = X86::VPCMPDZrmib_alt; break;
+      case X86::VPCMPDZrmibk: NewOpc = X86::VPCMPDZrmibk_alt; break;
+      case X86::VPCMPDZrmik: NewOpc = X86::VPCMPDZrmik_alt; break;
+      case X86::VPCMPDZrri: NewOpc = X86::VPCMPDZrri_alt; break;
+      case X86::VPCMPDZrrik: NewOpc = X86::VPCMPDZrrik_alt; break;
+      case X86::VPCMPQZ128rmi: NewOpc = X86::VPCMPQZ128rmi_alt; break;
+      case X86::VPCMPQZ128rmib: NewOpc = X86::VPCMPQZ128rmib_alt; break;
+      case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPQZ128rmibk_alt; break;
+      case X86::VPCMPQZ128rmik: NewOpc = X86::VPCMPQZ128rmik_alt; break;
+      case X86::VPCMPQZ128rri: NewOpc = X86::VPCMPQZ128rri_alt; break;
+      case X86::VPCMPQZ128rrik: NewOpc = X86::VPCMPQZ128rrik_alt; break;
+      case X86::VPCMPQZ256rmi: NewOpc = X86::VPCMPQZ256rmi_alt; break;
+      case X86::VPCMPQZ256rmib: NewOpc = X86::VPCMPQZ256rmib_alt; break;
+      case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPQZ256rmibk_alt; break;
+      case X86::VPCMPQZ256rmik: NewOpc = X86::VPCMPQZ256rmik_alt; break;
+      case X86::VPCMPQZ256rri: NewOpc = X86::VPCMPQZ256rri_alt; break;
+      case X86::VPCMPQZ256rrik: NewOpc = X86::VPCMPQZ256rrik_alt; break;
+      case X86::VPCMPQZrmi: NewOpc = X86::VPCMPQZrmi_alt; break;
+      case X86::VPCMPQZrmib: NewOpc = X86::VPCMPQZrmib_alt; break;
+      case X86::VPCMPQZrmibk: NewOpc = X86::VPCMPQZrmibk_alt; break;
+      case X86::VPCMPQZrmik: NewOpc = X86::VPCMPQZrmik_alt; break;
+      case X86::VPCMPQZrri: NewOpc = X86::VPCMPQZrri_alt; break;
+      case X86::VPCMPQZrrik: NewOpc = X86::VPCMPQZrrik_alt; break;
+      case X86::VPCMPUBZ128rmi: NewOpc = X86::VPCMPUBZ128rmi_alt; break;
+      case X86::VPCMPUBZ128rmik: NewOpc = X86::VPCMPUBZ128rmik_alt; break;
+      case X86::VPCMPUBZ128rri: NewOpc = X86::VPCMPUBZ128rri_alt; break;
+      case X86::VPCMPUBZ128rrik: NewOpc = X86::VPCMPUBZ128rrik_alt; break;
+      case X86::VPCMPUBZ256rmi: NewOpc = X86::VPCMPUBZ256rmi_alt; break;
+      case X86::VPCMPUBZ256rmik: NewOpc = X86::VPCMPUBZ256rmik_alt; break;
+      case X86::VPCMPUBZ256rri: NewOpc = X86::VPCMPUBZ256rri_alt; break;
+      case X86::VPCMPUBZ256rrik: NewOpc = X86::VPCMPUBZ256rrik_alt; break;
+      case X86::VPCMPUBZrmi: NewOpc = X86::VPCMPUBZrmi_alt; break;
+      case X86::VPCMPUBZrmik: NewOpc = X86::VPCMPUBZrmik_alt; break;
+      case X86::VPCMPUBZrri: NewOpc = X86::VPCMPUBZrri_alt; break;
+      case X86::VPCMPUBZrrik: NewOpc = X86::VPCMPUBZrrik_alt; break;
+      case X86::VPCMPUDZ128rmi: NewOpc = X86::VPCMPUDZ128rmi_alt; break;
+      case X86::VPCMPUDZ128rmib: NewOpc = X86::VPCMPUDZ128rmib_alt; break;
+      case X86::VPCMPUDZ128rmibk: NewOpc = X86::VPCMPUDZ128rmibk_alt; break;
+      case X86::VPCMPUDZ128rmik: NewOpc = X86::VPCMPUDZ128rmik_alt; break;
+      case X86::VPCMPUDZ128rri: NewOpc = X86::VPCMPUDZ128rri_alt; break;
+      case X86::VPCMPUDZ128rrik: NewOpc = X86::VPCMPUDZ128rrik_alt; break;
+      case X86::VPCMPUDZ256rmi: NewOpc = X86::VPCMPUDZ256rmi_alt; break;
+      case X86::VPCMPUDZ256rmib: NewOpc = X86::VPCMPUDZ256rmib_alt; break;
+      case X86::VPCMPUDZ256rmibk: NewOpc = X86::VPCMPUDZ256rmibk_alt; break;
+      case X86::VPCMPUDZ256rmik: NewOpc = X86::VPCMPUDZ256rmik_alt; break;
+      case X86::VPCMPUDZ256rri: NewOpc = X86::VPCMPUDZ256rri_alt; break;
+      case X86::VPCMPUDZ256rrik: NewOpc = X86::VPCMPUDZ256rrik_alt; break;
+      case X86::VPCMPUDZrmi: NewOpc = X86::VPCMPUDZrmi_alt; break;
+      case X86::VPCMPUDZrmib: NewOpc = X86::VPCMPUDZrmib_alt; break;
+      case X86::VPCMPUDZrmibk: NewOpc = X86::VPCMPUDZrmibk_alt; break;
+      case X86::VPCMPUDZrmik: NewOpc = X86::VPCMPUDZrmik_alt; break;
+      case X86::VPCMPUDZrri: NewOpc = X86::VPCMPUDZrri_alt; break;
+      case X86::VPCMPUDZrrik: NewOpc = X86::VPCMPUDZrrik_alt; break;
+      case X86::VPCMPUQZ128rmi: NewOpc = X86::VPCMPUQZ128rmi_alt; break;
+      case X86::VPCMPUQZ128rmib: NewOpc = X86::VPCMPUQZ128rmib_alt; break;
+      case X86::VPCMPUQZ128rmibk: NewOpc = X86::VPCMPUQZ128rmibk_alt; break;
+      case X86::VPCMPUQZ128rmik: NewOpc = X86::VPCMPUQZ128rmik_alt; break;
+      case X86::VPCMPUQZ128rri: NewOpc = X86::VPCMPUQZ128rri_alt; break;
+      case X86::VPCMPUQZ128rrik: NewOpc = X86::VPCMPUQZ128rrik_alt; break;
+      case X86::VPCMPUQZ256rmi: NewOpc = X86::VPCMPUQZ256rmi_alt; break;
+      case X86::VPCMPUQZ256rmib: NewOpc = X86::VPCMPUQZ256rmib_alt; break;
+      case X86::VPCMPUQZ256rmibk: NewOpc = X86::VPCMPUQZ256rmibk_alt; break;
+      case X86::VPCMPUQZ256rmik: NewOpc = X86::VPCMPUQZ256rmik_alt; break;
+      case X86::VPCMPUQZ256rri: NewOpc = X86::VPCMPUQZ256rri_alt; break;
+      case X86::VPCMPUQZ256rrik: NewOpc = X86::VPCMPUQZ256rrik_alt; break;
+      case X86::VPCMPUQZrmi: NewOpc = X86::VPCMPUQZrmi_alt; break;
+      case X86::VPCMPUQZrmib: NewOpc = X86::VPCMPUQZrmib_alt; break;
+      case X86::VPCMPUQZrmibk: NewOpc = X86::VPCMPUQZrmibk_alt; break;
+      case X86::VPCMPUQZrmik: NewOpc = X86::VPCMPUQZrmik_alt; break;
+      case X86::VPCMPUQZrri: NewOpc = X86::VPCMPUQZrri_alt; break;
+      case X86::VPCMPUQZrrik: NewOpc = X86::VPCMPUQZrrik_alt; break;
+      case X86::VPCMPUWZ128rmi: NewOpc = X86::VPCMPUWZ128rmi_alt; break;
+      case X86::VPCMPUWZ128rmik: NewOpc = X86::VPCMPUWZ128rmik_alt; break;
+      case X86::VPCMPUWZ128rri: NewOpc = X86::VPCMPUWZ128rri_alt; break;
+      case X86::VPCMPUWZ128rrik: NewOpc = X86::VPCMPUWZ128rrik_alt; break;
+      case X86::VPCMPUWZ256rmi: NewOpc = X86::VPCMPUWZ256rmi_alt; break;
+      case X86::VPCMPUWZ256rmik: NewOpc = X86::VPCMPUWZ256rmik_alt; break;
+      case X86::VPCMPUWZ256rri: NewOpc = X86::VPCMPUWZ256rri_alt; break;
+      case X86::VPCMPUWZ256rrik: NewOpc = X86::VPCMPUWZ256rrik_alt; break;
+      case X86::VPCMPUWZrmi: NewOpc = X86::VPCMPUWZrmi_alt; break;
+      case X86::VPCMPUWZrmik: NewOpc = X86::VPCMPUWZrmik_alt; break;
+      case X86::VPCMPUWZrri: NewOpc = X86::VPCMPUWZrri_alt; break;
+      case X86::VPCMPUWZrrik: NewOpc = X86::VPCMPUWZrrik_alt; break;
+      case X86::VPCMPWZ128rmi: NewOpc = X86::VPCMPWZ128rmi_alt; break;
+      case X86::VPCMPWZ128rmik: NewOpc = X86::VPCMPWZ128rmik_alt; break;
+      case X86::VPCMPWZ128rri: NewOpc = X86::VPCMPWZ128rri_alt; break;
+      case X86::VPCMPWZ128rrik: NewOpc = X86::VPCMPWZ128rrik_alt; break;
+      case X86::VPCMPWZ256rmi: NewOpc = X86::VPCMPWZ256rmi_alt; break;
+      case X86::VPCMPWZ256rmik: NewOpc = X86::VPCMPWZ256rmik_alt; break;
+      case X86::VPCMPWZ256rri: NewOpc = X86::VPCMPWZ256rri_alt; break;
+      case X86::VPCMPWZ256rrik: NewOpc = X86::VPCMPWZ256rrik_alt; break;
+      case X86::VPCMPWZrmi: NewOpc = X86::VPCMPWZrmi_alt; break;
+      case X86::VPCMPWZrmik: NewOpc = X86::VPCMPWZrmik_alt; break;
+      case X86::VPCMPWZrri: NewOpc = X86::VPCMPWZrri_alt; break;
+      case X86::VPCMPWZrrik: NewOpc = X86::VPCMPWZrrik_alt; break;
+      }
+      // Switch opcode to the one that doesn't get special printing.
+      mcInst.setOpcode(NewOpc);
+    }
+  }
+
+  switch (type) {
+  case TYPE_XMM32:
+  case TYPE_XMM64:
+  case TYPE_XMM128:
+    mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4)));
+    return;
+  case TYPE_XMM256:
+    mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4)));
+    return;
+  case TYPE_XMM512:
+    mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4)));
+    return;
+  case TYPE_BNDR:
+    mcInst.addOperand(MCOperand::createReg(X86::BND0 + (immediate >> 4)));
+    // Like the vector-register cases above, the register is the whole operand.
+    // Without this return the case fell through into TYPE_REL8, which flagged
+    // the operand as a branch target and appended a second operand.
+    return;
+  case TYPE_REL8:
+    isBranch = true;
+    pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
+    if (immediate & 0x80)
+      immediate |= ~(0xffull);
+    break;
+  case TYPE_REL16:
+    isBranch = true;
+    pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
+    if (immediate & 0x8000)
+      immediate |= ~(0xffffull);
+    break;
+  case TYPE_REL32:
+  case TYPE_REL64:
+    isBranch = true;
+    pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
+    if(immediate & 0x80000000)
+      immediate |= ~(0xffffffffull);
+    break;
+  default:
+    // operand is 64 bits wide. Do nothing.
+    break;
+  }
+
+  // Offer the PC-adjusted value for symbolization; if no symbolic operand was
+  // added, fall back to the plain (not PC-adjusted) immediate.
+  if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
+                               insn.immediateOffset, insn.immediateSize,
+                               mcInst, Dis))
+    mcInst.addOperand(MCOperand::createImm(immediate));
+
+  // Memory-offset operands additionally carry a segment register operand.
+  if (type == TYPE_MOFFS8 || type == TYPE_MOFFS16 ||
+      type == TYPE_MOFFS32 || type == TYPE_MOFFS64) {
+    MCOperand segmentReg;
+    segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
+    mcInst.addOperand(segmentReg);
+  }
+}
+
+/// translateRMRegister - Translates a register stored in the R/M field of the
+/// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
+/// @param mcInst - The MCInst to append to.
+/// @param insn - The internal instruction to extract the R/M field from; only
+/// insn.eaBase is consulted here.
+/// @return - false on success; true otherwise (no operand is appended then).
+static bool translateRMRegister(MCInst &mcInst,
+ InternalInstruction &insn) {
+ if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
+ debug("A R/M register operand may not have a SIB byte");
+ return true;
+ }
+
+ switch (insn.eaBase) {
+ default:
+ debug("Unexpected EA base register");
+ return true;
+ case EA_BASE_NONE:
+ debug("EA_BASE_NONE for ModR/M base");
+ return true;
+#define ENTRY(x) case EA_BASE_##x:
+ ALL_EA_BASES
+#undef ENTRY
+ debug("A R/M register operand may not have a base; "
+ "the operand must be a register.");
+ return true; // Shared by every EA_BASE_* label generated just above.
+#define ENTRY(x) \
+ case EA_REG_##x: \
+ mcInst.addOperand(MCOperand::createReg(X86::x)); break;
+ ALL_REGS
+#undef ENTRY
+ }
+
+ return false; // Reached only through one of the EA_REG_* cases.
+}
+
+/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
+/// fields of an internal instruction (and possibly its SIB byte) to a memory
+/// operand in LLVM's format, and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param insn - The instruction to extract Mod, R/M, and SIB fields from.
+/// @param Dis - The disassembler, used for the symbolic-operand and PC-load-comment hooks.
+/// @return - false on success; true otherwise (no operand is appended then).
+static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
+ const MCDisassembler *Dis) {
+ // Addresses in an MCInst are represented as five operands:
+ // 1. basereg (register) The R/M base, or (if there is a SIB) the
+ // SIB base
+ // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
+ // scale amount
+ // 3. indexreg (register) register 0 (none), or (if there is a SIB)
+ // the index (which is multiplied by the
+ // scale amount)
+ // 4. displacement (immediate) 0, or the displacement if there is one
+ // 5. segmentreg (register) segmentRegnums[insn.segmentOverride], i.e.
+ // register 0 for SEG_OVERRIDE_NONE
+
+ MCOperand baseReg;
+ MCOperand scaleAmount;
+ MCOperand indexReg;
+ MCOperand displacement;
+ MCOperand segmentReg;
+ uint64_t pcrel = 0; // Becomes nonzero only on the 64-bit EA_BASE_NONE (RIP-relative) path below.
+
+ if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
+ if (insn.sibBase != SIB_BASE_NONE) {
+ switch (insn.sibBase) {
+ default:
+ debug("Unexpected sibBase");
+ return true;
+#define ENTRY(x) \
+ case SIB_BASE_##x: \
+ baseReg = MCOperand::createReg(X86::x); break;
+ ALL_SIB_BASES
+#undef ENTRY
+ }
+ } else {
+ baseReg = MCOperand::createReg(0);
+ }
+
+ // Check whether we are handling VSIB addressing mode for GATHER.
+ // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and
+ // we should use SIB_INDEX_XMM4|YMM4 for VSIB.
+ // I don't see a way to get the correct IndexReg in readSIB:
+ // We can tell whether it is VSIB or SIB after instruction ID is decoded,
+ // but instruction ID may not be decoded yet when calling readSIB.
+ uint32_t Opcode = mcInst.getOpcode();
+ bool IndexIs128 = (Opcode == X86::VGATHERDPDrm ||
+ Opcode == X86::VGATHERDPDYrm ||
+ Opcode == X86::VGATHERQPDrm ||
+ Opcode == X86::VGATHERDPSrm ||
+ Opcode == X86::VGATHERQPSrm ||
+ Opcode == X86::VPGATHERDQrm ||
+ Opcode == X86::VPGATHERDQYrm ||
+ Opcode == X86::VPGATHERQQrm ||
+ Opcode == X86::VPGATHERDDrm ||
+ Opcode == X86::VPGATHERQDrm);
+ bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm ||
+ Opcode == X86::VGATHERDPSYrm ||
+ Opcode == X86::VGATHERQPSYrm ||
+ Opcode == X86::VGATHERDPDZrm ||
+ Opcode == X86::VPGATHERDQZrm ||
+ Opcode == X86::VPGATHERQQYrm ||
+ Opcode == X86::VPGATHERDDYrm ||
+ Opcode == X86::VPGATHERQDYrm);
+ bool IndexIs512 = (Opcode == X86::VGATHERQPDZrm ||
+ Opcode == X86::VGATHERDPSZrm ||
+ Opcode == X86::VGATHERQPSZrm ||
+ Opcode == X86::VPGATHERQQZrm ||
+ Opcode == X86::VPGATHERDDZrm ||
+ Opcode == X86::VPGATHERQDZrm);
+ if (IndexIs128 || IndexIs256 || IndexIs512) {
+ unsigned IndexOffset = insn.sibIndex -
+ (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX); // Not meaningful for SIB_INDEX_NONE; that case uses 4 below instead.
+ SIBIndex IndexBase = IndexIs512 ? SIB_INDEX_ZMM0 :
+ IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
+ insn.sibIndex = (SIBIndex)(IndexBase +
+ (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset)); // Note: this rewrites insn.sibIndex in place.
+ }
+
+ if (insn.sibIndex != SIB_INDEX_NONE) {
+ switch (insn.sibIndex) {
+ default:
+ debug("Unexpected sibIndex");
+ return true;
+#define ENTRY(x) \
+ case SIB_INDEX_##x: \
+ indexReg = MCOperand::createReg(X86::x); break;
+ EA_BASES_32BIT
+ EA_BASES_64BIT
+ REGS_XMM
+ REGS_YMM
+ REGS_ZMM
+#undef ENTRY
+ }
+ } else {
+ indexReg = MCOperand::createReg(0);
+ }
+
+ scaleAmount = MCOperand::createImm(insn.sibScale);
+ } else {
+ switch (insn.eaBase) {
+ case EA_BASE_NONE:
+ if (insn.eaDisplacement == EA_DISP_NONE) {
+ debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
+ return true;
+ }
+ if (insn.mode == MODE_64BIT){
+ pcrel = insn.startLocation +
+ insn.displacementOffset + insn.displacementSize; // Address just past the displacement field.
+ tryAddingPcLoadReferenceComment(insn.startLocation +
+ insn.displacementOffset,
+ insn.displacement + pcrel, Dis);
+ baseReg = MCOperand::createReg(X86::RIP); // Section 2.2.1.6
+ }
+ else
+ baseReg = MCOperand::createReg(0);
+
+ indexReg = MCOperand::createReg(0);
+ break;
+ case EA_BASE_BX_SI:
+ baseReg = MCOperand::createReg(X86::BX);
+ indexReg = MCOperand::createReg(X86::SI);
+ break;
+ case EA_BASE_BX_DI:
+ baseReg = MCOperand::createReg(X86::BX);
+ indexReg = MCOperand::createReg(X86::DI);
+ break;
+ case EA_BASE_BP_SI:
+ baseReg = MCOperand::createReg(X86::BP);
+ indexReg = MCOperand::createReg(X86::SI);
+ break;
+ case EA_BASE_BP_DI:
+ baseReg = MCOperand::createReg(X86::BP);
+ indexReg = MCOperand::createReg(X86::DI);
+ break;
+ default:
+ indexReg = MCOperand::createReg(0);
+ switch (insn.eaBase) {
+ default:
+ debug("Unexpected eaBase");
+ return true;
+ // Here, we will use the fill-ins defined above. However,
+ // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
+ // sib and sib64 were handled in the top-level if, so they're only
+ // placeholders to keep the compiler happy.
+#define ENTRY(x) \
+ case EA_BASE_##x: \
+ baseReg = MCOperand::createReg(X86::x); break;
+ ALL_EA_BASES
+#undef ENTRY
+#define ENTRY(x) case EA_REG_##x:
+ ALL_REGS
+#undef ENTRY
+ debug("A R/M memory operand may not be a register; "
+ "the base field must be a base.");
+ return true;
+ }
+ }
+
+ scaleAmount = MCOperand::createImm(1);
+ }
+
+ displacement = MCOperand::createImm(insn.displacement);
+
+ segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
+
+ mcInst.addOperand(baseReg);
+ mcInst.addOperand(scaleAmount);
+ mcInst.addOperand(indexReg);
+ if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
+ insn.startLocation, insn.displacementOffset,
+ insn.displacementSize, mcInst, Dis))
+ mcInst.addOperand(displacement); // Fallback: the raw displacement, without pcrel added.
+ mcInst.addOperand(segmentReg);
+ return false;
+}
+
+/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
+/// byte of an instruction to LLVM form, and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param operand - The operand, as stored in the descriptor table.
+/// @param insn - The instruction to extract Mod, R/M, and SIB fields from.
+/// @param Dis - The disassembler; only forwarded to translateRMMemory.
+/// @return - false on success; true otherwise
+static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
+ InternalInstruction &insn, const MCDisassembler *Dis) {
+ switch (operand.type) {
+ default:
+ debug("Unexpected type for a R/M operand");
+ return true;
+ case TYPE_R8: // Operand types in this first group are translated as registers.
+ case TYPE_R16:
+ case TYPE_R32:
+ case TYPE_R64:
+ case TYPE_Rv:
+ case TYPE_MM64:
+ case TYPE_XMM:
+ case TYPE_XMM32:
+ case TYPE_XMM64:
+ case TYPE_XMM128:
+ case TYPE_XMM256:
+ case TYPE_XMM512:
+ case TYPE_VK1:
+ case TYPE_VK2:
+ case TYPE_VK4:
+ case TYPE_VK8:
+ case TYPE_VK16:
+ case TYPE_VK32:
+ case TYPE_VK64:
+ case TYPE_DEBUGREG:
+ case TYPE_CONTROLREG:
+ case TYPE_BNDR:
+ return translateRMRegister(mcInst, insn);
+ case TYPE_M: // Operand types in this second group are translated as memory references.
+ case TYPE_M8:
+ case TYPE_M16:
+ case TYPE_M32:
+ case TYPE_M64:
+ case TYPE_M128:
+ case TYPE_M256:
+ case TYPE_M512:
+ case TYPE_Mv:
+ case TYPE_M32FP:
+ case TYPE_M64FP:
+ case TYPE_M80FP:
+ case TYPE_M1616:
+ case TYPE_M1632:
+ case TYPE_M1664:
+ case TYPE_LEA:
+ return translateRMMemory(mcInst, insn, Dis);
+ }
+}
+
+/// translateFPRegister - Appends the FPU stack register for a given stack
+/// position to an MCInst.
+///
+/// @param mcInst   - The MCInst to append to.
+/// @param stackPos - The stack position to translate; it is added to
+///                   X86::ST0 to form the register number.
+static void translateFPRegister(MCInst &mcInst,
+                                uint8_t stackPos) {
+  const MCOperand stackReg = MCOperand::createReg(X86::ST0 + stackPos);
+  mcInst.addOperand(stackReg);
+}
+
+/// translateMaskRegister - Appends the mask register selected by a 3-bit
+/// register number to an MCInst.
+///
+/// @param mcInst     - The MCInst to append to.
+/// @param maskRegNum - Number of mask register from 0 to 7; it is added to
+///                     X86::K0 to form the register number.
+/// @return           - false on success; true if maskRegNum is 8 or more
+///                     (nothing is appended in that case).
+static bool translateMaskRegister(MCInst &mcInst,
+                                  uint8_t maskRegNum) {
+  if (maskRegNum < 8) {
+    const MCOperand maskReg = MCOperand::createReg(X86::K0 + maskRegNum);
+    mcInst.addOperand(maskReg);
+    return false;
+  }
+
+  debug("Invalid mask register number");
+  return true;
+}
+
+/// translateOperand - Converts one operand of an internal instruction to
+/// LLVM's format, choosing the translator from the operand's encoding, and
+/// appends it to an MCInst.
+///
+/// @param mcInst  - The MCInst to append to.
+/// @param operand - The operand, as stored in the descriptor table.
+/// @param insn    - The internal instruction.
+/// @param Dis     - The disassembler; forwarded to the translators that
+///                  take it.
+/// @return        - false on success; true otherwise.
+static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
+                             InternalInstruction &insn,
+                             const MCDisassembler *Dis) {
+  switch (operand.encoding) {
+  case ENCODING_REG:
+    translateRegister(mcInst, insn.reg);
+    return false;
+
+  case ENCODING_WRITEMASK:
+    return translateMaskRegister(mcInst, insn.writemask);
+
+  CASE_ENCODING_RM:
+    return translateRM(mcInst, operand, insn, Dis);
+
+  // Code offsets are rejected outright.
+  case ENCODING_CB: case ENCODING_CW: case ENCODING_CD:
+  case ENCODING_CP: case ENCODING_CO: case ENCODING_CT:
+    debug("Translation of code offsets isn't supported.");
+    return true;
+
+  // Immediates are taken in order: the post-increment selects the current
+  // immediate and advances the counter for the next immediate operand.
+  case ENCODING_IB: case ENCODING_IW: case ENCODING_ID:
+  case ENCODING_IO: case ENCODING_Iv: case ENCODING_Ia:
+    translateImmediate(mcInst,
+                       insn.immediates[insn.numImmediatesTranslated++],
+                       operand,
+                       insn,
+                       Dis);
+    return false;
+
+  case ENCODING_SI:
+    return translateSrcIndex(mcInst, insn);
+
+  case ENCODING_DI:
+    return translateDstIndex(mcInst, insn);
+
+  // Register carried in insn.opcodeRegister.
+  case ENCODING_RB: case ENCODING_RW: case ENCODING_RD:
+  case ENCODING_RO: case ENCODING_Rv:
+    translateRegister(mcInst, insn.opcodeRegister);
+    return false;
+
+  // The FP stack position is the low three bits of the ModR/M byte.
+  case ENCODING_FP:
+    translateFPRegister(mcInst, insn.modRM & 7);
+    return false;
+
+  case ENCODING_VVVV:
+    translateRegister(mcInst, insn.vvvv);
+    return false;
+
+  // A duplicate operand re-translates the operand it refers to; the index of
+  // that operand is the distance of the type from TYPE_DUP0.
+  case ENCODING_DUP:
+    return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
+                            insn, Dis);
+
+  default:
+    debug("Unhandled operand encoding during translation");
+    return true;
+  }
+}
+
+/// translateInstruction - Populates an MCInst from an internal instruction:
+/// sets the opcode and translates every operand that has an encoding.
+///
+/// @param mcInst - The MCInst to populate with the instruction's data.
+/// @param insn   - The internal instruction.
+/// @param Dis    - The disassembler; forwarded to translateOperand().
+/// @return       - false on success; true if the instruction has no
+///                 specification or any operand fails to translate.
+static bool translateInstruction(MCInst &mcInst,
+                                 InternalInstruction &insn,
+                                 const MCDisassembler *Dis) {
+  if (!insn.spec) {
+    debug("Instruction has no specification");
+    return true;
+  }
+
+  mcInst.clear();
+  mcInst.setOpcode(insn.instructionID);
+
+  // When prefix reading decided that an overlapping 0xf2 or 0xf3 byte is
+  // really xacquire/xrelease, substitute those opcodes for the rep and repne
+  // ones. Any other opcode is left untouched.
+  if (insn.xAcquireRelease) {
+    switch (mcInst.getOpcode()) {
+    case X86::REP_PREFIX:
+      mcInst.setOpcode(X86::XRELEASE_PREFIX);
+      break;
+    case X86::REPNE_PREFIX:
+      mcInst.setOpcode(X86::XACQUIRE_PREFIX);
+      break;
+    default:
+      break;
+    }
+  }
+
+  // translateOperand() consumes immediates in order, starting from the first.
+  insn.numImmediatesTranslated = 0;
+
+  for (const auto &Op : insn.operands) {
+    if (Op.encoding == ENCODING_NONE)
+      continue;
+    if (translateOperand(mcInst, Op, insn, Dis))
+      return true;
+  }
+
+  return false;
+}
+
+/// Factory registered with the TargetRegistry: builds an
+/// X86GenericDisassembler and hands it a freshly created MCInstrInfo.
+/// The caller receives a heap-allocated object created with new.
+static MCDisassembler *createX86Disassembler(const Target &T,
+                                             const MCSubtargetInfo &STI,
+                                             MCContext &Ctx) {
+  std::unique_ptr<const MCInstrInfo> InstrInfo(T.createMCInstrInfo());
+  MCDisassembler *const Disassembler =
+      new X86Disassembler::X86GenericDisassembler(STI, Ctx,
+                                                  std::move(InstrInfo));
+  return Disassembler;
+}
+
+/// Registers createX86Disassembler as the MCDisassembler factory for
+/// TheX86_32Target and then for TheX86_64Target.
+extern "C" void LLVMInitializeX86Disassembler() {
+  Target *const X86Targets[] = {&TheX86_32Target, &TheX86_64Target};
+  for (Target *X86Target : X86Targets)
+    TargetRegistry::RegisterMCDisassembler(*X86Target, createX86Disassembler);
+}
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h
new file mode 100644
index 000000000000..d7f426b2641d
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -0,0 +1,112 @@
+//===-- X86Disassembler.h - Disassembler for x86 and x86_64 -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
+// 64-bit X86 instruction sets. The main decode sequence for an assembly
+// instruction in this disassembler is:
+//
+// 1. Read the prefix bytes and determine the attributes of the instruction.
+// These attributes, recorded in enum attributeBits
+// (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
+// provides a mapping from bitmasks to contexts, which are represented by
+// enum InstructionContext (ibid.).
+//
+// 2. Read the opcode, and determine what kind of opcode it is. The
+// disassembler distinguishes four kinds of opcodes, which are enumerated in
+// OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
+// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
+// (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
+//
+// 3. Depending on the opcode type, look in one of four ClassDecision structures
+// (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
+// OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode to get
+// a ModRMDecision (ibid.).
+//
+// 4. Some instructions, such as escape opcodes or extended opcodes, or even
+// instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
+// ModR/M byte to complete decode. The ModRMDecision's type is an entry from
+// ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
+// ModR/M byte is required and how to interpret it.
+//
+// 5. After resolving the ModRMDecision, the disassembler has a unique ID
+// of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
+// INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
+// meanings of its operands.
+//
+// 6. For each operand, its encoding is an entry from OperandEncoding
+// (X86DisassemblerDecoderCommon.h) and its type is an entry from
+// OperandType (ibid.). The encoding indicates how to read it from the
+// instruction; the type indicates how to interpret the value once it has
+// been read. For example, a register operand could be stored in the R/M
+// field of the ModR/M byte, the REG field of the ModR/M byte, or added to
+// the main opcode. This is orthogonal to its meaning (a GPR or an XMM
+// register, for instance). Given this information, the operands can be
+// extracted and interpreted.
+//
+// 7. As the last step, the disassembler translates the instruction information
+// and operands into a format understandable by the client - in this case, an
+// MCInst for use by the MC infrastructure.
+//
+// The disassembler is broken broadly into two parts: the table emitter that
+// emits the instruction decode tables discussed above during compilation, and
+// the disassembler itself. The table emitter is documented in more detail in
+// utils/TableGen/X86DisassemblerEmitter.h.
+//
+// X86Disassembler.h contains the public interface for the disassembler,
+// adhering to the MCDisassembler interface.
+// X86Disassembler.cpp contains the code responsible for step 7, and for
+// invoking the decoder to execute steps 1-6.
+// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
+// table emitter and the disassembler.
+// X86DisassemblerDecoder.h contains the public interface of the decoder,
+// factored out into C for possible use by other projects.
+// X86DisassemblerDecoder.cpp contains the source code of the decoder, which is
+// responsible for steps 1-6.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLER_H
+#define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLER_H
+
+#include "X86DisassemblerDecoderCommon.h"
+#include "llvm/MC/MCDisassembler.h"
+
+namespace llvm {
+
+class MCInst;
+class MCInstrInfo;
+class MCSubtargetInfo;
+class MemoryObject;
+class raw_ostream;
+
+namespace X86Disassembler {
+
+/// Generic disassembler for all X86 platforms. Each platform class should
+/// only have to subclass the constructor and provide a different
+/// disassemblerMode value.
+class X86GenericDisassembler : public MCDisassembler {
+  /// Instruction info passed to the constructor; held by unique_ptr, so it is
+  /// released together with this object.
+  std::unique_ptr<const MCInstrInfo> MII;
+
+public:
+  X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
+                         std::unique_ptr<const MCInstrInfo> MII);
+
+  /// Overrides MCDisassembler::getInstruction().
+  DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
+                              ArrayRef<uint8_t> Bytes, uint64_t Address,
+                              raw_ostream &vStream,
+                              raw_ostream &cStream) const override;
+
+private:
+  /// The disassembler mode this instance decodes in.
+  DisassemblerMode fMode;
+};
+
+} // namespace X86Disassembler
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
new file mode 100644
index 000000000000..040143b15587
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -0,0 +1,1909 @@
+//===-- X86DisassemblerDecoder.cpp - Disassembler decoder -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the X86 Disassembler.
+// It contains the implementation of the instruction decoder.
+// Documentation for the disassembler can be found in X86Disassembler.h.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstdarg> /* for va_*() */
+#include <cstdio> /* for vsnprintf() */
+#include <cstdlib> /* for exit() */
+#include <cstring> /* for memset() */
+
+#include "X86DisassemblerDecoder.h"
+
+using namespace llvm::X86Disassembler;
+
+/// Specifies whether a ModR/M byte is needed and (if so) which
+/// instruction each possible value of the ModR/M byte corresponds to. Once
+/// this information is known, we have narrowed down to a single instruction.
+struct ModRMDecision {
+ uint8_t modrm_type; // One of the MODRM_* kinds that decode() switches on.
+ uint16_t instructionIDs; // Base index into modRMTable; decode() adds a ModR/M-derived offset for the split and full kinds.
+};
+
+/// Specifies which set of ModR/M->instruction tables to look at
+/// given a particular opcode.
+struct OpcodeDecision {
+ ModRMDecision modRMDecisions[256]; // Indexed by the opcode byte (see modRMRequired() and decode()).
+};
+
+/// Specifies which opcode->instruction tables to look at given
+/// a particular context (set of attributes). Since there are many possible
+/// contexts, the decoder first uses CONTEXTS_SYM to determine which context
+/// applies given a specific set of attributes. Hence there are only IC_max
+/// entries in this table, rather than 2^(ATTR_max).
+struct ContextDecision {
+ OpcodeDecision opcodeDecisions[IC_max]; // Indexed by InstructionContext (see modRMRequired() and decode()).
+};
+
+#include "X86GenDisassemblerTables.inc"
+
+#ifndef NDEBUG /* NDEBUG not defined: debug(s) passes the file, line and message to Debug(). */
+#define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0)
+#else /* NDEBUG defined: debug(s) expands to an empty statement. */
+#define debug(s) do { } while (0)
+#endif /* NDEBUG */
+
+
+/*
+ * contextForAttrs - Client for the instruction context table. Maps a set of
+ *   attributes to the decode context stored for it in CONTEXTS_SYM.
+ *
+ * @param attrMask - Attributes, from the enumeration attributeBits. Used
+ *                   directly as the index into CONTEXTS_SYM; no bounds check
+ *                   is performed here.
+ * @return         - The InstructionContext to use when looking up an
+ *                   instruction with these attributes.
+ */
+static InstructionContext contextForAttrs(uint16_t attrMask) {
+  const auto rawContext = CONTEXTS_SYM[attrMask];
+  return static_cast<InstructionContext>(rawContext);
+}
+
+/*
+ * modRMRequired - Consults the instruction table for the given opcode map to
+ *   find out whether the ModR/M byte is needed to finish decoding.
+ *
+ * @param type        - The opcode type (i.e., how many bytes it has); selects
+ *                      which ContextDecision table is consulted.
+ * @param insnContext - The context for the instruction, as returned by
+ *                      contextForAttrs.
+ * @param opcode      - The last byte of the instruction's opcode, not counting
+ *                      ModR/M extensions and escapes.
+ * @return            - Nonzero if the ModR/M byte is required (the table
+ *                      entry is anything other than MODRM_ONEENTRY), zero
+ *                      otherwise.
+ */
+static int modRMRequired(OpcodeType type,
+                         InstructionContext insnContext,
+                         uint16_t opcode) {
+  const struct ContextDecision* table = nullptr;
+
+  switch (type) {
+  case ONEBYTE:      table = &ONEBYTE_SYM;     break;
+  case TWOBYTE:      table = &TWOBYTE_SYM;     break;
+  case THREEBYTE_38: table = &THREEBYTE38_SYM; break;
+  case THREEBYTE_3A: table = &THREEBYTE3A_SYM; break;
+  case XOP8_MAP:     table = &XOP8_MAP_SYM;    break;
+  case XOP9_MAP:     table = &XOP9_MAP_SYM;    break;
+  case XOPA_MAP:     table = &XOPA_MAP_SYM;    break;
+  }
+
+  const struct ModRMDecision& entry =
+      table->opcodeDecisions[insnContext].modRMDecisions[opcode];
+  return entry.modrm_type != MODRM_ONEENTRY;
+}
+
+/*
+ * decode - Looks up the unique ID of an instruction in the table for its
+ *   opcode map, using the ModR/M byte where the table entry calls for it.
+ *
+ * @param type        - See modRMRequired().
+ * @param insnContext - See modRMRequired().
+ * @param opcode      - See modRMRequired().
+ * @param modRM       - The ModR/M byte if required, or any value if not.
+ * @return            - The UID of the instruction, or 0 on failure.
+ */
+static InstrUID decode(OpcodeType type,
+                       InstructionContext insnContext,
+                       uint8_t opcode,
+                       uint8_t modRM) {
+  /* Pick the table for this opcode map, then index it once. */
+  const struct ContextDecision* table = nullptr;
+
+  switch (type) {
+  case ONEBYTE:      table = &ONEBYTE_SYM;     break;
+  case TWOBYTE:      table = &TWOBYTE_SYM;     break;
+  case THREEBYTE_38: table = &THREEBYTE38_SYM; break;
+  case THREEBYTE_3A: table = &THREEBYTE3A_SYM; break;
+  case XOP8_MAP:     table = &XOP8_MAP_SYM;    break;
+  case XOP9_MAP:     table = &XOP9_MAP_SYM;    break;
+  case XOPA_MAP:     table = &XOPA_MAP_SYM;    break;
+  }
+
+  const struct ModRMDecision* entry =
+      &table->opcodeDecisions[insnContext].modRMDecisions[opcode];
+
+  /* Start of this opcode's run of entries in modRMTable. */
+  const uint16_t base = entry->instructionIDs;
+  /* Bits 5:3 of the ModR/M byte. */
+  const int regField = (modRM & 0x38) >> 3;
+
+  switch (entry->modrm_type) {
+  case MODRM_ONEENTRY:
+    return modRMTable[base];
+  case MODRM_SPLITRM:
+    /* Two entries: mod != 3 first, then mod == 3. */
+    return modRMTable[modFromModRM(modRM) == 0x3 ? base + 1 : base];
+  case MODRM_SPLITREG:
+    /* Eight entries by reg for mod != 3, then eight more for mod == 3. */
+    return modRMTable[base + regField + (modFromModRM(modRM) == 0x3 ? 8 : 0)];
+  case MODRM_SPLITMISC:
+    /* Eight entries by reg for mod != 3, then one per low-six-bit value. */
+    if (modFromModRM(modRM) == 0x3)
+      return modRMTable[base + (modRM & 0x3f) + 8];
+    return modRMTable[base + regField];
+  case MODRM_FULL:
+    /* One entry per ModR/M byte value. */
+    return modRMTable[base + modRM];
+  default:
+    debug("Corrupt table!  Unknown modrm_type");
+    return 0;
+  }
+}
+
+/*
+ * specifierForUID - Returns the entry of INSTRUCTIONS_SYM that describes the
+ *   instruction with the given UID.
+ *
+ * @param uid - The unique ID for the instruction.  This should be returned by
+ *              decode(); specifierForUID will not check bounds.
+ * @return    - A pointer to the specification for that instruction.
+ */
+static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
+  const struct InstructionSpecifier *spec = &INSTRUCTIONS_SYM[uid];
+  return spec;
+}
+
+/*
+ * consumeByte - Reads one byte at the cursor through the user-supplied reader
+ *   and, if the read succeeds, moves the cursor past it.
+ *
+ * @param insn - The instruction with the reader function to use.  The cursor
+ *               for this instruction is advanced on success only.
+ * @param byte - A pointer to a pre-allocated memory buffer to be populated
+ *               with the data read.
+ * @return     - 0 if the read was successful; otherwise the reader's nonzero
+ *               result.
+ */
+static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
+  const int status = insn->reader(insn->readerArg, byte, insn->readerCursor);
+
+  if (status == 0)
+    insn->readerCursor += 1;
+
+  return status;
+}
+
+/*
+ * lookAtByte - Reads the byte at the cursor like consumeByte, but leaves the
+ *   cursor where it is.
+ *
+ * @param insn - See consumeByte().
+ * @param byte - See consumeByte().
+ * @return     - See consumeByte().
+ */
+static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
+  const int status = insn->reader(insn->readerArg, byte, insn->readerCursor);
+  return status;
+}
+
+/*
+ * unconsumeByte - Moves the instruction's cursor back by one byte.  No check
+ *   is made that a byte was actually consumed before.
+ *
+ * @param insn - The instruction whose cursor is moved.
+ */
+static void unconsumeByte(struct InternalInstruction* insn) {
+  insn->readerCursor -= 1;
+}
+
+/*
+ * CONSUME_FUNC - Defines a static reader called `name` that fetches
+ *   sizeof(type) bytes starting at the cursor, combines them with the byte at
+ *   the lowest offset in the least-significant position, stores the result
+ *   through ptr and advances the cursor.  If any byte cannot be read, the
+ *   reader's nonzero result is returned and neither *ptr nor the cursor is
+ *   changed.
+ */
+#define CONSUME_FUNC(name, type)                                  \
+  static int name(struct InternalInstruction* insn, type* ptr) {  \
+    type value = 0;                                               \
+    unsigned idx;                                                 \
+    for (idx = 0; idx < sizeof(type); ++idx) {                    \
+      uint8_t next;                                               \
+      const int status = insn->reader(insn->readerArg,            \
+                                      &next,                      \
+                                      insn->readerCursor + idx);  \
+      if (status)                                                 \
+        return status;                                            \
+      value |= ((uint64_t)next << (idx * 8));                     \
+    }                                                             \
+    *ptr = value;                                                 \
+    insn->readerCursor += sizeof(type);                           \
+    return 0;                                                     \
+  }
+
+/*
+ * consume* - Use the reader function provided by the user to consume data
+ * values of various sizes from the instruction's memory and advance the
+ * cursor appropriately. These readers perform endian conversion: the bytes
+ * are combined least-significant first, so the byte at the lowest offset
+ * ends up in bits 0-7 of the result.
+ *
+ * If any byte cannot be read, the reader's nonzero result is returned and
+ * neither *ptr nor the cursor is modified.
+ *
+ * @param insn - See consumeByte().
+ * @param ptr - A pointer to a pre-allocated memory of appropriate size to
+ * be populated with the data read.
+ * @return - See consumeByte().
+ */
+CONSUME_FUNC(consumeInt8, int8_t)
+CONSUME_FUNC(consumeInt16, int16_t)
+CONSUME_FUNC(consumeInt32, int32_t)
+CONSUME_FUNC(consumeUInt16, uint16_t)
+CONSUME_FUNC(consumeUInt32, uint32_t)
+CONSUME_FUNC(consumeUInt64, uint64_t)
+
+/*
+ * dbgprintf - Formats a single message and passes it to the logging function
+ *   provided by the user, typically without a carriage-return.  Does nothing
+ *   when the instruction has no logging function.  The message is formatted
+ *   into a 256-byte buffer, so longer output is truncated by vsnprintf().
+ *
+ * @param insn    - The instruction containing the logging function.
+ * @param format  - See printf().
+ * @param ...     - See printf().
+ */
+static void dbgprintf(struct InternalInstruction* insn,
+                      const char* format,
+                      ...) {
+  if (!insn->dlog)
+    return;
+
+  char message[256];
+  va_list args;
+
+  va_start(args, format);
+  (void)vsnprintf(message, sizeof(message), format, args);
+  va_end(args);
+
+  insn->dlog(insn->dlogArg, message);
+}
+
+/*
+ * setPrefixPresent - Records that a prefix byte was seen and where.  Both
+ *   per-instruction tables are indexed by the prefix byte value itself.
+ *
+ * @param insn     - The instruction to be marked as having the prefix.
+ * @param prefix   - The prefix that is present.
+ * @param location - The location where the prefix is located (in the address
+ *                   space of the instruction's reader).
+ */
+static void setPrefixPresent(struct InternalInstruction* insn,
+                             uint8_t prefix,
+                             uint64_t location)
+{
+  insn->prefixLocations[prefix] = location;
+  insn->prefixPresent[prefix] = 1;
+}
+
+/*
+ * isPrefixAtLocation - Reports whether a prefix was recorded for an
+ *   instruction at exactly the given location.
+ *
+ * @param insn     - The instruction to be queried.
+ * @param prefix   - The prefix.
+ * @param location - The location to query.
+ * @return         - true if the prefix is marked present and its recorded
+ *                   location equals the queried one; false otherwise.
+ */
+static bool isPrefixAtLocation(struct InternalInstruction* insn,
+                               uint8_t prefix,
+                               uint64_t location)
+{
+  if (insn->prefixPresent[prefix] != 1)
+    return false;
+
+  return insn->prefixLocations[prefix] == location;
+}
+
+/*
+ * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
+ * instruction as having them. Also sets the instruction's default operand,
+ * address, and other relevant data sizes to report operands correctly.
+ *
+ * The function works in three stages:
+ * 1. A loop consumes legacy prefix bytes (0xf0/0xf2/0xf3, the segment
+ * overrides, 0x66 and 0x67) and records each with setPrefixPresent(). A
+ * repeated prefix from the same group is only logged, not rejected. The
+ * loop ends at the first non-prefix byte, when a read fails, or when a
+ * leading 0xf0/0xf2/0xf3 is to be decoded on its own.
+ * 2. The byte that ended the loop is examined: 0x62 (EVEX), 0xc4 and 0xc5
+ * (VEX), 0x8f (XOP) or, in 64-bit mode, 0x4x (REX). If the byte turns out
+ * not to start such a prefix it is unconsumed so that readOpcode() sees
+ * it. For the vector prefixes in 64-bit mode an equivalent REX value is
+ * synthesized into insn->rexPrefix.
+ * 3. registerSize, addressSize, displacementSize and immediateSize are set
+ * from the mode, the 0x66/0x67 overrides and (in 64-bit mode) REX.W.
+ *
+ * @param insn - The instruction whose prefixes are to be read.
+ * @return - 0 if the instruction could be read until the end of the prefix
+ * bytes; -1 if a byte needed to classify or complete a prefix could
+ * not be read, or if a REX prefix is followed by another 0x4x byte.
+ */
+static int readPrefixes(struct InternalInstruction* insn) {
+ bool isPrefix = true;
+ bool prefixGroups[4] = { false }; /* One flag per legacy prefix group; only used to log redundant prefixes. */
+ uint64_t prefixLocation;
+ uint8_t byte = 0;
+ uint8_t nextByte;
+
+ bool hasAdSize = false;
+ bool hasOpSize = false;
+
+ dbgprintf(insn, "readPrefixes()");
+
+ while (isPrefix) {
+ prefixLocation = insn->readerCursor;
+
+ /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
+ if (consumeByte(insn, &byte))
+ break;
+
+ /*
+ * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
+ * break and let it be disassembled as a normal "instruction".
+ */
+ if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
+ break;
+
+ if (insn->readerCursor - 1 == insn->startLocation
+ && (byte == 0xf2 || byte == 0xf3)
+ && !lookAtByte(insn, &nextByte))
+ {
+ /*
+ * If the byte is 0xf2 or 0xf3, and any of the following conditions are
+ * met:
+ * - it is followed by a LOCK (0xf0) prefix
+ * - it is followed by an xchg instruction
+ * then it should be disassembled as a xacquire/xrelease not repne/rep.
+ */
+ if ((byte == 0xf2 || byte == 0xf3) &&
+ ((nextByte == 0xf0) ||
+ ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
+ insn->xAcquireRelease = true;
+ /*
+ * Also if the byte is 0xf3, and the following condition is met:
+ * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
+ * "mov mem, imm" (opcode 0xc6/0xc7) instructions.
+ * then it should be disassembled as an xrelease not rep.
+ */
+ if (byte == 0xf3 &&
+ (nextByte == 0x88 || nextByte == 0x89 ||
+ nextByte == 0xc6 || nextByte == 0xc7))
+ insn->xAcquireRelease = true;
+ if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { /* A 0x4x (REX) byte follows: peek at the byte after it, then restore the cursor. */
+ if (consumeByte(insn, &nextByte))
+ return -1;
+ if (lookAtByte(insn, &nextByte))
+ return -1;
+ unconsumeByte(insn);
+ }
+ if (nextByte != 0x0f && nextByte != 0x90) /* Only 0x0f or 0x90 keeps 0xf2/0xf3 as a prefix of the following opcode. */
+ break;
+ }
+
+ switch (byte) {
+ case 0xf0: /* LOCK */
+ case 0xf2: /* REPNE/REPNZ */
+ case 0xf3: /* REP or REPE/REPZ */
+ if (prefixGroups[0])
+ dbgprintf(insn, "Redundant Group 1 prefix");
+ prefixGroups[0] = true;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ case 0x2e: /* CS segment override -OR- Branch not taken */
+ case 0x36: /* SS segment override -OR- Branch taken */
+ case 0x3e: /* DS segment override */
+ case 0x26: /* ES segment override */
+ case 0x64: /* FS segment override */
+ case 0x65: /* GS segment override */
+ switch (byte) {
+ case 0x2e:
+ insn->segmentOverride = SEG_OVERRIDE_CS;
+ break;
+ case 0x36:
+ insn->segmentOverride = SEG_OVERRIDE_SS;
+ break;
+ case 0x3e:
+ insn->segmentOverride = SEG_OVERRIDE_DS;
+ break;
+ case 0x26:
+ insn->segmentOverride = SEG_OVERRIDE_ES;
+ break;
+ case 0x64:
+ insn->segmentOverride = SEG_OVERRIDE_FS;
+ break;
+ case 0x65:
+ insn->segmentOverride = SEG_OVERRIDE_GS;
+ break;
+ default:
+ debug("Unhandled override");
+ return -1;
+ }
+ if (prefixGroups[1])
+ dbgprintf(insn, "Redundant Group 2 prefix");
+ prefixGroups[1] = true;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ case 0x66: /* Operand-size override */
+ if (prefixGroups[2])
+ dbgprintf(insn, "Redundant Group 3 prefix");
+ prefixGroups[2] = true;
+ hasOpSize = true;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ case 0x67: /* Address-size override */
+ if (prefixGroups[3])
+ dbgprintf(insn, "Redundant Group 4 prefix");
+ prefixGroups[3] = true;
+ hasAdSize = true;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ default: /* Not a prefix byte */
+ isPrefix = false;
+ break;
+ }
+
+ if (isPrefix)
+ dbgprintf(insn, "Found prefix 0x%hhx", byte);
+ }
+
+ insn->vectorExtensionType = TYPE_NO_VEX_XOP;
+
+ if (byte == 0x62) { /* Candidate EVEX prefix. */
+ uint8_t byte1, byte2;
+
+ if (consumeByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of EVEX prefix");
+ return -1;
+ }
+
+ if (lookAtByte(insn, &byte2)) {
+ dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
+ return -1;
+ }
+
+ if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
+ ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
+ insn->vectorExtensionType = TYPE_EVEX;
+ } else {
+ unconsumeByte(insn); /* unconsume byte1 */
+ unconsumeByte(insn); /* unconsume byte */
+ insn->necessaryPrefixLocation = insn->readerCursor - 2;
+ }
+
+ if (insn->vectorExtensionType == TYPE_EVEX) {
+ insn->vectorExtensionPrefix[0] = byte;
+ insn->vectorExtensionPrefix[1] = byte1;
+ if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) {
+ dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
+ return -1;
+ }
+ if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) {
+ dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix");
+ return -1;
+ }
+
+ /* We simulate the REX prefix for simplicity's sake */
+ if (insn->mode == MODE_64BIT) {
+ insn->rexPrefix = 0x40
+ | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3)
+ | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2)
+ | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1)
+ | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
+ }
+
+ dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
+ insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
+ insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]);
+ }
+ } else if (byte == 0xc4) { /* Candidate three-byte VEX prefix. */
+ uint8_t byte1;
+
+ if (lookAtByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of VEX");
+ return -1;
+ }
+
+ if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
+ insn->vectorExtensionType = TYPE_VEX_3B;
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ } else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+
+ if (insn->vectorExtensionType == TYPE_VEX_3B) {
+ insn->vectorExtensionPrefix[0] = byte;
+ consumeByte(insn, &insn->vectorExtensionPrefix[1]); /* NOTE(review): result ignored here, unlike the EVEX path above. */
+ consumeByte(insn, &insn->vectorExtensionPrefix[2]); /* NOTE(review): result ignored; a failed read leaves this entry unchanged. */
+
+ /* We simulate the REX prefix for simplicity's sake */
+
+ if (insn->mode == MODE_64BIT) {
+ insn->rexPrefix = 0x40
+ | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3)
+ | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2)
+ | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1)
+ | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
+ }
+
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
+ insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
+ insn->vectorExtensionPrefix[2]);
+ }
+ } else if (byte == 0xc5) { /* Candidate two-byte VEX prefix. */
+ uint8_t byte1;
+
+ if (lookAtByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of VEX");
+ return -1;
+ }
+
+ if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
+ insn->vectorExtensionType = TYPE_VEX_2B;
+ } else {
+ unconsumeByte(insn);
+ }
+
+ if (insn->vectorExtensionType == TYPE_VEX_2B) {
+ insn->vectorExtensionPrefix[0] = byte;
+ consumeByte(insn, &insn->vectorExtensionPrefix[1]); /* NOTE(review): result ignored; the same byte was just read by lookAtByte(). */
+
+ if (insn->mode == MODE_64BIT) {
+ insn->rexPrefix = 0x40
+ | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
+ }
+
+ switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ hasOpSize = true;
+ break;
+ }
+
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx",
+ insn->vectorExtensionPrefix[0],
+ insn->vectorExtensionPrefix[1]);
+ }
+ } else if (byte == 0x8f) { /* Candidate XOP prefix. */
+ uint8_t byte1;
+
+ if (lookAtByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of XOP");
+ return -1;
+ }
+
+ if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */
+ insn->vectorExtensionType = TYPE_XOP;
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ } else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+
+ if (insn->vectorExtensionType == TYPE_XOP) {
+ insn->vectorExtensionPrefix[0] = byte;
+ consumeByte(insn, &insn->vectorExtensionPrefix[1]); /* NOTE(review): result ignored, as in the VEX paths. */
+ consumeByte(insn, &insn->vectorExtensionPrefix[2]); /* NOTE(review): result ignored; a failed read leaves this entry unchanged. */
+
+ /* We simulate the REX prefix for simplicity's sake */
+
+ if (insn->mode == MODE_64BIT) {
+ insn->rexPrefix = 0x40
+ | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3)
+ | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2)
+ | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1)
+ | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
+ }
+
+ switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ hasOpSize = true;
+ break;
+ }
+
+ dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
+ insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
+ insn->vectorExtensionPrefix[2]);
+ }
+ } else {
+ if (insn->mode == MODE_64BIT) {
+ if ((byte & 0xf0) == 0x40) { /* REX prefix; the byte stays consumed. */
+ uint8_t opcodeByte;
+
+ if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
+ dbgprintf(insn, "Redundant REX prefix");
+ return -1;
+ }
+
+ insn->rexPrefix = byte;
+ insn->necessaryPrefixLocation = insn->readerCursor - 2;
+
+ dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
+ } else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ } else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ }
+
+ if (insn->mode == MODE_16BIT) { /* Sizes default to 2 bytes; an override selects 4. */
+ insn->registerSize = (hasOpSize ? 4 : 2);
+ insn->addressSize = (hasAdSize ? 4 : 2);
+ insn->displacementSize = (hasAdSize ? 4 : 2);
+ insn->immediateSize = (hasOpSize ? 4 : 2);
+ } else if (insn->mode == MODE_32BIT) { /* Sizes default to 4 bytes; an override selects 2. */
+ insn->registerSize = (hasOpSize ? 2 : 4);
+ insn->addressSize = (hasAdSize ? 2 : 4);
+ insn->displacementSize = (hasAdSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
+ } else if (insn->mode == MODE_64BIT) {
+ if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { /* REX.W set: 8-byte registers; hasOpSize is not consulted. */
+ insn->registerSize = 8;
+ insn->addressSize = (hasAdSize ? 4 : 8);
+ insn->displacementSize = 4;
+ insn->immediateSize = 4;
+ } else if (insn->rexPrefix) { /* REX without W: same sizes as the no-REX case below. */
+ insn->registerSize = (hasOpSize ? 2 : 4);
+ insn->addressSize = (hasAdSize ? 4 : 8);
+ insn->displacementSize = (hasOpSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
+ } else {
+ insn->registerSize = (hasOpSize ? 2 : 4);
+ insn->addressSize = (hasAdSize ? 4 : 8);
+ insn->displacementSize = (hasOpSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
+ *   extended or escape opcodes).
+ *
+ * When a VEX, EVEX or XOP prefix has been recorded in
+ * insn->vectorExtensionType, the opcode map is taken from the map-select
+ * field of that prefix (a two-byte VEX prefix always selects TWOBYTE) and
+ * exactly one opcode byte is consumed.  Otherwise the legacy escape bytes
+ * 0x0f, 0x0f 0x38 and 0x0f 0x3a are recognized before the opcode byte.
+ *
+ * On success insn->opcodeType and insn->opcode describe the opcode.
+ *
+ * @param insn  - The instruction whose opcode is to be read.
+ * @return      - 0 if the opcode could be read successfully; nonzero otherwise
+ *                (a byte could not be consumed, or the prefix selects an
+ *                opcode map that is not handled here).
+ */
+static int readOpcode(struct InternalInstruction* insn) {
+  /* Determine the length of the primary opcode */
+
+  uint8_t current;
+
+  dbgprintf(insn, "readOpcode()");
+
+  /* Assume a one-byte opcode until a prefix field or escape says otherwise. */
+  insn->opcodeType = ONEBYTE;
+
+  if (insn->vectorExtensionType == TYPE_EVEX) {
+    switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
+    default:
+      dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
+                mmFromEVEX2of4(insn->vectorExtensionPrefix[1]));
+      return -1;
+    case VEX_LOB_0F:
+      insn->opcodeType = TWOBYTE;
+      return consumeByte(insn, &insn->opcode);
+    case VEX_LOB_0F38:
+      insn->opcodeType = THREEBYTE_38;
+      return consumeByte(insn, &insn->opcode);
+    case VEX_LOB_0F3A:
+      insn->opcodeType = THREEBYTE_3A;
+      return consumeByte(insn, &insn->opcode);
+    }
+  } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
+    switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
+    default:
+      dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
+                mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
+      return -1;
+    case VEX_LOB_0F:
+      insn->opcodeType = TWOBYTE;
+      return consumeByte(insn, &insn->opcode);
+    case VEX_LOB_0F38:
+      insn->opcodeType = THREEBYTE_38;
+      return consumeByte(insn, &insn->opcode);
+    case VEX_LOB_0F3A:
+      insn->opcodeType = THREEBYTE_3A;
+      return consumeByte(insn, &insn->opcode);
+    }
+  } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
+    /* The two-byte VEX prefix carries no map-select field. */
+    insn->opcodeType = TWOBYTE;
+    return consumeByte(insn, &insn->opcode);
+  } else if (insn->vectorExtensionType == TYPE_XOP) {
+    switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
+    default:
+      /* Report the field with the same XOP accessor the switch dispatches on. */
+      dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
+                mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1]));
+      return -1;
+    case XOP_MAP_SELECT_8:
+      insn->opcodeType = XOP8_MAP;
+      return consumeByte(insn, &insn->opcode);
+    case XOP_MAP_SELECT_9:
+      insn->opcodeType = XOP9_MAP;
+      return consumeByte(insn, &insn->opcode);
+    case XOP_MAP_SELECT_A:
+      insn->opcodeType = XOPA_MAP;
+      return consumeByte(insn, &insn->opcode);
+    }
+  }
+
+  /* No vector-extension prefix: look for the legacy escape bytes. */
+  if (consumeByte(insn, &current))
+    return -1;
+
+  if (current == 0x0f) {
+    dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
+
+    if (consumeByte(insn, &current))
+      return -1;
+
+    if (current == 0x38) {
+      dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+      if (consumeByte(insn, &current))
+        return -1;
+
+      insn->opcodeType = THREEBYTE_38;
+    } else if (current == 0x3a) {
+      dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+      if (consumeByte(insn, &current))
+        return -1;
+
+      insn->opcodeType = THREEBYTE_3A;
+    } else {
+      dbgprintf(insn, "Didn't find a three-byte escape prefix");
+
+      insn->opcodeType = TWOBYTE;
+    }
+  }
+
+  /*
+   * At this point we have consumed the full opcode.
+   * Anything we consume from here on must be unconsumed.
+   */
+
+  insn->opcode = current;
+
+  return 0;
+}
+
+static int readModRM(struct InternalInstruction* insn);
+
+/*
+ * getIDWithAttrMask - Looks up the instruction ID for the current opcode under
+ *   a caller-supplied attribute mask, reading the ModR/M byte first when the
+ *   decode tables require it for this opcode.
+ *
+ * @param instructionID - A pointer whose target receives the instruction ID.
+ * @param insn          - The instruction whose ID is to be determined.
+ * @param attrMask      - The attribute mask that selects the instruction
+ *                        context to search.
+ * @return              - 0 if the ModR/M byte was not needed or could be
+ *                        read; nonzero otherwise.
+ */
+static int getIDWithAttrMask(uint16_t* instructionID,
+                             struct InternalInstruction* insn,
+                             uint16_t attrMask) {
+  const InstructionContext context = contextForAttrs(attrMask);
+
+  /* A zero ModR/M byte is passed to decode() when none is required. */
+  uint8_t modRMByte = 0;
+
+  if (modRMRequired(insn->opcodeType, context, insn->opcode)) {
+    if (readModRM(insn))
+      return -1;
+    modRMByte = insn->modRM;
+  }
+
+  *instructionID = decode(insn->opcodeType, context, insn->opcode, modRMByte);
+  return 0;
+}
+
+/*
+ * is16BitEquivalent - Determines whether two instruction names refer to
+ *   equivalent instructions but one is 16-bit whereas the other is not.
+ *
+ * The names must have the same length.  Position by position they must either
+ * match, or differ only by one of the accepted size substitutions:
+ * 'Q'/'L' -> 'W', '6'/'3' -> '1' and '4'/'2' -> '6'.
+ *
+ * @param orig  - The instruction that is not 16-bit
+ * @param equiv - The instruction that is 16-bit
+ * @return      - true if the names are equivalent in the above sense.
+ */
+static bool is16BitEquivalent(const char* orig, const char* equiv) {
+  for (; *orig != '\0' && *equiv != '\0'; ++orig, ++equiv) {
+    const char wide = *orig;
+    const char narrow = *equiv;
+
+    if (wide == narrow)
+      continue;
+
+    const bool sizeLetter = narrow == 'W' && (wide == 'Q' || wide == 'L');
+    const bool firstDigit = narrow == '1' && (wide == '6' || wide == '3');
+    const bool secondDigit = narrow == '6' && (wide == '4' || wide == '2');
+
+    if (!sizeLetter && !firstDigit && !secondDigit)
+      return false;
+  }
+
+  /* Equivalent only if both names ended at the same position. */
+  return *orig == '\0' && *equiv == '\0';
+}
+
+/*
+ * is64Bit - Determines whether this instruction is a 64-bit instruction, by
+ *   checking whether its name contains the substring "64".
+ *
+ * @param name - The NUL-terminated name of the instruction to examine.
+ * @return     - true if "64" occurs anywhere in the name; false otherwise.
+ */
+static bool is64Bit(const char* name) {
+  for (const char *cursor = name; *cursor != '\0'; ++cursor) {
+    /* cursor[1] is at worst the terminating NUL, so this never overruns. */
+    if (cursor[0] == '6' && cursor[1] == '4')
+      return true;
+  }
+  return false;
+}
+
+/*
+ * getID - Determines the ID of an instruction, consuming the ModR/M byte as
+ * appropriate for extended and escape opcodes. Determines the attributes and
+ * context for the instruction before doing so.
+ *
+ * On every successful return insn->instructionID and insn->spec have been
+ * filled in. After the first table lookup, several special cases repeat the
+ * lookup with an adjusted attribute mask or opcode to compensate for
+ * limitations of the decode tables.
+ *
+ * @param insn - The instruction whose ID is to be determined.
+ * @param miiArg - Opaque argument forwarded to GetInstrName() whenever
+ * instruction names have to be compared.
+ * @return - 0 if the ModR/M could be read when needed or was not needed;
+ * nonzero otherwise (including an unrecognized vector extension type).
+ */
+static int getID(struct InternalInstruction* insn, const void *miiArg) {
+ uint16_t attrMask;
+ uint16_t instructionID;
+
+ dbgprintf(insn, "getID()");
+
+ attrMask = ATTR_NONE;
+
+ if (insn->mode == MODE_64BIT)
+ attrMask |= ATTR_64BIT;
+
+ if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
+ attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
+
+ if (insn->vectorExtensionType == TYPE_EVEX) {
+ switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
+ attrMask |= ATTR_EVEXKZ;
+ if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
+ attrMask |= ATTR_EVEXB;
+ if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
+ attrMask |= ATTR_EVEXK;
+ if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
+ attrMask |= ATTR_EVEXL;
+ if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
+ attrMask |= ATTR_EVEXL2;
+ } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
+ switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
+ attrMask |= ATTR_VEXL;
+ } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
+ switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
+ attrMask |= ATTR_VEXL;
+ } else if (insn->vectorExtensionType == TYPE_XOP) {
+ switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
+ attrMask |= ATTR_VEXL;
+ } else {
+ return -1;
+ }
+ } else {
+ if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_OPSIZE;
+ else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_ADSIZE;
+ else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_XS;
+ else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_XD;
+ }
+
+ if (insn->rexPrefix & 0x08)
+ attrMask |= ATTR_REXW;
+
+ /*
+ * JCXZ/JECXZ need special handling for 16-bit mode because the meaning
+ * of the AdSize prefix is inverted w.r.t. 32-bit mode.
+ */
+ if (insn->mode == MODE_16BIT && insn->opcodeType == ONEBYTE &&
+ insn->opcode == 0xE3)
+ attrMask ^= ATTR_ADSIZE;
+
+ /*
+ * In 64-bit mode all f64 superscripted opcodes ignore the operand size
+ * prefix. CALL/JMP/JCC instructions need to ignore 0x66 and consume 4 bytes.
+ */
+
+ if (insn->mode == MODE_64BIT &&
+ isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) {
+ switch (insn->opcode) {
+ case 0xE8:
+ case 0xE9:
+ // One-byte opcodes only, so psubsb and other mmx instructions are left alone.
+ if (insn->opcodeType == ONEBYTE) {
+ attrMask ^= ATTR_OPSIZE;
+ insn->immediateSize = 4;
+ insn->displacementSize = 4;
+ }
+ break;
+ case 0x82:
+ case 0x83:
+ case 0x84:
+ case 0x85:
+ case 0x86:
+ case 0x87:
+ case 0x88:
+ case 0x89:
+ case 0x8A:
+ case 0x8B:
+ case 0x8C:
+ case 0x8D:
+ case 0x8E:
+ case 0x8F:
+ // Two-byte opcodes only, so lea and three byte ops are left alone.
+ if (insn->opcodeType == TWOBYTE) {
+ attrMask ^= ATTR_OPSIZE;
+ insn->immediateSize = 4;
+ insn->displacementSize = 4;
+ }
+ break;
+ }
+ }
+
+ if (getIDWithAttrMask(&instructionID, insn, attrMask))
+ return -1;
+
+ /* The following clauses compensate for limitations of the tables. */
+
+ if (insn->mode != MODE_64BIT &&
+ insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
+ /*
+ * The tables can't distinguish between cases where the W-bit is used to
+ * select register size and cases where it's a required part of the opcode.
+ */
+ if ((insn->vectorExtensionType == TYPE_EVEX &&
+ wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
+ (insn->vectorExtensionType == TYPE_VEX_3B &&
+ wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
+ (insn->vectorExtensionType == TYPE_XOP &&
+ wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
+
+ uint16_t instructionIDWithREXW;
+ if (getIDWithAttrMask(&instructionIDWithREXW,
+ insn, attrMask | ATTR_REXW)) {
+ insn->instructionID = instructionID;
+ insn->spec = specifierForUID(instructionID);
+ return 0;
+ }
+
+ const char *SpecName = GetInstrName(instructionIDWithREXW, miiArg);
+ // If the REX.W variant is not a 64-bit instruction, switch to it.
+ if (!is64Bit(SpecName)) {
+ insn->instructionID = instructionIDWithREXW;
+ insn->spec = specifierForUID(instructionIDWithREXW);
+ return 0;
+ }
+ }
+ }
+
+ /*
+ * Absolute moves need special handling.
+ * -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
+ * inverted w.r.t. 32-bit mode.
+ * -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
+ * any position.
+ */
+ if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) {
+ /* Make sure we observed the prefixes in any position. */
+ if (insn->prefixPresent[0x67])
+ attrMask |= ATTR_ADSIZE;
+ if (insn->prefixPresent[0x66])
+ attrMask |= ATTR_OPSIZE;
+
+ /* In 16-bit, invert the attributes. */
+ if (insn->mode == MODE_16BIT)
+ attrMask ^= ATTR_ADSIZE | ATTR_OPSIZE;
+
+ if (getIDWithAttrMask(&instructionID, insn, attrMask))
+ return -1;
+
+ insn->instructionID = instructionID;
+ insn->spec = specifierForUID(instructionID);
+ return 0;
+ }
+
+ if ((insn->mode == MODE_16BIT || insn->prefixPresent[0x66]) &&
+ !(attrMask & ATTR_OPSIZE)) {
+ /*
+ * The instruction tables make no distinction between instructions that
+ * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
+ * particular spot (i.e., many MMX operations). In general we're
+ * conservative, but in the specific case where OpSize is present but not
+ * in the right place we check if there's a 16-bit operation.
+ */
+
+ const struct InstructionSpecifier *spec;
+ uint16_t instructionIDWithOpsize;
+ const char *specName, *specWithOpSizeName;
+
+ spec = specifierForUID(instructionID);
+
+ if (getIDWithAttrMask(&instructionIDWithOpsize,
+ insn,
+ attrMask | ATTR_OPSIZE)) {
+ /*
+ * ModRM required with OpSize but not present; give up and return version
+ * without OpSize set
+ */
+
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ return 0;
+ }
+
+ specName = GetInstrName(instructionID, miiArg);
+ specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg);
+
+ if (is16BitEquivalent(specName, specWithOpSizeName) &&
+ (insn->mode == MODE_16BIT) ^ insn->prefixPresent[0x66]) {
+ insn->instructionID = instructionIDWithOpsize;
+ insn->spec = specifierForUID(instructionIDWithOpsize);
+ } else {
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ }
+ return 0;
+ }
+
+ if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
+ insn->rexPrefix & 0x01) {
+ /*
+ * NOOP shouldn't decode as NOOP if REX.b is set. Instead
+ * it should decode as XCHG %r8, %eax.
+ */
+
+ const struct InstructionSpecifier *spec;
+ uint16_t instructionIDWithNewOpcode;
+ const struct InstructionSpecifier *specWithNewOpcode;
+
+ spec = specifierForUID(instructionID);
+
+ /* Borrow opcode from one of the other XCHGar opcodes */
+ insn->opcode = 0x91;
+
+ if (getIDWithAttrMask(&instructionIDWithNewOpcode,
+ insn,
+ attrMask)) {
+ insn->opcode = 0x90;
+
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ return 0;
+ }
+
+ specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
+
+ /* Change back */
+ insn->opcode = 0x90;
+
+ insn->instructionID = instructionIDWithNewOpcode;
+ insn->spec = specWithNewOpcode;
+
+ return 0;
+ }
+
+ insn->instructionID = instructionID;
+ insn->spec = specifierForUID(insn->instructionID);
+
+ return 0;
+}
+
+/*
+ * readSIB - Consumes the SIB byte to determine addressing information for an
+ * instruction.
+ *
+ * Does nothing if the SIB byte was already consumed. insn->consumedSIB is set
+ * before any byte is read, so a later call returns 0 immediately even if this
+ * one failed. On success insn->sib, insn->sibIndex, insn->sibScale and
+ * insn->sibBase are filled in; when the base field (combined with REX.b) is
+ * 0x5 or 0xd, insn->eaDisplacement is also set according to the Mod field of
+ * insn->modRM.
+ *
+ * @param insn - The instruction whose SIB byte is to be read.
+ * @return - 0 if the SIB byte was successfully read; nonzero otherwise
+ * (address size of 2, no byte available, or Mod == 0b11 with such a base).
+ */
+static int readSIB(struct InternalInstruction* insn) {
+ SIBIndex sibIndexBase = SIB_INDEX_NONE;
+ SIBBase sibBaseBase = SIB_BASE_NONE;
+ uint8_t index, base;
+
+ dbgprintf(insn, "readSIB()");
+
+ if (insn->consumedSIB)
+ return 0;
+
+ insn->consumedSIB = true;
+
+ switch (insn->addressSize) {
+ case 2:
+ dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
+ return -1;
+ case 4:
+ sibIndexBase = SIB_INDEX_EAX;
+ sibBaseBase = SIB_BASE_EAX;
+ break;
+ case 8:
+ sibIndexBase = SIB_INDEX_RAX;
+ sibBaseBase = SIB_BASE_RAX;
+ break;
+ }
+
+ if (consumeByte(insn, &insn->sib))
+ return -1;
+
+ index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
+
+ // FIXME: The fifth bit (bit index 4) is only to be used for instructions
+ // that understand VSIB indexing. ORing the bit in here is mildly dangerous
+ // because performing math on an 'enum SIBIndex' can produce garbage.
+ // Excluding the "none" value, it should cover 6 spaces of register names:
+ // - 16 possibilities for 16-bit GPR starting at SIB_INDEX_BX_SI
+ // - 16 possibilities for 32-bit GPR starting at SIB_INDEX_EAX
+ // - 16 possibilities for 64-bit GPR starting at SIB_INDEX_RAX
+ // - 32 possibilities for each of XMM, YMM, ZMM registers
+ // When sibIndexBase gets assigned SIB_INDEX_RAX as it does in 64-bit mode,
+ // summing in a fully decoded index between 0 and 31 can end up with a value
+ // that looks like something in the low half of the XMM range.
+ // translateRMMemory() tries to reverse the damage, with only partial success,
+ // as evidenced by known bugs in "test/MC/Disassembler/X86/x86-64.txt"
+ if (insn->vectorExtensionType == TYPE_EVEX)
+ index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4;
+
+ if (index == 0x4) { // index value 4 encodes "no index register"
+ insn->sibIndex = SIB_INDEX_NONE;
+ } else {
+ insn->sibIndex = (SIBIndex)(sibIndexBase + index);
+ }
+
+ insn->sibScale = 1 << scaleFromSIB(insn->sib);
+
+ base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
+
+ switch (base) {
+ case 0x5:
+ case 0xd:
+ switch (modFromModRM(insn->modRM)) {
+ case 0x0:
+ insn->eaDisplacement = EA_DISP_32;
+ insn->sibBase = SIB_BASE_NONE;
+ break;
+ case 0x1:
+ insn->eaDisplacement = EA_DISP_8;
+ insn->sibBase = (SIBBase)(sibBaseBase + base);
+ break;
+ case 0x2:
+ insn->eaDisplacement = EA_DISP_32;
+ insn->sibBase = (SIBBase)(sibBaseBase + base);
+ break;
+ case 0x3:
+ debug("Cannot have Mod = 0b11 and a SIB byte");
+ return -1;
+ }
+ break;
+ default:
+ insn->sibBase = (SIBBase)(sibBaseBase + base);
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * readDisplacement - Consumes the displacement of an instruction.
+ *
+ * The width is chosen by insn->eaDisplacement; the value read is stored
+ * sign-extended in insn->displacement, and insn->displacementOffset records
+ * where it starts relative to the beginning of the instruction.  Repeated
+ * calls are no-ops once insn->consumedDisplacement is set.
+ *
+ * @param insn  - The instruction whose displacement is to be read.
+ * @return      - 0 if the displacement byte was successfully read; nonzero
+ *                otherwise.
+ */
+static int readDisplacement(struct InternalInstruction* insn) {
+  dbgprintf(insn, "readDisplacement()");
+
+  if (insn->consumedDisplacement)
+    return 0;
+
+  /*
+   * The flag is raised before reading and is left raised on every exit path,
+   * including a failed read and the EA_DISP_NONE case.
+   */
+  insn->consumedDisplacement = true;
+  insn->displacementOffset = insn->readerCursor - insn->startLocation;
+
+  if (insn->eaDisplacement == EA_DISP_8) {
+    int8_t disp8;
+    if (consumeInt8(insn, &disp8))
+      return -1;
+    insn->displacement = disp8;
+  } else if (insn->eaDisplacement == EA_DISP_16) {
+    int16_t disp16;
+    if (consumeInt16(insn, &disp16))
+      return -1;
+    insn->displacement = disp16;
+  } else if (insn->eaDisplacement == EA_DISP_32) {
+    int32_t disp32;
+    if (consumeInt32(insn, &disp32))
+      return -1;
+    insn->displacement = disp32;
+  }
+  /* EA_DISP_NONE: there is nothing to read. */
+
+  return 0;
+}
+
+/*
+ * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
+ * displacement) for an instruction and interprets it.
+ *
+ * Does nothing if insn->consumedModRM is already set. insn->registerSize
+ * selects insn->regBase and insn->eaRegBase; insn->addressSize selects between
+ * the 16-bit addressing forms (case 2) and the 32/64-bit forms (cases 4 and
+ * 8). The REX and, for EVEX, the EVEX extension bits are folded into the reg
+ * and r/m fields before they are turned into enum values.
+ *
+ * NOTE(review): for Mod == 1 or 2 with a SIB byte, eaBase is set to
+ * EA_BASE_sib even when addressSize is 8, whereas the Mod == 0 case picks
+ * EA_BASE_sib64 - confirm that consumers treat both values alike.
+ * NOTE(review): the 16-bit Mod == 3 case calls readDisplacement() without
+ * assigning eaDisplacement in that branch - presumably it is still
+ * EA_DISP_NONE there; confirm.
+ *
+ * @param insn - The instruction whose addressing information is to be read.
+ * @return - 0 if the information was successfully read; nonzero otherwise.
+ */
+static int readModRM(struct InternalInstruction* insn) {
+ uint8_t mod, rm, reg;
+
+ dbgprintf(insn, "readModRM()");
+
+ if (insn->consumedModRM)
+ return 0;
+
+ if (consumeByte(insn, &insn->modRM))
+ return -1;
+ insn->consumedModRM = true;
+
+ mod = modFromModRM(insn->modRM);
+ rm = rmFromModRM(insn->modRM);
+ reg = regFromModRM(insn->modRM);
+
+ /*
+ * This goes by insn->registerSize to pick the correct register, which messes
+ * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
+ * fixupReg().
+ */
+ switch (insn->registerSize) {
+ case 2:
+ insn->regBase = MODRM_REG_AX;
+ insn->eaRegBase = EA_REG_AX;
+ break;
+ case 4:
+ insn->regBase = MODRM_REG_EAX;
+ insn->eaRegBase = EA_REG_EAX;
+ break;
+ case 8:
+ insn->regBase = MODRM_REG_RAX;
+ insn->eaRegBase = EA_REG_RAX;
+ break;
+ }
+
+ reg |= rFromREX(insn->rexPrefix) << 3;
+ rm |= bFromREX(insn->rexPrefix) << 3;
+ if (insn->vectorExtensionType == TYPE_EVEX) {
+ reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
+ rm |= xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
+ }
+
+ insn->reg = (Reg)(insn->regBase + reg);
+
+ switch (insn->addressSize) {
+ case 2:
+ insn->eaBaseBase = EA_BASE_BX_SI;
+
+ switch (mod) {
+ case 0x0:
+ if (rm == 0x6) { // no base register, 16-bit displacement only
+ insn->eaBase = EA_BASE_NONE;
+ insn->eaDisplacement = EA_DISP_16;
+ if (readDisplacement(insn))
+ return -1;
+ } else {
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_NONE;
+ }
+ break;
+ case 0x1:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_8;
+ insn->displacementSize = 1;
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ case 0x2:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_16;
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ case 0x3: // register operand rather than a memory operand
+ insn->eaBase = (EABase)(insn->eaRegBase + rm);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ }
+ break;
+ case 4:
+ case 8:
+ insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
+
+ switch (mod) {
+ case 0x0:
+ insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
+ // In determining whether RIP-relative mode is used (rm=5),
+ // or whether a SIB byte is present (rm=4),
+ // the extension bits (REX.b and EVEX.x) are ignored.
+ switch (rm & 7) {
+ case 0x4: // SIB byte is present
+ insn->eaBase = (insn->addressSize == 4 ?
+ EA_BASE_sib : EA_BASE_sib64);
+ if (readSIB(insn) || readDisplacement(insn))
+ return -1;
+ break;
+ case 0x5: // RIP-relative
+ insn->eaBase = EA_BASE_NONE;
+ insn->eaDisplacement = EA_DISP_32;
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ default:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ break;
+ }
+ break;
+ case 0x1:
+ insn->displacementSize = 1;
+ /* FALLTHROUGH */
+ case 0x2:
+ insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
+ switch (rm & 7) {
+ case 0x4: // SIB byte is present
+ insn->eaBase = EA_BASE_sib;
+ if (readSIB(insn) || readDisplacement(insn))
+ return -1;
+ break;
+ default:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ }
+ break;
+ case 0x3: // register operand rather than a memory operand
+ insn->eaDisplacement = EA_DISP_NONE;
+ insn->eaBase = (EABase)(insn->eaRegBase + rm);
+ break;
+ }
+ break;
+ } /* switch (insn->addressSize) */
+
+ return 0;
+}
+
+#define GENERIC_FIXUP_FUNC(name, base, prefix) \
+ static uint8_t name(struct InternalInstruction *insn, \
+ OperandType type, \
+ uint8_t index, \
+ uint8_t *valid) { \
+ *valid = 1; \
+ switch (type) { \
+ default: \
+ debug("Unhandled register type"); \
+ *valid = 0; \
+ return 0; \
+ case TYPE_Rv: \
+ return base + index; \
+ case TYPE_R8: \
+ if (insn->rexPrefix && \
+ index >= 4 && index <= 7) { \
+ return prefix##_SPL + (index - 4); \
+ } else { \
+ return prefix##_AL + index; \
+ } \
+ case TYPE_R16: \
+ return prefix##_AX + index; \
+ case TYPE_R32: \
+ return prefix##_EAX + index; \
+ case TYPE_R64: \
+ return prefix##_RAX + index; \
+ case TYPE_XMM512: \
+ return prefix##_ZMM0 + index; \
+ case TYPE_XMM256: \
+ return prefix##_YMM0 + index; \
+ case TYPE_XMM128: \
+ case TYPE_XMM64: \
+ case TYPE_XMM32: \
+ case TYPE_XMM: \
+ return prefix##_XMM0 + index; \
+ case TYPE_VK1: \
+ case TYPE_VK2: \
+ case TYPE_VK4: \
+ case TYPE_VK8: \
+ case TYPE_VK16: \
+ case TYPE_VK32: \
+ case TYPE_VK64: \
+ if (index > 7) \
+ *valid = 0; \
+ return prefix##_K0 + index; \
+ case TYPE_MM64: \
+ return prefix##_MM0 + (index & 0x7); \
+ case TYPE_SEGMENTREG: \
+ if (index > 5) \
+ *valid = 0; \
+ return prefix##_ES + index; \
+ case TYPE_DEBUGREG: \
+ return prefix##_DR0 + index; \
+ case TYPE_CONTROLREG: \
+ return prefix##_CR0 + index; \
+ } \
+ }
+
+/*
+ * fixup*Value - Consults an operand type to determine the meaning of the
+ * reg or R/M field. If the operand is an XMM operand, for example, an
+ * operand would be XMM0 instead of AX, which readModRM() would otherwise
+ * misinterpret it as.
+ *
+ * Both functions are generated by GENERIC_FIXUP_FUNC(name, base, prefix):
+ * name is the name of the generated function, base is the value that
+ * TYPE_Rv operands are offset from, and prefix is pasted in front of the
+ * register names (_AL, _AX, _XMM0, ...) to select the enum family.
+ *
+ * The index is flagged invalid for an unhandled operand type, for a mask
+ * register index above 7 and for a segment register index above 5. For
+ * TYPE_R8, indices 4 to 7 are counted from the _SPL entry instead of _AL
+ * when any REX prefix is present. For TYPE_MM64 only the low three bits of
+ * the index are used.
+ *
+ * @param insn - The instruction containing the operand.
+ * @param type - The operand type.
+ * @param index - The existing value of the field as reported by readModRM().
+ * @param valid - The address of a uint8_t. The target is set to 1 if the
+ * field is valid for the register class; 0 if not.
+ * @return - The proper value.
+ */
+GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
+GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
+
+/*
+ * fixupReg - Consults an operand specifier to determine which of the
+ *   fixup*Value functions to use in correcting readModRM()'s interpretation,
+ *   and rewrites the matching field of the instruction (vvvv, reg or eaBase).
+ *
+ * @param insn  - See fixup*Value().
+ * @param op    - The operand specifier.
+ * @return      - 0 if fixup was successful; -1 if the register returned was
+ *                invalid for its class or the encoding is not a register one.
+ */
+static int fixupReg(struct InternalInstruction *insn,
+                    const struct OperandSpecifier *op) {
+  uint8_t isValid;
+  const OperandType operandType = (OperandType)op->type;
+
+  dbgprintf(insn, "fixupReg()");
+
+  switch ((OperandEncoding)op->encoding) {
+  case ENCODING_VVVV:
+    insn->vvvv = (Reg)fixupRegValue(insn, operandType, insn->vvvv, &isValid);
+    return isValid ? 0 : -1;
+  case ENCODING_REG:
+    insn->reg = (Reg)fixupRegValue(insn, operandType,
+                                   insn->reg - insn->regBase, &isValid);
+    return isValid ? 0 : -1;
+  CASE_ENCODING_RM:
+    /* Only register-form R/M operands need correcting. */
+    if (insn->eaBase < insn->eaRegBase)
+      return 0;
+    insn->eaBase = (EABase)fixupRMValue(insn, operandType,
+                                        insn->eaBase - insn->eaRegBase,
+                                        &isValid);
+    return isValid ? 0 : -1;
+  default:
+    debug("Expected a REG or R/M encoding in fixupReg");
+    return -1;
+  }
+}
+
+/*
+ * readOpcodeRegister - Reads an operand from the opcode field of an
+ *   instruction and interprets it appropriately given the operand width.
+ *   Handles AddRegFrm instructions.
+ *
+ * The register number is the low three bits of the opcode extended by REX.b.
+ *
+ * @param insn  - the instruction whose opcode field is to be read.
+ * @param size  - The width (in bytes) of the register being specified.
+ *                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
+ *                RAX.  0 means "use insn->registerSize".
+ * @return      - 0 on success; nonzero otherwise.
+ */
+static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
+  dbgprintf(insn, "readOpcodeRegister()");
+
+  if (size == 0)
+    size = insn->registerSize;
+
+  const int regNumber = (bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7);
+
+  switch (size) {
+  case 1:
+    /* With any REX prefix, numbers 4-7 are counted from SPL instead of AL. */
+    if (insn->rexPrefix && regNumber >= 0x4 && regNumber < 0x8)
+      insn->opcodeRegister = (Reg)(MODRM_REG_SPL + (regNumber - 4));
+    else
+      insn->opcodeRegister = (Reg)(MODRM_REG_AL + regNumber);
+    break;
+  case 2:
+    insn->opcodeRegister = (Reg)(MODRM_REG_AX + regNumber);
+    break;
+  case 4:
+    insn->opcodeRegister = (Reg)(MODRM_REG_EAX + regNumber);
+    break;
+  case 8:
+    insn->opcodeRegister = (Reg)(MODRM_REG_RAX + regNumber);
+    break;
+  }
+
+  return 0;
+}
+
+/*
+ * readImmediate - Consumes an immediate operand from an instruction, given the
+ *   desired operand size.
+ *
+ * The value is stored in the next free slot of insn->immediates, and
+ * insn->immediateOffset records where it starts relative to the beginning of
+ * the instruction.
+ *
+ * @param insn  - The instruction whose operand is to be read.
+ * @param size  - The width (in bytes) of the operand.  0 means "use
+ *                insn->immediateSize"; any other value also replaces
+ *                insn->immediateSize.
+ * @return      - 0 if the immediate was successfully consumed; nonzero
+ *                otherwise.
+ */
+static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
+  dbgprintf(insn, "readImmediate()");
+
+  if (insn->numImmediatesConsumed == 2) {
+    debug("Already consumed two immediates");
+    return -1;
+  }
+
+  if (size != 0)
+    insn->immediateSize = size;
+  else
+    size = insn->immediateSize;
+
+  insn->immediateOffset = insn->readerCursor - insn->startLocation;
+
+  auto &slot = insn->immediates[insn->numImmediatesConsumed];
+
+  if (size == 1) {
+    uint8_t value8;
+    if (consumeByte(insn, &value8))
+      return -1;
+    slot = value8;
+  } else if (size == 2) {
+    uint16_t value16;
+    if (consumeUInt16(insn, &value16))
+      return -1;
+    slot = value16;
+  } else if (size == 4) {
+    uint32_t value32;
+    if (consumeUInt32(insn, &value32))
+      return -1;
+    slot = value32;
+  } else if (size == 8) {
+    uint64_t value64;
+    if (consumeUInt64(insn, &value64))
+      return -1;
+    slot = value64;
+  }
+
+  /* The slot counts as used even when the size matched none of the above. */
+  ++insn->numImmediatesConsumed;
+
+  return 0;
+}
+
+/*
+ * readVVVV - Extracts the vvvv register field from an instruction's VEX, EVEX
+ *   or XOP prefix and stores it in insn->vvvv.  Outside 64-bit mode only the
+ *   low three bits of the field are kept.
+ *
+ * @param insn  - The instruction whose operand is to be read.
+ * @return      - 0 if the vvvv was successfully consumed; nonzero
+ *                otherwise (the instruction has no such prefix).
+ */
+static int readVVVV(struct InternalInstruction* insn) {
+  dbgprintf(insn, "readVVVV()");
+
+  int regIndex;
+
+  switch (insn->vectorExtensionType) {
+  case TYPE_EVEX:
+    regIndex = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
+                vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
+    break;
+  case TYPE_VEX_3B:
+    regIndex = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
+    break;
+  case TYPE_VEX_2B:
+    regIndex = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
+    break;
+  case TYPE_XOP:
+    regIndex = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
+    break;
+  default:
+    return -1;
+  }
+
+  insn->vvvv = static_cast<Reg>(insn->mode == MODE_64BIT ? regIndex
+                                                         : (regIndex & 0x7));
+  return 0;
+}
+
+/*
+ * readMaskRegister - Reads a mask register from the aaa field of the fourth
+ *   EVEX prefix byte and stores it in insn->writemask.
+ *
+ * @param insn    - The instruction whose EVEX prefix is to be examined.
+ * @return        - 0 on success; nonzero otherwise (the instruction is not
+ *                  EVEX-encoded).
+ */
+static int readMaskRegister(struct InternalInstruction* insn) {
+  dbgprintf(insn, "readMaskRegister()");
+
+  if (insn->vectorExtensionType == TYPE_EVEX) {
+    const auto maskField = aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]);
+    insn->writemask = static_cast<Reg>(maskField);
+    return 0;
+  }
+
+  return -1;
+}
+
+/*
+ * readOperands - Consults the specifier for an instruction and consumes all
+ * operands for that instruction, interpreting them as it goes.
+ *
+ * The operand list is x86OperandSets[insn->spec->operands]; each entry is
+ * dispatched on its encoding. A non-zero vvvv field that no operand claims
+ * makes the decode fail.
+ *
+ * NOTE(review): in the ENCODING_IB "split the previous immediate" path the
+ * next immediates slot is written without the two-immediate check that
+ * readImmediate() performs - presumably the tables never reach it with both
+ * slots already used; confirm.
+ *
+ * @param insn - The instruction whose operands are to be read and interpreted.
+ * @return - 0 if all operands could be read; nonzero otherwise.
+ */
+static int readOperands(struct InternalInstruction* insn) {
+ int hasVVVV, needVVVV;
+ int sawRegImm = 0;
+
+ dbgprintf(insn, "readOperands()");
+
+ /* If non-zero vvvv specified, need to make sure one of the operands
+ uses it. */
+ hasVVVV = !readVVVV(insn);
+ needVVVV = hasVVVV && (insn->vvvv != 0);
+
+ for (const auto &Op : x86OperandSets[insn->spec->operands]) {
+ switch (Op.encoding) {
+ case ENCODING_NONE:
+ case ENCODING_SI:
+ case ENCODING_DI:
+ break;
+ case ENCODING_REG:
+ CASE_ENCODING_RM:
+ if (readModRM(insn))
+ return -1;
+ if (fixupReg(insn, &Op))
+ return -1;
+ // Apply the AVX512 compressed displacement scaling factor.
+ if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
+ insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
+ break;
+ case ENCODING_CB:
+ case ENCODING_CW:
+ case ENCODING_CD:
+ case ENCODING_CP:
+ case ENCODING_CO:
+ case ENCODING_CT:
+ dbgprintf(insn, "We currently don't hande code-offset encodings");
+ return -1;
+ case ENCODING_IB:
+ if (sawRegImm) {
+ /* Saw a register immediate so don't read again and instead split the
+ previous immediate. FIXME: This is a hack. */
+ insn->immediates[insn->numImmediatesConsumed] =
+ insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
+ ++insn->numImmediatesConsumed;
+ break;
+ }
+ if (readImmediate(insn, 1))
+ return -1;
+ if (Op.type == TYPE_XMM128 ||
+ Op.type == TYPE_XMM256)
+ sawRegImm = 1;
+ break;
+ case ENCODING_IW:
+ if (readImmediate(insn, 2))
+ return -1;
+ break;
+ case ENCODING_ID:
+ if (readImmediate(insn, 4))
+ return -1;
+ break;
+ case ENCODING_IO:
+ if (readImmediate(insn, 8))
+ return -1;
+ break;
+ case ENCODING_Iv:
+ if (readImmediate(insn, insn->immediateSize))
+ return -1;
+ break;
+ case ENCODING_Ia:
+ if (readImmediate(insn, insn->addressSize))
+ return -1;
+ break;
+ case ENCODING_RB:
+ if (readOpcodeRegister(insn, 1))
+ return -1;
+ break;
+ case ENCODING_RW:
+ if (readOpcodeRegister(insn, 2))
+ return -1;
+ break;
+ case ENCODING_RD:
+ if (readOpcodeRegister(insn, 4))
+ return -1;
+ break;
+ case ENCODING_RO:
+ if (readOpcodeRegister(insn, 8))
+ return -1;
+ break;
+ case ENCODING_Rv:
+ if (readOpcodeRegister(insn, 0)) // 0 selects insn->registerSize
+ return -1;
+ break;
+ case ENCODING_FP:
+ break;
+ case ENCODING_VVVV:
+ needVVVV = 0; /* Mark that we have found a VVVV operand. */
+ if (!hasVVVV)
+ return -1;
+ if (fixupReg(insn, &Op))
+ return -1;
+ break;
+ case ENCODING_WRITEMASK:
+ if (readMaskRegister(insn))
+ return -1;
+ break;
+ case ENCODING_DUP:
+ break;
+ default:
+ dbgprintf(insn, "Encountered an operand with an unknown encoding.");
+ return -1;
+ }
+ }
+
+ /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
+ if (needVVVV) return -1;
+
+ return 0;
+}
+
+/*
+ * decodeInstruction - Reads and interprets a full instruction provided by the
+ *   user.
+ *
+ * The instruction structure is zeroed, the decoding stages (prefixes, opcode,
+ * instruction ID, operands) are run in order, and decoding stops at the first
+ * stage that fails or when the instruction ID comes back as 0.
+ *
+ * @param insn      - A pointer to the instruction to be populated.  Must be
+ *                    pre-allocated.
+ * @param reader    - The function to be used to read the instruction's bytes.
+ * @param readerArg - A generic argument to be passed to the reader to store
+ *                    any internal state.
+ * @param logger    - If non-NULL, the function to be used to write log messages
+ *                    and warnings.
+ * @param loggerArg - A generic argument to be passed to the logger to store
+ *                    any internal state.
+ * @param miiArg    - An opaque argument handed on to getID().
+ * @param startLoc  - The address (in the reader's address space) of the first
+ *                    byte in the instruction.
+ * @param mode      - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
+ *                    decode the instruction in.
+ * @return          - 0 if the instruction's memory could be read; nonzero if
+ *                    not.
+ */
+int llvm::X86Disassembler::decodeInstruction(
+    struct InternalInstruction *insn, byteReader_t reader,
+    const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg,
+    uint64_t startLoc, DisassemblerMode mode) {
+  memset(insn, 0, sizeof(struct InternalInstruction));
+
+  insn->mode = mode;
+  insn->startLocation = startLoc;
+  insn->readerCursor = startLoc;
+  insn->reader = reader;
+  insn->readerArg = readerArg;
+  insn->dlog = logger;
+  insn->dlogArg = loggerArg;
+  insn->numImmediatesConsumed = 0;
+
+  if (readPrefixes(insn))
+    return -1;
+  if (readOpcode(insn))
+    return -1;
+  if (getID(insn, miiArg))
+    return -1;
+  if (insn->instructionID == 0)
+    return -1;
+  if (readOperands(insn))
+    return -1;
+
+  insn->operands = x86OperandSets[insn->spec->operands];
+  insn->length = insn->readerCursor - insn->startLocation;
+
+  dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
+            startLoc, insn->readerCursor, insn->length);
+
+  /* Over-long instructions are only logged; they are still returned. */
+  if (insn->length > 15)
+    dbgprintf(insn, "Instruction exceeds 15-byte limit");
+
+  return 0;
+}
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
new file mode 100644
index 000000000000..28a628e5066b
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -0,0 +1,675 @@
+//===-- X86DisassemblerDecoder.h - Disassembler decoder ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the X86 Disassembler.
+// It contains the public interface of the instruction decoder.
+// Documentation for the disassembler can be found in X86Disassembler.h.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H
+#define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H
+
+#include "X86DisassemblerDecoderCommon.h"
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+namespace X86Disassembler {
+
+// Bit-field extractors for the ModR/M, SIB and REX bytes. Each macro moves the
+// requested field down to bit 0 and then masks it to its width.
+//   ModR/M: mod   = bits 7:6, reg   = bits 5:3, r/m  = bits 2:0
+//   SIB:    scale = bits 7:6, index = bits 5:3, base = bits 2:0
+//   REX:    W = bit 3, R = bit 2, X = bit 1, B = bit 0
+#define modFromModRM(modRM) (((modRM) >> 6) & 0x3)
+#define regFromModRM(modRM) (((modRM) >> 3) & 0x7)
+#define rmFromModRM(modRM) ((modRM) & 0x7)
+#define scaleFromSIB(sib) (((sib) >> 6) & 0x3)
+#define indexFromSIB(sib) (((sib) >> 3) & 0x7)
+#define baseFromSIB(sib) ((sib) & 0x7)
+#define wFromREX(rex) (((rex) >> 3) & 0x1)
+#define rFromREX(rex) (((rex) >> 2) & 0x1)
+#define xFromREX(rex) (((rex) >> 1) & 0x1)
+#define bFromREX(rex) ((rex) & 0x1)
+
+// Field extractors for the bytes of an EVEX prefix. Judging by the names, the
+// "NofM" suffix says which prefix byte a macro expects (InternalInstruction
+// keeps up to four prefix bytes in vectorExtensionPrefix).
+// The r, x, b, r2, vvvv and v2 extractors complement their argument before
+// masking, i.e. they return the bitwise inverse of the bits stored in the byte.
+// Every use of the macro argument is parenthesised so that a compound
+// expression such as `a | b` is extracted as a whole.
+#define rFromEVEX2of4(evex) (((~(evex)) & 0x80) >> 7)
+#define xFromEVEX2of4(evex) (((~(evex)) & 0x40) >> 6)
+#define bFromEVEX2of4(evex) (((~(evex)) & 0x20) >> 5)
+#define r2FromEVEX2of4(evex) (((~(evex)) & 0x10) >> 4)
+#define mmFromEVEX2of4(evex) ((evex) & 0x3)
+#define wFromEVEX3of4(evex) (((evex) & 0x80) >> 7)
+#define vvvvFromEVEX3of4(evex) (((~(evex)) & 0x78) >> 3)
+#define ppFromEVEX3of4(evex) ((evex) & 0x3)
+#define zFromEVEX4of4(evex) (((evex) & 0x80) >> 7)
+#define l2FromEVEX4of4(evex) (((evex) & 0x40) >> 6)
+#define lFromEVEX4of4(evex) (((evex) & 0x20) >> 5)
+#define bFromEVEX4of4(evex) (((evex) & 0x10) >> 4)
+#define v2FromEVEX4of4(evex) (((~(evex)) & 0x8) >> 3)
+#define aaaFromEVEX4of4(evex) ((evex) & 0x7)
+
+// Field extractors for the second and third byte of a three-byte VEX prefix.
+// r, x, b and vvvv are returned complemented (the XOR with an all-ones mask of
+// the field width flips every extracted bit); the other fields are returned
+// as stored.
+#define rFromVEX2of3(vex) ((((vex) >> 7) & 0x1) ^ 0x1)
+#define xFromVEX2of3(vex) ((((vex) >> 6) & 0x1) ^ 0x1)
+#define bFromVEX2of3(vex) ((((vex) >> 5) & 0x1) ^ 0x1)
+#define mmmmmFromVEX2of3(vex) ((vex) & 0x1f)
+#define wFromVEX3of3(vex) (((vex) >> 7) & 0x1)
+#define vvvvFromVEX3of3(vex) ((((vex) >> 3) & 0xf) ^ 0xf)
+#define lFromVEX3of3(vex) (((vex) >> 2) & 0x1)
+#define ppFromVEX3of3(vex) ((vex) & 0x3)
+
+// Field extractors for the second byte of a two-byte VEX prefix. r and vvvv
+// are returned complemented; l and pp are returned as stored.
+#define rFromVEX2of2(vex) ((((vex) >> 7) & 0x1) ^ 0x1)
+#define vvvvFromVEX2of2(vex) ((((vex) >> 3) & 0xf) ^ 0xf)
+#define lFromVEX2of2(vex) (((vex) >> 2) & 0x1)
+#define ppFromVEX2of2(vex) ((vex) & 0x3)
+
+// Field extractors for the second and third byte of an XOP prefix. They mirror
+// the three-byte VEX extractors: r, x, b and vvvv are returned complemented,
+// the remaining fields as stored.
+#define rFromXOP2of3(xop) ((((xop) >> 7) & 0x1) ^ 0x1)
+#define xFromXOP2of3(xop) ((((xop) >> 6) & 0x1) ^ 0x1)
+#define bFromXOP2of3(xop) ((((xop) >> 5) & 0x1) ^ 0x1)
+#define mmmmmFromXOP2of3(xop) ((xop) & 0x1f)
+#define wFromXOP3of3(xop) (((xop) >> 7) & 0x1)
+#define vvvvFromXOP3of3(xop) ((((xop) >> 3) & 0xf) ^ 0xf)
+#define lFromXOP3of3(xop) (((xop) >> 2) & 0x1)
+#define ppFromXOP3of3(xop) ((xop) & 0x3)
+
+// These enums represent Intel registers for use by the decoder.
+//
+// Each register list is an X-macro: it expands to a run of ENTRY(name)
+// invocations, and the code that uses the list defines ENTRY just beforehand
+// to decide what every name turns into (for instance a prefixed enumerator).
+// The position of a name in its list therefore determines the value of the
+// enumerator generated from it; reordering entries changes those values.
+//
+// 8-bit registers. Note that SPL, BPL, SIL and DIL come after R8B-R15B rather
+// than next to AH-BH.
+#define REGS_8BIT \
+ ENTRY(AL) \
+ ENTRY(CL) \
+ ENTRY(DL) \
+ ENTRY(BL) \
+ ENTRY(AH) \
+ ENTRY(CH) \
+ ENTRY(DH) \
+ ENTRY(BH) \
+ ENTRY(R8B) \
+ ENTRY(R9B) \
+ ENTRY(R10B) \
+ ENTRY(R11B) \
+ ENTRY(R12B) \
+ ENTRY(R13B) \
+ ENTRY(R14B) \
+ ENTRY(R15B) \
+ ENTRY(SPL) \
+ ENTRY(BPL) \
+ ENTRY(SIL) \
+ ENTRY(DIL)
+
+// Effective-address bases for 16-bit addressing (X-macro list of ENTRY(name)).
+// The first four entries are register pairs (BX_SI, BX_DI, BP_SI, BP_DI), then
+// come the single bases SI, DI, BP, BX, and finally R8W-R15W, for sixteen
+// entries in total.
+// NOTE(review): the first eight appear to follow the 16-bit ModR/M r/m
+// encoding order - confirm against the decoder before relying on it.
+#define EA_BASES_16BIT \
+ ENTRY(BX_SI) \
+ ENTRY(BX_DI) \
+ ENTRY(BP_SI) \
+ ENTRY(BP_DI) \
+ ENTRY(SI) \
+ ENTRY(DI) \
+ ENTRY(BP) \
+ ENTRY(BX) \
+ ENTRY(R8W) \
+ ENTRY(R9W) \
+ ENTRY(R10W) \
+ ENTRY(R11W) \
+ ENTRY(R12W) \
+ ENTRY(R13W) \
+ ENTRY(R14W) \
+ ENTRY(R15W)
+
+// 16-bit general-purpose registers (X-macro list of ENTRY(name)): AX, CX, DX,
+// BX, SP, BP, SI, DI followed by R8W-R15W.
+#define REGS_16BIT \
+ ENTRY(AX) \
+ ENTRY(CX) \
+ ENTRY(DX) \
+ ENTRY(BX) \
+ ENTRY(SP) \
+ ENTRY(BP) \
+ ENTRY(SI) \
+ ENTRY(DI) \
+ ENTRY(R8W) \
+ ENTRY(R9W) \
+ ENTRY(R10W) \
+ ENTRY(R11W) \
+ ENTRY(R12W) \
+ ENTRY(R13W) \
+ ENTRY(R14W) \
+ ENTRY(R15W)
+
+// Effective-address bases for 32-bit addressing (X-macro list of ENTRY(name)).
+// The list matches REGS_32BIT entry for entry, except that the fifth slot
+// holds the placeholder `sib` where REGS_32BIT has ESP.
+// NOTE(review): presumably because that r/m encoding selects a SIB byte
+// instead of ESP - confirm against the decoder.
+#define EA_BASES_32BIT \
+ ENTRY(EAX) \
+ ENTRY(ECX) \
+ ENTRY(EDX) \
+ ENTRY(EBX) \
+ ENTRY(sib) \
+ ENTRY(EBP) \
+ ENTRY(ESI) \
+ ENTRY(EDI) \
+ ENTRY(R8D) \
+ ENTRY(R9D) \
+ ENTRY(R10D) \
+ ENTRY(R11D) \
+ ENTRY(R12D) \
+ ENTRY(R13D) \
+ ENTRY(R14D) \
+ ENTRY(R15D)
+
+// 32-bit general-purpose registers (X-macro list of ENTRY(name)): EAX, ECX,
+// EDX, EBX, ESP, EBP, ESI, EDI followed by R8D-R15D.
+#define REGS_32BIT \
+ ENTRY(EAX) \
+ ENTRY(ECX) \
+ ENTRY(EDX) \
+ ENTRY(EBX) \
+ ENTRY(ESP) \
+ ENTRY(EBP) \
+ ENTRY(ESI) \
+ ENTRY(EDI) \
+ ENTRY(R8D) \
+ ENTRY(R9D) \
+ ENTRY(R10D) \
+ ENTRY(R11D) \
+ ENTRY(R12D) \
+ ENTRY(R13D) \
+ ENTRY(R14D) \
+ ENTRY(R15D)
+
+// Effective-address bases for 64-bit addressing (X-macro list of ENTRY(name)).
+// The list matches REGS_64BIT entry for entry, except that the fifth slot
+// holds the placeholder `sib64` where REGS_64BIT has RSP.
+// NOTE(review): presumably the 64-bit counterpart of the `sib` placeholder in
+// EA_BASES_32BIT - confirm against the decoder.
+#define EA_BASES_64BIT \
+ ENTRY(RAX) \
+ ENTRY(RCX) \
+ ENTRY(RDX) \
+ ENTRY(RBX) \
+ ENTRY(sib64) \
+ ENTRY(RBP) \
+ ENTRY(RSI) \
+ ENTRY(RDI) \
+ ENTRY(R8) \
+ ENTRY(R9) \
+ ENTRY(R10) \
+ ENTRY(R11) \
+ ENTRY(R12) \
+ ENTRY(R13) \
+ ENTRY(R14) \
+ ENTRY(R15)
+
+// 64-bit general-purpose registers (X-macro list of ENTRY(name)): RAX, RCX,
+// RDX, RBX, RSP, RBP, RSI, RDI followed by R8-R15.
+#define REGS_64BIT \
+ ENTRY(RAX) \
+ ENTRY(RCX) \
+ ENTRY(RDX) \
+ ENTRY(RBX) \
+ ENTRY(RSP) \
+ ENTRY(RBP) \
+ ENTRY(RSI) \
+ ENTRY(RDI) \
+ ENTRY(R8) \
+ ENTRY(R9) \
+ ENTRY(R10) \
+ ENTRY(R11) \
+ ENTRY(R12) \
+ ENTRY(R13) \
+ ENTRY(R14) \
+ ENTRY(R15)
+
+// MMX registers MM0-MM7 (X-macro list of ENTRY(name)).
+#define REGS_MMX \
+ ENTRY(MM0) \
+ ENTRY(MM1) \
+ ENTRY(MM2) \
+ ENTRY(MM3) \
+ ENTRY(MM4) \
+ ENTRY(MM5) \
+ ENTRY(MM6) \
+ ENTRY(MM7)
+
+// XMM registers XMM0-XMM31 (X-macro list of ENTRY(name)), in ascending
+// numerical order.
+#define REGS_XMM \
+ ENTRY(XMM0) \
+ ENTRY(XMM1) \
+ ENTRY(XMM2) \
+ ENTRY(XMM3) \
+ ENTRY(XMM4) \
+ ENTRY(XMM5) \
+ ENTRY(XMM6) \
+ ENTRY(XMM7) \
+ ENTRY(XMM8) \
+ ENTRY(XMM9) \
+ ENTRY(XMM10) \
+ ENTRY(XMM11) \
+ ENTRY(XMM12) \
+ ENTRY(XMM13) \
+ ENTRY(XMM14) \
+ ENTRY(XMM15) \
+ ENTRY(XMM16) \
+ ENTRY(XMM17) \
+ ENTRY(XMM18) \
+ ENTRY(XMM19) \
+ ENTRY(XMM20) \
+ ENTRY(XMM21) \
+ ENTRY(XMM22) \
+ ENTRY(XMM23) \
+ ENTRY(XMM24) \
+ ENTRY(XMM25) \
+ ENTRY(XMM26) \
+ ENTRY(XMM27) \
+ ENTRY(XMM28) \
+ ENTRY(XMM29) \
+ ENTRY(XMM30) \
+ ENTRY(XMM31)
+
+// YMM registers YMM0-YMM31 (X-macro list of ENTRY(name)), in ascending
+// numerical order.
+#define REGS_YMM \
+ ENTRY(YMM0) \
+ ENTRY(YMM1) \
+ ENTRY(YMM2) \
+ ENTRY(YMM3) \
+ ENTRY(YMM4) \
+ ENTRY(YMM5) \
+ ENTRY(YMM6) \
+ ENTRY(YMM7) \
+ ENTRY(YMM8) \
+ ENTRY(YMM9) \
+ ENTRY(YMM10) \
+ ENTRY(YMM11) \
+ ENTRY(YMM12) \
+ ENTRY(YMM13) \
+ ENTRY(YMM14) \
+ ENTRY(YMM15) \
+ ENTRY(YMM16) \
+ ENTRY(YMM17) \
+ ENTRY(YMM18) \
+ ENTRY(YMM19) \
+ ENTRY(YMM20) \
+ ENTRY(YMM21) \
+ ENTRY(YMM22) \
+ ENTRY(YMM23) \
+ ENTRY(YMM24) \
+ ENTRY(YMM25) \
+ ENTRY(YMM26) \
+ ENTRY(YMM27) \
+ ENTRY(YMM28) \
+ ENTRY(YMM29) \
+ ENTRY(YMM30) \
+ ENTRY(YMM31)
+
+// ZMM registers ZMM0-ZMM31 (X-macro list of ENTRY(name)), in ascending
+// numerical order.
+#define REGS_ZMM \
+ ENTRY(ZMM0) \
+ ENTRY(ZMM1) \
+ ENTRY(ZMM2) \
+ ENTRY(ZMM3) \
+ ENTRY(ZMM4) \
+ ENTRY(ZMM5) \
+ ENTRY(ZMM6) \
+ ENTRY(ZMM7) \
+ ENTRY(ZMM8) \
+ ENTRY(ZMM9) \
+ ENTRY(ZMM10) \
+ ENTRY(ZMM11) \
+ ENTRY(ZMM12) \
+ ENTRY(ZMM13) \
+ ENTRY(ZMM14) \
+ ENTRY(ZMM15) \
+ ENTRY(ZMM16) \
+ ENTRY(ZMM17) \
+ ENTRY(ZMM18) \
+ ENTRY(ZMM19) \
+ ENTRY(ZMM20) \
+ ENTRY(ZMM21) \
+ ENTRY(ZMM22) \
+ ENTRY(ZMM23) \
+ ENTRY(ZMM24) \
+ ENTRY(ZMM25) \
+ ENTRY(ZMM26) \
+ ENTRY(ZMM27) \
+ ENTRY(ZMM28) \
+ ENTRY(ZMM29) \
+ ENTRY(ZMM30) \
+ ENTRY(ZMM31)
+
+// Mask registers K0-K7 (X-macro list of ENTRY(name)).
+#define REGS_MASKS \
+ ENTRY(K0) \
+ ENTRY(K1) \
+ ENTRY(K2) \
+ ENTRY(K3) \
+ ENTRY(K4) \
+ ENTRY(K5) \
+ ENTRY(K6) \
+ ENTRY(K7)
+
+// Segment registers (X-macro list of ENTRY(name)) in the order ES, CS, SS, DS,
+// FS, GS. Note that the SegmentOverride enum lists its enumerators in a
+// different order (CS, SS, DS, ES, FS, GS), so the two are not interchangeable
+// by position.
+#define REGS_SEGMENT \
+ ENTRY(ES) \
+ ENTRY(CS) \
+ ENTRY(SS) \
+ ENTRY(DS) \
+ ENTRY(FS) \
+ ENTRY(GS)
+
+// Debug registers DR0-DR15 (X-macro list of ENTRY(name)), in ascending
+// numerical order.
+#define REGS_DEBUG \
+ ENTRY(DR0) \
+ ENTRY(DR1) \
+ ENTRY(DR2) \
+ ENTRY(DR3) \
+ ENTRY(DR4) \
+ ENTRY(DR5) \
+ ENTRY(DR6) \
+ ENTRY(DR7) \
+ ENTRY(DR8) \
+ ENTRY(DR9) \
+ ENTRY(DR10) \
+ ENTRY(DR11) \
+ ENTRY(DR12) \
+ ENTRY(DR13) \
+ ENTRY(DR14) \
+ ENTRY(DR15)
+
+// Control registers CR0-CR15 (X-macro list of ENTRY(name)), in ascending
+// numerical order.
+#define REGS_CONTROL \
+ ENTRY(CR0) \
+ ENTRY(CR1) \
+ ENTRY(CR2) \
+ ENTRY(CR3) \
+ ENTRY(CR4) \
+ ENTRY(CR5) \
+ ENTRY(CR6) \
+ ENTRY(CR7) \
+ ENTRY(CR8) \
+ ENTRY(CR9) \
+ ENTRY(CR10) \
+ ENTRY(CR11) \
+ ENTRY(CR12) \
+ ENTRY(CR13) \
+ ENTRY(CR14) \
+ ENTRY(CR15)
+
+// Every effective-address base list concatenated: the 16-bit bases first, then
+// the 32-bit bases, then the 64-bit bases.
+#define ALL_EA_BASES \
+ EA_BASES_16BIT \
+ EA_BASES_32BIT \
+ EA_BASES_64BIT
+
+// The registers that can appear as a SIB base: the 32-bit general-purpose
+// registers followed by the 64-bit ones.
+#define ALL_SIB_BASES \
+ REGS_32BIT \
+ REGS_64BIT
+
+// Every register known to the decoder, one register class after another, with
+// RIP appended as a stand-alone final entry.
+#define ALL_REGS \
+ REGS_8BIT \
+ REGS_16BIT \
+ REGS_32BIT \
+ REGS_64BIT \
+ REGS_MMX \
+ REGS_XMM \
+ REGS_YMM \
+ REGS_ZMM \
+ REGS_MASKS \
+ REGS_SEGMENT \
+ REGS_DEBUG \
+ REGS_CONTROL \
+ ENTRY(RIP)
+
+/// \brief All possible values of the base field for effective-address
+/// computations, a.k.a. the Mod and R/M fields of the ModR/M byte.
+/// We distinguish between bases (EA_BASE_*) and registers that just happen
+/// to be referred to when Mod == 0b11 (EA_REG_*).
+///
+/// Layout: EA_BASE_NONE comes first, then one EA_BASE_* enumerator per entry of
+/// ALL_EA_BASES, then one EA_REG_* enumerator per entry of ALL_REGS, and
+/// finally EA_max, which is one more than the last EA_REG_* value.
+enum EABase {
+ EA_BASE_NONE,
+#define ENTRY(x) EA_BASE_##x,
+ ALL_EA_BASES
+#undef ENTRY
+#define ENTRY(x) EA_REG_##x,
+ ALL_REGS
+#undef ENTRY
+ EA_max
+};
+
+/// \brief All possible values of the SIB index field.
+/// borrows entries from ALL_EA_BASES with the special case that
+/// sib is synonymous with NONE.
+/// Vector SIB: index can be XMM, YMM or ZMM.
+///
+/// Layout: SIB_INDEX_NONE, then one SIB_INDEX_* enumerator per entry of
+/// ALL_EA_BASES, REGS_XMM, REGS_YMM and REGS_ZMM (in that order), then
+/// SIB_INDEX_max.
+enum SIBIndex {
+ SIB_INDEX_NONE,
+#define ENTRY(x) SIB_INDEX_##x,
+ ALL_EA_BASES
+ REGS_XMM
+ REGS_YMM
+ REGS_ZMM
+#undef ENTRY
+ SIB_INDEX_max
+};
+
+/// \brief All possible values of the SIB base field.
+///
+/// Layout: SIB_BASE_NONE, then one SIB_BASE_* enumerator per entry of
+/// ALL_SIB_BASES (the 32-bit registers followed by the 64-bit registers), then
+/// SIB_BASE_max.
+enum SIBBase {
+ SIB_BASE_NONE,
+#define ENTRY(x) SIB_BASE_##x,
+ ALL_SIB_BASES
+#undef ENTRY
+ SIB_BASE_max
+};
+
+/// \brief The kinds of displacement an effective-address computation can
+/// carry. EA_DISP_NONE is 0, so a zero-filled InternalInstruction starts out
+/// with no displacement.
+enum EADisplacement {
+  EA_DISP_NONE = 0,
+  EA_DISP_8 = 1,
+  EA_DISP_16 = 2,
+  EA_DISP_32 = 3
+};
+
+/// \brief All possible values of the reg field in the ModR/M byte.
+///
+/// One MODRM_REG_* enumerator is generated per entry of ALL_REGS, starting at
+/// 0 (unlike the EA/SIB enums there is no leading NONE enumerator), followed
+/// by MODRM_REG_max.
+enum Reg {
+#define ENTRY(x) MODRM_REG_##x,
+ ALL_REGS
+#undef ENTRY
+ MODRM_REG_max
+};
+
+/// \brief The segment overrides an instruction can carry.
+/// SEG_OVERRIDE_NONE is 0 and SEG_OVERRIDE_max is one more than the last real
+/// override. The values are spelled out so the numbering is visible at a
+/// glance.
+enum SegmentOverride {
+  SEG_OVERRIDE_NONE = 0,
+  SEG_OVERRIDE_CS = 1,
+  SEG_OVERRIDE_SS = 2,
+  SEG_OVERRIDE_DS = 3,
+  SEG_OVERRIDE_ES = 4,
+  SEG_OVERRIDE_FS = 5,
+  SEG_OVERRIDE_GS = 6,
+  SEG_OVERRIDE_max = 7
+};
+
+/// \brief Values of the VEX.m-mmmm field that the decoder recognises; each
+/// enumerator is named after the leading opcode byte(s) it stands for.
+enum VEXLeadingOpcodeByte {
+  VEX_LOB_0F = 1,
+  VEX_LOB_0F38 = 2,
+  VEX_LOB_0F3A = 3
+};
+
+/// \brief Values of the XOP map-select field that the decoder recognises; the
+/// enumerator suffix is the hexadecimal digit of the value.
+enum XOPMapSelect {
+  XOP_MAP_SELECT_8 = 8,
+  XOP_MAP_SELECT_9 = 9,
+  XOP_MAP_SELECT_A = 10
+};
+
+/// \brief Values of the two-bit VEX.pp/EVEX.pp field; each enumerator is named
+/// after the prefix byte it stands for (or NONE).
+enum VEXPrefixCode {
+  VEX_PREFIX_NONE = 0,
+  VEX_PREFIX_66 = 1,
+  VEX_PREFIX_F3 = 2,
+  VEX_PREFIX_F2 = 3
+};
+
+/// \brief Which vector-extension prefix, if any, an instruction carries.
+/// TYPE_NO_VEX_XOP is 0, so a zero-filled InternalInstruction starts out with
+/// no vector-extension prefix recorded.
+enum VectorExtensionType {
+  TYPE_NO_VEX_XOP = 0,
+  TYPE_VEX_2B = 1,
+  TYPE_VEX_3B = 2,
+  TYPE_EVEX = 3,
+  TYPE_XOP = 4
+};
+
+/// \brief Signature of the byte-reading callback that the consumer hands to
+/// the decoder. One call fetches one byte of the instruction stream.
+/// \param arg Consumer-owned baton, passed through untouched, for whatever
+/// state the reader needs.
+/// \param byte Output: receives the byte found at \p address.
+/// \param address Location, in the instruction's address space, to read.
+/// \return 0 when the byte was read; -1 when it could not be read for any
+/// reason.
+using byteReader_t = int (*)(const void *arg, uint8_t *byte, uint64_t address);
+
+/// \brief Signature of the optional logging callback through which the
+/// decoder emits debugging output.
+/// \param arg Consumer-owned baton, passed through untouched, for whatever
+/// state the logger needs.
+/// \param log The message text. The storage is reused once the logger
+/// returns, so the logger must copy the text if it wants to keep it.
+using dlog_t = void (*)(void *arg, const char *log);
+
+/// The specification for how to extract and interpret a full instruction and
+/// its operands.
+struct InstructionSpecifier {
+ // Index of this instruction's operand list; decodeInstruction() uses it to
+ // subscript x86OperandSets.
+ uint16_t operands;
+};
+
+/// The x86 internal instruction, which is produced by the decoder.
+///
+/// decodeInstruction() clears the whole structure with memset() before it
+/// fills anything in, so every field starts a decode as all-zero bytes.
+struct InternalInstruction {
+ // Reader interface (C)
+ byteReader_t reader;
+ // Opaque value passed to the reader
+ const void* readerArg;
+ // The address of the next byte to read via the reader
+ uint64_t readerCursor;
+
+ // Logger interface (C)
+ dlog_t dlog;
+ // Opaque value passed to the logger
+ void* dlogArg;
+
+ // General instruction information
+
+ // The mode to disassemble for (64-bit, protected, real)
+ DisassemblerMode mode;
+ // The start of the instruction, usable with the reader
+ uint64_t startLocation;
+ // The length of the instruction, in bytes. decodeInstruction() sets it to
+ // readerCursor - startLocation after a successful decode.
+ size_t length;
+
+ // Prefix state
+
+ // 1 if the prefix byte corresponding to the entry is present; 0 if not
+ uint8_t prefixPresent[0x100];
+ // contains the location (for use with the reader) of the prefix byte
+ uint64_t prefixLocations[0x100];
+ // The value of the vector extension prefix(EVEX/VEX/XOP), if present
+ uint8_t vectorExtensionPrefix[4];
+ // The type of the vector extension prefix
+ VectorExtensionType vectorExtensionType;
+ // The value of the REX prefix, if present
+ uint8_t rexPrefix;
+ // The location where a mandatory prefix would have to be (i.e., right before
+ // the opcode, or right before the REX prefix if one is present).
+ uint64_t necessaryPrefixLocation;
+ // The segment override type
+ SegmentOverride segmentOverride;
+ // 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease
+ bool xAcquireRelease;
+
+ // Sizes of various critical pieces of data, in bytes
+ uint8_t registerSize;
+ uint8_t addressSize;
+ uint8_t displacementSize;
+ uint8_t immediateSize;
+
+ // Offsets from the start of the instruction to the pieces of data, which is
+ // needed to find relocation entries for adding symbolic operands.
+ uint8_t displacementOffset;
+ uint8_t immediateOffset;
+
+ // opcode state
+
+ // The last byte of the opcode, not counting any ModR/M extension
+ uint8_t opcode;
+
+ // decode state
+
+ // The type of opcode, used for indexing into the array of decode tables
+ OpcodeType opcodeType;
+ // The instruction ID, extracted from the decode table. decodeInstruction()
+ // reports failure if this is still 0 after the ID lookup.
+ uint16_t instructionID;
+ // The specifier for the instruction, from the instruction info table
+ const InstructionSpecifier *spec;
+
+ // state for additional bytes, consumed during operand decode. Pattern:
+ // consumed___ indicates that the byte was already consumed and does not
+ // need to be consumed again.
+
+ // The VEX.vvvv field, which contains a third register operand for some AVX
+ // instructions.
+ Reg vvvv;
+
+ // The writemask for AVX-512 instructions which is contained in EVEX.aaa
+ Reg writemask;
+
+ // The ModR/M byte, which contains most register operands and some portion of
+ // all memory operands.
+ bool consumedModRM;
+ uint8_t modRM;
+
+ // The SIB byte, used for more complex 32- or 64-bit memory operands
+ bool consumedSIB;
+ uint8_t sib;
+
+ // The displacement, used for memory operands
+ bool consumedDisplacement;
+ int32_t displacement;
+
+ // Immediates. There can be two in some cases
+ uint8_t numImmediatesConsumed;
+ uint8_t numImmediatesTranslated;
+ uint64_t immediates[2];
+
+ // A register or immediate operand encoded into the opcode
+ Reg opcodeRegister;
+
+ // Portions of the ModR/M byte
+
+ // These fields determine the allowable values for the ModR/M fields, which
+ // depend on operand and address widths.
+ EABase eaBaseBase;
+ EABase eaRegBase;
+ Reg regBase;
+
+ // The Mod and R/M fields can encode a base for an effective address, or a
+ // register. These are separated into two fields here.
+ EABase eaBase;
+ EADisplacement eaDisplacement;
+ // The reg field always encodes a register
+ Reg reg;
+
+ // SIB state
+ SIBIndex sibIndex;
+ uint8_t sibScale;
+ SIBBase sibBase;
+
+ // The operand specifiers of the decoded instruction; decodeInstruction()
+ // assigns this from x86OperandSets[spec->operands] once decoding succeeds.
+ ArrayRef<OperandSpecifier> operands;
+};
+
+/// \brief Decode one instruction and store the decoding results in
+/// a buffer provided by the consumer.
+/// \param insn The buffer to store the instruction in. Allocated by the
+/// consumer.
+/// \param reader The byteReader_t for the bytes to be read.
+/// \param readerArg An argument to pass to the reader for storing context
+/// specific to the consumer. May be NULL.
+/// \param logger The dlog_t to be used in printing status messages from the
+/// disassembler. May be NULL.
+/// \param loggerArg An argument to pass to the logger for storing context
+/// specific to the logger. May be NULL.
+/// \param miiArg An opaque pointer that the implementation forwards
+/// unchanged to its instruction-ID lookup.
+/// \param startLoc The address (in the reader's address space) of the first
+/// byte in the instruction.
+/// \param mode The mode (16-bit, 32-bit, 64-bit) to decode in.
+/// \return Nonzero if there was an error during decode, 0 otherwise.
+int decodeInstruction(InternalInstruction *insn,
+ byteReader_t reader,
+ const void *readerArg,
+ dlog_t logger,
+ void *loggerArg,
+ const void *miiArg,
+ uint64_t startLoc,
+ DisassemblerMode mode);
+
+/// \brief Print a message to debugs()
+/// \param file The name of the file printing the debug message.
+/// \param line The line number that printed the debug message.
+/// \param s The message to print.
+/// NOTE(review): \p file and \p line presumably identify the call site (e.g.
+/// __FILE__ / __LINE__) - confirm at the callers; the definition is not in
+/// this header.
+void Debug(const char *file, unsigned line, const char *s);
+
+/// \brief Look up the name of an instruction (declaration only; the
+/// definition is not in this header).
+/// \param Opcode The opcode whose name is wanted.
+/// \param mii An opaque pointer. NOTE(review): presumably the same object that
+/// is handed to decodeInstruction() as miiArg - confirm at the definition.
+/// \return A C string; NOTE(review): its ownership and lifetime are not
+/// visible from here - confirm at the definition.
+const char *GetInstrName(unsigned Opcode, const void *mii);
+
+} // namespace X86Disassembler
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
new file mode 100644
index 000000000000..301db72feafb
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -0,0 +1,503 @@
+//===-- X86DisassemblerDecoderCommon.h - Disassembler decoder ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the X86 Disassembler.
+// It contains common definitions used by both the disassembler and the table
+// generator.
+// Documentation for the disassembler can be found in X86Disassembler.h.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODERCOMMON_H
+#define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODERCOMMON_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+namespace X86Disassembler {
+
+// Names of the decoder tables. Each *_SYM macro is a table's identifier and the
+// matching *_STR macro is the same name as a string literal.
+#define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers
+#define CONTEXTS_SYM x86DisassemblerContexts
+#define ONEBYTE_SYM x86DisassemblerOneByteOpcodes
+#define TWOBYTE_SYM x86DisassemblerTwoByteOpcodes
+#define THREEBYTE38_SYM x86DisassemblerThreeByte38Opcodes
+#define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes
+#define XOP8_MAP_SYM x86DisassemblerXOP8Opcodes
+#define XOP9_MAP_SYM x86DisassemblerXOP9Opcodes
+#define XOPA_MAP_SYM x86DisassemblerXOPAOpcodes
+
+// Two-step stringification: the outer macro expands its argument (a *_SYM
+// macro) first, the inner one then turns the resulting identifier into a
+// string literal. This derives every *_STR value from its *_SYM counterpart.
+#define X86_DISASSEMBLER_STRINGIFY_(sym) #sym
+#define X86_DISASSEMBLER_STRINGIFY(sym) X86_DISASSEMBLER_STRINGIFY_(sym)
+
+#define INSTRUCTIONS_STR X86_DISASSEMBLER_STRINGIFY(INSTRUCTIONS_SYM)
+#define CONTEXTS_STR X86_DISASSEMBLER_STRINGIFY(CONTEXTS_SYM)
+#define ONEBYTE_STR X86_DISASSEMBLER_STRINGIFY(ONEBYTE_SYM)
+#define TWOBYTE_STR X86_DISASSEMBLER_STRINGIFY(TWOBYTE_SYM)
+#define THREEBYTE38_STR X86_DISASSEMBLER_STRINGIFY(THREEBYTE38_SYM)
+#define THREEBYTE3A_STR X86_DISASSEMBLER_STRINGIFY(THREEBYTE3A_SYM)
+#define XOP8_MAP_STR X86_DISASSEMBLER_STRINGIFY(XOP8_MAP_SYM)
+#define XOP9_MAP_STR X86_DISASSEMBLER_STRINGIFY(XOP9_MAP_SYM)
+#define XOPA_MAP_STR X86_DISASSEMBLER_STRINGIFY(XOPA_MAP_SYM)
+
+// Properties of an instruction that have to be known before its opcode can be
+// interpreted. Nearly all of them record that a particular prefix is present;
+// ATTR_64BIT instead describes the decoding context.
+//
+// Each attribute owns one bit (ATTR_NONE is the empty set), so attributes can
+// be OR-ed together. The list is an X-macro: the user defines
+// ENUM_ENTRY(name, value) before expanding it.
+#define ATTRIBUTE_BITS \
+  ENUM_ENTRY(ATTR_NONE, 0x0000) \
+  ENUM_ENTRY(ATTR_64BIT, 0x0001) \
+  ENUM_ENTRY(ATTR_XS, 0x0002) \
+  ENUM_ENTRY(ATTR_XD, 0x0004) \
+  ENUM_ENTRY(ATTR_REXW, 0x0008) \
+  ENUM_ENTRY(ATTR_OPSIZE, 0x0010) \
+  ENUM_ENTRY(ATTR_ADSIZE, 0x0020) \
+  ENUM_ENTRY(ATTR_VEX, 0x0040) \
+  ENUM_ENTRY(ATTR_VEXL, 0x0080) \
+  ENUM_ENTRY(ATTR_EVEX, 0x0100) \
+  ENUM_ENTRY(ATTR_EVEXL, 0x0200) \
+  ENUM_ENTRY(ATTR_EVEXL2, 0x0400) \
+  ENUM_ENTRY(ATTR_EVEXK, 0x0800) \
+  ENUM_ENTRY(ATTR_EVEXKZ, 0x1000) \
+  ENUM_ENTRY(ATTR_EVEXB, 0x2000)
+
+// Expand ATTRIBUTE_BITS into enumerators carrying the values given in the
+// list. ATTR_max has no explicit value, so it is one more than the last
+// listed attribute (ATTR_EVEXB); it is not a count of the attributes.
+#define ENUM_ENTRY(n, v) n = v,
+enum attributeBits {
+ ATTRIBUTE_BITS
+ ATTR_max
+};
+#undef ENUM_ENTRY
+
+// Combinations of the above attributes that are relevant to instruction
+// decode. Although other combinations are possible, they can be reduced to
+// these without affecting the ultimately decoded instruction.
+//
+// The list is an X-macro. Every ENUM_ENTRY takes three arguments: the context
+// name, a numeric rank, and a string explaining why that rank was chosen. The
+// code expanding the list defines ENUM_ENTRY to pick the arguments it needs.
+// The InstructionContext enum keeps only the name, so the order of the entries
+// here fixes the numeric value of every IC_* enumerator.
+
+// Class name Rank Rationale for rank assignment
+#define INSTRUCTION_CONTEXTS \
+ ENUM_ENTRY(IC, 0, "says nothing about the instruction") \
+ ENUM_ENTRY(IC_64BIT, 1, "says the instruction applies in " \
+ "64-bit mode but no more") \
+ ENUM_ENTRY(IC_OPSIZE, 3, "requires an OPSIZE prefix, so " \
+ "operands change width") \
+ ENUM_ENTRY(IC_ADSIZE, 3, "requires an ADSIZE prefix, so " \
+ "operands change width") \
+ ENUM_ENTRY(IC_OPSIZE_ADSIZE, 4, "requires ADSIZE and OPSIZE prefixes") \
+ ENUM_ENTRY(IC_XD, 2, "may say something about the opcode " \
+ "but not the operands") \
+ ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \
+ "but not the operands") \
+ ENUM_ENTRY(IC_XD_OPSIZE, 3, "requires an OPSIZE prefix, so " \
+ "operands change width") \
+ ENUM_ENTRY(IC_XS_OPSIZE, 3, "requires an OPSIZE prefix, so " \
+ "operands change width") \
+ ENUM_ENTRY(IC_64BIT_REXW, 5, "requires a REX.W prefix, so operands "\
+ "change width; overrides IC_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_REXW_ADSIZE, 6, "requires a REX.W prefix and 0x67 " \
+ "prefix") \
+ ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_ADSIZE, 3, "Just as meaningful as IC_ADSIZE") \
+ ENUM_ENTRY(IC_64BIT_OPSIZE_ADSIZE, 4, "Just as meaningful as IC_OPSIZE/" \
+ "IC_ADSIZE") \
+ ENUM_ENTRY(IC_64BIT_XD, 6, "XD instructions are SSE; REX.W is " \
+ "secondary") \
+ ENUM_ENTRY(IC_64BIT_XS, 6, "Just as meaningful as IC_64BIT_XD") \
+ ENUM_ENTRY(IC_64BIT_XD_OPSIZE, 3, "Just as meaningful as IC_XD_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_XS_OPSIZE, 3, "Just as meaningful as IC_XS_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_REXW_XS, 7, "OPSIZE could mean a different " \
+ "opcode") \
+ ENUM_ENTRY(IC_64BIT_REXW_XD, 7, "Just as meaningful as " \
+ "IC_64BIT_REXW_XS") \
+ ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 8, "The Dynamic Duo! Prefer over all " \
+ "else because this changes most " \
+ "operands' meaning") \
+ ENUM_ENTRY(IC_VEX, 1, "requires a VEX prefix") \
+ ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \
+ ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \
+ ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \
+ ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \
+ ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \
+ ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \
+ ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \
+ ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \
+ ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\
+ ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\
+ ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") \
+ ENUM_ENTRY(IC_VEX_L_W, 4, "requires VEX, L and W") \
+ ENUM_ENTRY(IC_VEX_L_W_XS, 5, "requires VEX, L, W and XS prefix") \
+ ENUM_ENTRY(IC_VEX_L_W_XD, 5, "requires VEX, L, W and XD prefix") \
+ ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX, 1, "requires an EVEX prefix") \
+ ENUM_ENTRY(IC_EVEX_XS, 2, "requires EVEX and the XS prefix") \
+ ENUM_ENTRY(IC_EVEX_XD, 2, "requires EVEX and the XD prefix") \
+ ENUM_ENTRY(IC_EVEX_OPSIZE, 2, "requires EVEX and the OpSize prefix") \
+ ENUM_ENTRY(IC_EVEX_W, 3, "requires EVEX and the W prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XS, 4, "requires EVEX, W, and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XD, 4, "requires EVEX, W, and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_W_OPSIZE, 4, "requires EVEX, W, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L, 3, "requires EVEX and the L prefix") \
+ ENUM_ENTRY(IC_EVEX_L_XS, 4, "requires EVEX and the L and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L_XD, 4, "requires EVEX and the L and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L_OPSIZE, 4, "requires EVEX, L, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L_W, 3, "requires EVEX, L and W") \
+ ENUM_ENTRY(IC_EVEX_L_W_XS, 4, "requires EVEX, L, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_XD, 4, "requires EVEX, L, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_OPSIZE, 4, "requires EVEX, L, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2, 3, "requires EVEX and the L2 prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_XS, 4, "requires EVEX and the L2 and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_XD, 4, "requires EVEX and the L2 and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_OPSIZE, 4, "requires EVEX, L2, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2_W, 3, "requires EVEX, L2 and W") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XS, 4, "requires EVEX, L2, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XD, 4, "requires EVEX, L2, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE, 4, "requires EVEX, L2, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_K, 1, "requires an EVEX_K prefix") \
+ ENUM_ENTRY(IC_EVEX_XS_K, 2, "requires EVEX_K and the XS prefix") \
+ ENUM_ENTRY(IC_EVEX_XD_K, 2, "requires EVEX_K and the XD prefix") \
+ ENUM_ENTRY(IC_EVEX_OPSIZE_K, 2, "requires EVEX_K and the OpSize prefix") \
+ ENUM_ENTRY(IC_EVEX_W_K, 3, "requires EVEX_K and the W prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XS_K, 4, "requires EVEX_K, W, and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XD_K, 4, "requires EVEX_K, W, and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_W_OPSIZE_K, 4, "requires EVEX_K, W, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L_K, 3, "requires EVEX_K and the L prefix") \
+ ENUM_ENTRY(IC_EVEX_L_XS_K, 4, "requires EVEX_K and the L and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L_XD_K, 4, "requires EVEX_K and the L and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L_OPSIZE_K, 4, "requires EVEX_K, L, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L_W_K, 3, "requires EVEX_K, L and W") \
+ ENUM_ENTRY(IC_EVEX_L_W_XS_K, 4, "requires EVEX_K, L, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_XD_K, 4, "requires EVEX_K, L, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_K, 4, "requires EVEX_K, L, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2_K, 3, "requires EVEX_K and the L2 prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_XS_K, 4, "requires EVEX_K and the L2 and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_XD_K, 4, "requires EVEX_K and the L2 and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_OPSIZE_K, 4, "requires EVEX_K, L2, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2_W_K, 3, "requires EVEX_K, L2 and W") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XS_K, 4, "requires EVEX_K, L2, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XD_K, 4, "requires EVEX_K, L2, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_K, 4, "requires EVEX_K, L2, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_B, 1, "requires an EVEX_B prefix") \
+ ENUM_ENTRY(IC_EVEX_XS_B, 2, "requires EVEX_B and the XS prefix") \
+ ENUM_ENTRY(IC_EVEX_XD_B, 2, "requires EVEX_B and the XD prefix") \
+ ENUM_ENTRY(IC_EVEX_OPSIZE_B, 2, "requires EVEX_B and the OpSize prefix") \
+ ENUM_ENTRY(IC_EVEX_W_B, 3, "requires EVEX_B and the W prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XS_B, 4, "requires EVEX_B, W, and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XD_B, 4, "requires EVEX_B, W, and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_W_OPSIZE_B, 4, "requires EVEX_B, W, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L_B, 3, "requires EVEX_B and the L prefix") \
+ ENUM_ENTRY(IC_EVEX_L_XS_B, 4, "requires EVEX_B and the L and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L_XD_B, 4, "requires EVEX_B and the L and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L_OPSIZE_B, 4, "requires EVEX_B, L, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L_W_B, 3, "requires EVEX_B, L and W") \
+ ENUM_ENTRY(IC_EVEX_L_W_XS_B, 4, "requires EVEX_B, L, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_XD_B, 4, "requires EVEX_B, L, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_B, 4, "requires EVEX_B, L, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2_B, 3, "requires EVEX_B and the L2 prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_XS_B, 4, "requires EVEX_B and the L2 and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_XD_B, 4, "requires EVEX_B and the L2 and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_OPSIZE_B, 4, "requires EVEX_B, L2, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2_W_B, 3, "requires EVEX_B, L2 and W") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XS_B, 4, "requires EVEX_B, L2, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XD_B, 4, "requires EVEX_B, L2, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_B, 4, "requires EVEX_B, L2, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_K_B, 1, "requires EVEX_B and EVEX_K prefix") \
+ ENUM_ENTRY(IC_EVEX_XS_K_B, 2, "requires EVEX_B, EVEX_K and the XS prefix") \
+ ENUM_ENTRY(IC_EVEX_XD_K_B, 2, "requires EVEX_B, EVEX_K and the XD prefix") \
+ ENUM_ENTRY(IC_EVEX_OPSIZE_K_B, 2, "requires EVEX_B, EVEX_K and the OpSize prefix") \
+ ENUM_ENTRY(IC_EVEX_W_K_B, 3, "requires EVEX_B, EVEX_K and the W prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XS_K_B, 4, "requires EVEX_B, EVEX_K, W, and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XD_K_B, 4, "requires EVEX_B, EVEX_K, W, and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_W_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, W, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L_K_B, 3, "requires EVEX_B, EVEX_K and the L prefix") \
+ ENUM_ENTRY(IC_EVEX_L_XS_K_B, 4, "requires EVEX_B, EVEX_K and the L and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L_XD_K_B, 4, "requires EVEX_B, EVEX_K and the L and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, L, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L_W_K_B, 3, "requires EVEX_B, EVEX_K, L and W") \
+ ENUM_ENTRY(IC_EVEX_L_W_XS_K_B, 4, "requires EVEX_B, EVEX_K, L, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_XD_K_B, 4, "requires EVEX_B, EVEX_K, L, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_K_B,4, "requires EVEX_B, EVEX_K, L, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2_K_B, 3, "requires EVEX_B, EVEX_K and the L2 prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_XS_K_B, 4, "requires EVEX_B, EVEX_K and the L2 and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_XD_K_B, 4, "requires EVEX_B, EVEX_K and the L2 and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, L2, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2_W_K_B, 3, "requires EVEX_B, EVEX_K, L2 and W") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XS_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XD_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_K_B,4, "requires EVEX_B, EVEX_K, L2, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_KZ_B, 1, "requires EVEX_B and EVEX_KZ prefix") \
+ ENUM_ENTRY(IC_EVEX_XS_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the XS prefix") \
+ ENUM_ENTRY(IC_EVEX_XD_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the XD prefix") \
+ ENUM_ENTRY(IC_EVEX_OPSIZE_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the OpSize prefix") \
+ ENUM_ENTRY(IC_EVEX_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the W prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the L prefix") \
+ ENUM_ENTRY(IC_EVEX_L_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ, L and W") \
+ ENUM_ENTRY(IC_EVEX_L_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the L2 prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L2 and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L2 and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ, L2 and W") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_KZ, 1, "requires an EVEX_KZ prefix") \
+ ENUM_ENTRY(IC_EVEX_XS_KZ, 2, "requires EVEX_KZ and the XS prefix") \
+ ENUM_ENTRY(IC_EVEX_XD_KZ, 2, "requires EVEX_KZ and the XD prefix") \
+ ENUM_ENTRY(IC_EVEX_OPSIZE_KZ, 2, "requires EVEX_KZ and the OpSize prefix") \
+ ENUM_ENTRY(IC_EVEX_W_KZ, 3, "requires EVEX_KZ and the W prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XS_KZ, 4, "requires EVEX_KZ, W, and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XD_KZ, 4, "requires EVEX_KZ, W, and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_W_OPSIZE_KZ, 4, "requires EVEX_KZ, W, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L_KZ, 3, "requires EVEX_KZ and the L prefix") \
+ ENUM_ENTRY(IC_EVEX_L_XS_KZ, 4, "requires EVEX_KZ and the L and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L_XD_KZ, 4, "requires EVEX_KZ and the L and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L_OPSIZE_KZ, 4, "requires EVEX_KZ, L, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L_W_KZ, 3, "requires EVEX_KZ, L and W") \
+ ENUM_ENTRY(IC_EVEX_L_W_XS_KZ, 4, "requires EVEX_KZ, L, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_XD_KZ, 4, "requires EVEX_KZ, L, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2_KZ, 3, "requires EVEX_KZ and the L2 prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_XS_KZ, 4, "requires EVEX_KZ and the L2 and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_XD_KZ, 4, "requires EVEX_KZ and the L2 and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2_W_KZ, 3, "requires EVEX_KZ, L2 and W") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ, 4, "requires EVEX_KZ, L2, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ, 4, "requires EVEX_KZ, L2, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize")
+
+#define ENUM_ENTRY(n, r, d) n, // Emit only the name; r and d are unused here.
+enum InstructionContext {
+ INSTRUCTION_CONTEXTS
+ IC_max // Follows every context listed in INSTRUCTION_CONTEXTS.
+};
+#undef ENUM_ENTRY // Release the name so it can be redefined for other lists.
+
+// Opcode types, which determine which decode table to use, both in the Intel
+// manual and also for the decoder. Values are explicit and contiguous (0-6).
+enum OpcodeType {
+ ONEBYTE = 0,
+ TWOBYTE = 1,
+ THREEBYTE_38 = 2,
+ THREEBYTE_3A = 3,
+ XOP8_MAP = 4,
+ XOP9_MAP = 5,
+ XOPA_MAP = 6
+};
+
+// The following structs are used for the hierarchical decode table. After
+// determining the instruction's class (i.e., which IC_* constant applies to
+// it), the decoder reads the opcode. Some instructions require specific
+// values of the ModR/M byte, so the ModR/M byte indexes into the final table.
+//
+// If a ModR/M byte is not required, "required" is left unset, and the values
+// for each instructionID are identical.
+typedef uint16_t InstrUID; // Unsigned 16-bit instruction identifier.
+
+// ModRMDecisionType - describes the type of ModR/M decision, allowing the
+// consumer to determine the number of entries in it.
+//
+// MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
+// instruction is the same.
+// MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode
+// corresponds to one instruction; otherwise, it corresponds to
+// a different instruction.
+// MODRM_SPLITMISC - If the ModR/M byte is between 0x00 and 0xbf, the ModR/M
+// byte divided by 8 is used to select the instruction; otherwise,
+// each value of the ModR/M byte could correspond to a different
+// instruction.
+// MODRM_SPLITREG - ModR/M byte divided by 8 is used to select instruction. This
+// corresponds to instructions that use the reg field as opcode.
+// MODRM_FULL - Potentially, each value of the ModR/M byte could correspond
+// to a different instruction.
+#define MODRMTYPES \
+ ENUM_ENTRY(MODRM_ONEENTRY) \
+ ENUM_ENTRY(MODRM_SPLITRM) \
+ ENUM_ENTRY(MODRM_SPLITMISC) \
+ ENUM_ENTRY(MODRM_SPLITREG) \
+ ENUM_ENTRY(MODRM_FULL)
+
+#define ENUM_ENTRY(n) n, // Each list entry becomes one enumerator.
+enum ModRMDecisionType {
+ MODRMTYPES
+ MODRM_max // Follows the last decision type, so it equals their count (5).
+};
+#undef ENUM_ENTRY
+
+#define CASE_ENCODING_RM /* All R/M case labels; user appends the colon. */ \
+ case ENCODING_RM: \
+ case ENCODING_RM_CD2: \
+ case ENCODING_RM_CD4: \
+ case ENCODING_RM_CD8: \
+ case ENCODING_RM_CD16: \
+ case ENCODING_RM_CD32: \
+ case ENCODING_RM_CD64
+
+// Physical encodings of instruction operands, as (enumerator, description).
+#define ENCODINGS \
+ ENUM_ENTRY(ENCODING_NONE, "") \
+ ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \
+ ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \
+ ENUM_ENTRY(ENCODING_RM_CD2, "R/M operand with CDisp scaling of 2") \
+ ENUM_ENTRY(ENCODING_RM_CD4, "R/M operand with CDisp scaling of 4") \
+ ENUM_ENTRY(ENCODING_RM_CD8, "R/M operand with CDisp scaling of 8") \
+ ENUM_ENTRY(ENCODING_RM_CD16,"R/M operand with CDisp scaling of 16") \
+ ENUM_ENTRY(ENCODING_RM_CD32,"R/M operand with CDisp scaling of 32") \
+ ENUM_ENTRY(ENCODING_RM_CD64,"R/M operand with CDisp scaling of 64") \
+ ENUM_ENTRY(ENCODING_VVVV, "Register operand in VEX.vvvv byte.") \
+ ENUM_ENTRY(ENCODING_WRITEMASK, "Register operand in EVEX.aaa byte.") \
+ ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \
+ ENUM_ENTRY(ENCODING_CW, "2-byte") \
+ ENUM_ENTRY(ENCODING_CD, "4-byte") \
+ ENUM_ENTRY(ENCODING_CP, "6-byte") \
+ ENUM_ENTRY(ENCODING_CO, "8-byte") \
+ ENUM_ENTRY(ENCODING_CT, "10-byte") \
+ ENUM_ENTRY(ENCODING_IB, "1-byte immediate") \
+ ENUM_ENTRY(ENCODING_IW, "2-byte") \
+ ENUM_ENTRY(ENCODING_ID, "4-byte") \
+ ENUM_ENTRY(ENCODING_IO, "8-byte") \
+ ENUM_ENTRY(ENCODING_RB, "(AL..DIL, R8L..R15L) Register code added to " \
+ "the opcode byte") \
+ ENUM_ENTRY(ENCODING_RW, "(AX..DI, R8W..R15W)") \
+ ENUM_ENTRY(ENCODING_RD, "(EAX..EDI, R8D..R15D)") \
+ ENUM_ENTRY(ENCODING_RO, "(RAX..RDI, R8..R15)") \
+ ENUM_ENTRY(ENCODING_FP, "Position on floating-point stack in ModR/M " \
+ "byte.") \
+ \
+ ENUM_ENTRY(ENCODING_Iv, "Immediate of operand size") \
+ ENUM_ENTRY(ENCODING_Ia, "Immediate of address size") \
+ ENUM_ENTRY(ENCODING_Rv, "Register code of operand size added to the " \
+ "opcode byte") \
+ ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \
+ "in type") \
+ ENUM_ENTRY(ENCODING_SI, "Source index; encoded in OpSize/Adsize prefix") \
+ ENUM_ENTRY(ENCODING_DI, "Destination index; encoded in prefixes")
+
+#define ENUM_ENTRY(n, d) n, // Keep the name; the description is dropped.
+enum OperandEncoding {
+ ENCODINGS
+ ENCODING_max // Follows the last encoding, so it equals the entry count.
+};
+#undef ENUM_ENTRY
+
+// Semantic interpretations of instruction operands: (enumerator, description).
+#define TYPES \
+ ENUM_ENTRY(TYPE_NONE, "") \
+ ENUM_ENTRY(TYPE_REL8, "1-byte immediate address") \
+ ENUM_ENTRY(TYPE_REL16, "2-byte") \
+ ENUM_ENTRY(TYPE_REL32, "4-byte") \
+ ENUM_ENTRY(TYPE_REL64, "8-byte") \
+ ENUM_ENTRY(TYPE_PTR1616, "2+2-byte segment+offset address") \
+ ENUM_ENTRY(TYPE_PTR1632, "2+4-byte") \
+ ENUM_ENTRY(TYPE_PTR1664, "2+8-byte") \
+ ENUM_ENTRY(TYPE_R8, "1-byte register operand") \
+ ENUM_ENTRY(TYPE_R16, "2-byte") \
+ ENUM_ENTRY(TYPE_R32, "4-byte") \
+ ENUM_ENTRY(TYPE_R64, "8-byte") \
+ ENUM_ENTRY(TYPE_IMM8, "1-byte immediate operand") \
+ ENUM_ENTRY(TYPE_IMM16, "2-byte") \
+ ENUM_ENTRY(TYPE_IMM32, "4-byte") \
+ ENUM_ENTRY(TYPE_IMM64, "8-byte") \
+ ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \
+ ENUM_ENTRY(TYPE_IMM5, "1-byte immediate operand between 0 and 31") \
+ ENUM_ENTRY(TYPE_AVX512ICC, "1-byte immediate operand for AVX512 icmp") \
+ ENUM_ENTRY(TYPE_UIMM8, "1-byte unsigned immediate operand") \
+ ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \
+ ENUM_ENTRY(TYPE_RM16, "2-byte") \
+ ENUM_ENTRY(TYPE_RM32, "4-byte") \
+ ENUM_ENTRY(TYPE_RM64, "8-byte") \
+ ENUM_ENTRY(TYPE_M, "Memory operand") \
+ ENUM_ENTRY(TYPE_M8, "1-byte") \
+ ENUM_ENTRY(TYPE_M16, "2-byte") \
+ ENUM_ENTRY(TYPE_M32, "4-byte") \
+ ENUM_ENTRY(TYPE_M64, "8-byte") \
+ ENUM_ENTRY(TYPE_LEA, "Effective address") \
+ ENUM_ENTRY(TYPE_M128, "16-byte (SSE/SSE2)") \
+ ENUM_ENTRY(TYPE_M256, "32-byte (AVX)") \
+ ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \
+ ENUM_ENTRY(TYPE_M1632, "2+4-byte") \
+ ENUM_ENTRY(TYPE_M1664, "2+8-byte") \
+ ENUM_ENTRY(TYPE_SRCIDX8, "1-byte memory at source index") \
+ ENUM_ENTRY(TYPE_SRCIDX16, "2-byte memory at source index") \
+ ENUM_ENTRY(TYPE_SRCIDX32, "4-byte memory at source index") \
+ ENUM_ENTRY(TYPE_SRCIDX64, "8-byte memory at source index") \
+ ENUM_ENTRY(TYPE_DSTIDX8, "1-byte memory at destination index") \
+ ENUM_ENTRY(TYPE_DSTIDX16, "2-byte memory at destination index") \
+ ENUM_ENTRY(TYPE_DSTIDX32, "4-byte memory at destination index") \
+ ENUM_ENTRY(TYPE_DSTIDX64, "8-byte memory at destination index") \
+ ENUM_ENTRY(TYPE_MOFFS8, "1-byte memory offset (relative to segment " \
+ "base)") \
+ ENUM_ENTRY(TYPE_MOFFS16, "2-byte") \
+ ENUM_ENTRY(TYPE_MOFFS32, "4-byte") \
+ ENUM_ENTRY(TYPE_MOFFS64, "8-byte") \
+ ENUM_ENTRY(TYPE_SREG, "Byte with single bit set: 0 = ES, 1 = CS, " \
+ "2 = SS, 3 = DS, 4 = FS, 5 = GS") \
+ ENUM_ENTRY(TYPE_M32FP, "32-bit IEEE 754 memory floating-point operand") \
+ ENUM_ENTRY(TYPE_M64FP, "64-bit") \
+ ENUM_ENTRY(TYPE_M80FP, "80-bit extended") \
+ ENUM_ENTRY(TYPE_ST, "Position on the floating-point stack") \
+ ENUM_ENTRY(TYPE_MM64, "8-byte MMX register") \
+ ENUM_ENTRY(TYPE_XMM, "XMM register operand") \
+ ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \
+ ENUM_ENTRY(TYPE_XMM64, "8-byte") \
+ ENUM_ENTRY(TYPE_XMM128, "16-byte") \
+ ENUM_ENTRY(TYPE_XMM256, "32-byte") \
+ ENUM_ENTRY(TYPE_XMM512, "64-byte") \
+ ENUM_ENTRY(TYPE_VK1, "1-bit") \
+ ENUM_ENTRY(TYPE_VK2, "2-bit") \
+ ENUM_ENTRY(TYPE_VK4, "4-bit") \
+ ENUM_ENTRY(TYPE_VK8, "8-bit") \
+ ENUM_ENTRY(TYPE_VK16, "16-bit") \
+ ENUM_ENTRY(TYPE_VK32, "32-bit") \
+ ENUM_ENTRY(TYPE_VK64, "64-bit") \
+ ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
+ ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
+ ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
+ ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \
+ ENUM_ENTRY(TYPE_BNDR, "MPX bounds register") \
+ \
+ ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \
+ ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \
+ ENUM_ENTRY(TYPE_IMMv, "Immediate operand of operand size") \
+ ENUM_ENTRY(TYPE_RELv, "Immediate address of operand size") \
+ ENUM_ENTRY(TYPE_DUP0, "Duplicate of operand 0") \
+ ENUM_ENTRY(TYPE_DUP1, "operand 1") \
+ ENUM_ENTRY(TYPE_DUP2, "operand 2") \
+ ENUM_ENTRY(TYPE_DUP3, "operand 3") \
+ ENUM_ENTRY(TYPE_DUP4, "operand 4") \
+ ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state")
+
+#define ENUM_ENTRY(n, d) n, // Keep the name; the description is dropped.
+enum OperandType {
+ TYPES
+ TYPE_max // Follows the last operand type, so it equals the entry count.
+};
+#undef ENUM_ENTRY
+
+/// \brief The specification for how to extract and interpret one operand.
+struct OperandSpecifier {
+ uint8_t encoding; // presumably an OperandEncoding value -- TODO confirm
+ uint8_t type; // presumably an OperandType value -- TODO confirm
+};
+
+static const unsigned X86_MAX_OPERANDS = 6; // presumably per instruction; confirm
+
+/// Decoding mode for the Intel disassembler: 16-bit (real mode), 32-bit, and
+/// 64-bit (IA-32e in 64-bit mode). NOTE(review): the 32-bit mode was labelled
+/// "IA-32e"; presumably plain IA-32 protected mode is meant -- confirm.
+enum DisassemblerMode {
+ MODE_16BIT,
+ MODE_32BIT,
+ MODE_64BIT
+};
+
+} // namespace X86Disassembler
+} // namespace llvm
+
+#endif