Diffstat (limited to 'llvm/lib/Target/X86')
24 files changed, 1077 insertions, 848 deletions
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h index 1f69feceae27..12134f7b00f1 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h @@ -21,7 +21,6 @@ namespace llvm { class X86Subtarget; class X86TargetMachine; -/// This class provides the information for the target register banks. class X86LegalizerInfo : public LegalizerInfo { private: /// Keep a reference to the X86Subtarget around so that we can diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index e006dd877360..304b998e1f26 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -148,25 +148,21 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) { case X86::AND16ri8: case X86::AND16rm: case X86::AND16rr: - case X86::AND16rr_REV: case X86::AND32i32: case X86::AND32ri: case X86::AND32ri8: case X86::AND32rm: case X86::AND32rr: - case X86::AND32rr_REV: case X86::AND64i32: case X86::AND64ri32: case X86::AND64ri8: case X86::AND64rm: case X86::AND64rr: - case X86::AND64rr_REV: case X86::AND8i8: case X86::AND8ri: case X86::AND8ri8: case X86::AND8rm: case X86::AND8rr: - case X86::AND8rr_REV: return FirstMacroFusionInstKind::And; // CMP case X86::CMP16i16: @@ -175,28 +171,24 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) { case X86::CMP16ri8: case X86::CMP16rm: case X86::CMP16rr: - case X86::CMP16rr_REV: case X86::CMP32i32: case X86::CMP32mr: case X86::CMP32ri: case X86::CMP32ri8: case X86::CMP32rm: case X86::CMP32rr: - case X86::CMP32rr_REV: case X86::CMP64i32: case X86::CMP64mr: case X86::CMP64ri32: case X86::CMP64ri8: case X86::CMP64rm: case X86::CMP64rr: - case X86::CMP64rr_REV: case X86::CMP8i8: case X86::CMP8mr: case X86::CMP8ri: case X86::CMP8ri8: case X86::CMP8rm: case X86::CMP8rr: - case X86::CMP8rr_REV: return FirstMacroFusionInstKind::Cmp; // ADD case X86::ADD16i16: @@ -204,50 +196,42 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) { case X86::ADD16ri8: case X86::ADD16rm: case X86::ADD16rr: - case X86::ADD16rr_REV: case X86::ADD32i32: case X86::ADD32ri: case X86::ADD32ri8: case X86::ADD32rm: case X86::ADD32rr: - case X86::ADD32rr_REV: case X86::ADD64i32: case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD64rm: case X86::ADD64rr: - case X86::ADD64rr_REV: case X86::ADD8i8: case X86::ADD8ri: case X86::ADD8ri8: case X86::ADD8rm: case X86::ADD8rr: - case X86::ADD8rr_REV: // SUB case X86::SUB16i16: case X86::SUB16ri: case X86::SUB16ri8: case X86::SUB16rm: case X86::SUB16rr: - case X86::SUB16rr_REV: case X86::SUB32i32: case X86::SUB32ri: case X86::SUB32ri8: case X86::SUB32rm: case X86::SUB32rr: - case X86::SUB32rr_REV: case X86::SUB64i32: case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB64rm: case X86::SUB64rr: - case X86::SUB64rr_REV: case X86::SUB8i8: case X86::SUB8ri: case X86::SUB8ri8: case X86::SUB8rm: case X86::SUB8rr: - case X86::SUB8rr_REV: return FirstMacroFusionInstKind::AddSub; // INC case X86::INC16r: diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 924956295e7c..f7c361393fea 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -1650,6 +1650,9 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) ++SrcRegNum; + if (IsND) // Skip new data destination + 
++CurOp; + emitRegModRMByte(MI.getOperand(SrcRegNum), getX86RegNum(MI.getOperand(CurOp)), CB); CurOp = SrcRegNum + 1; diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 485afbc1dfbc..21623a805f55 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -131,9 +131,9 @@ FunctionPass *createX86FixupBWInsts(); /// to another, when profitable. FunctionPass *createX86DomainReassignmentPass(); -/// This pass replaces EVEX encoded of AVX-512 instructiosn by VEX -/// encoding when possible in order to reduce code size. -FunctionPass *createX86EvexToVexInsts(); +/// This pass compress instructions from EVEX space to legacy/VEX/EVEX space when +/// possible in order to reduce code size or facilitate HW decoding. +FunctionPass *createX86CompressEVEXPass(); /// This pass creates the thunks for the retpoline feature. FunctionPass *createX86IndirectThunksPass(); @@ -167,7 +167,7 @@ FunctionPass *createX86SpeculativeLoadHardeningPass(); FunctionPass *createX86SpeculativeExecutionSideEffectSuppression(); FunctionPass *createX86ArgumentStackSlotPass(); -void initializeEvexToVexInstPassPass(PassRegistry &); +void initializeCompressEVEXPassPass(PassRegistry &); void initializeFPSPass(PassRegistry &); void initializeFixupBWInstPassPass(PassRegistry &); void initializeFixupLEAPassPass(PassRegistry &); diff --git a/llvm/lib/Target/X86/X86EvexToVex.cpp b/llvm/lib/Target/X86/X86CompressEVEX.cpp index c425c37b4186..b95baddd9dea 100644 --- a/llvm/lib/Target/X86/X86EvexToVex.cpp +++ b/llvm/lib/Target/X86/X86CompressEVEX.cpp @@ -1,5 +1,4 @@ -//===- X86EvexToVex.cpp ---------------------------------------------------===// -// Compress EVEX instructions to VEX encoding when possible to reduce code size +//===- X86CompressEVEX.cpp ------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,17 +6,30 @@ // //===----------------------------------------------------------------------===// // -/// \file -/// This file defines the pass that goes over all AVX-512 instructions which -/// are encoded using the EVEX prefix and if possible replaces them by their -/// corresponding VEX encoding which is usually shorter by 2 bytes. -/// EVEX instructions may be encoded via the VEX prefix when the AVX-512 -/// instruction has a corresponding AVX/AVX2 opcode, when vector length -/// accessed by instruction is less than 512 bits and when it does not use -// the xmm or the mask registers or xmm/ymm registers with indexes higher -// than 15. -/// The pass applies code reduction on the generated code for AVX-512 instrs. +// This pass compresses instructions from EVEX space to legacy/VEX/EVEX space +// when possible in order to reduce code size or facilitate HW decoding. // +// Possible compression: +// a. AVX512 instruction (EVEX) -> AVX instruction (VEX) +// b. Promoted instruction (EVEX) -> pre-promotion instruction (legacy/VEX) +// c. NDD (EVEX) -> non-NDD (legacy) +// d. NF_ND (EVEX) -> NF (EVEX) +// +// Compression a, b and c can always reduce code size, with some exceptions +// such as promoted 16-bit CRC32 which is as long as the legacy version. +// +// legacy: +// crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6] +// promoted: +// crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6] +// +// From performance perspective, these should be same (same uops and same EXE +// ports). 
From a FMV perspective, an older legacy encoding is preferred b/c it +// can execute in more places (broader HW install base). So we will still do +// the compression. +// +// Compression d can help hardware decode (HW may skip reading the NDD +// register) although the instruction length remains unchanged. //===----------------------------------------------------------------------===// #include "MCTargetDesc/X86BaseInfo.h" @@ -38,37 +50,34 @@ using namespace llvm; -// Including the generated EVEX2VEX tables. -struct X86EvexToVexCompressTableEntry { - uint16_t EvexOpc; - uint16_t VexOpc; +// Including the generated EVEX compression tables. +struct X86CompressEVEXTableEntry { + uint16_t OldOpc; + uint16_t NewOpc; - bool operator<(const X86EvexToVexCompressTableEntry &RHS) const { - return EvexOpc < RHS.EvexOpc; + bool operator<(const X86CompressEVEXTableEntry &RHS) const { + return OldOpc < RHS.OldOpc; } - friend bool operator<(const X86EvexToVexCompressTableEntry &TE, - unsigned Opc) { - return TE.EvexOpc < Opc; + friend bool operator<(const X86CompressEVEXTableEntry &TE, unsigned Opc) { + return TE.OldOpc < Opc; } }; -#include "X86GenEVEX2VEXTables.inc" +#include "X86GenCompressEVEXTables.inc" -#define EVEX2VEX_DESC "Compressing EVEX instrs to VEX encoding when possible" -#define EVEX2VEX_NAME "x86-evex-to-vex-compress" +#define COMP_EVEX_DESC "Compressing EVEX instrs when possible" +#define COMP_EVEX_NAME "x86-compress-evex" -#define DEBUG_TYPE EVEX2VEX_NAME +#define DEBUG_TYPE COMP_EVEX_NAME namespace { -class EvexToVexInstPass : public MachineFunctionPass { +class CompressEVEXPass : public MachineFunctionPass { public: static char ID; - EvexToVexInstPass() : MachineFunctionPass(ID) {} - StringRef getPassName() const override { return EVEX2VEX_DESC; } + CompressEVEXPass() : MachineFunctionPass(ID) {} + StringRef getPassName() const override { return COMP_EVEX_DESC; } - /// Loop over all of the basic blocks, replacing EVEX instructions - /// by equivalent VEX instructions when possible for reducing code size. bool runOnMachineFunction(MachineFunction &MF) override; // This pass runs after regalloc and doesn't support VReg operands. @@ -80,7 +89,7 @@ public: } // end anonymous namespace -char EvexToVexInstPass::ID = 0; +char CompressEVEXPass::ID = 0; static bool usesExtendedRegister(const MachineInstr &MI) { auto isHiRegIdx = [](unsigned Reg) { @@ -112,8 +121,8 @@ static bool usesExtendedRegister(const MachineInstr &MI) { return false; } -static bool checkVEXInstPredicate(unsigned EvexOpc, const X86Subtarget &ST) { - switch (EvexOpc) { +static bool checkVEXInstPredicate(unsigned OldOpc, const X86Subtarget &ST) { + switch (OldOpc) { default: return true; case X86::VCVTNEPS2BF16Z128rm: @@ -151,15 +160,15 @@ static bool checkVEXInstPredicate(unsigned EvexOpc, const X86Subtarget &ST) { } // Do any custom cleanup needed to finalize the conversion. -static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) { - (void)VexOpc; +static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) { + (void)NewOpc; unsigned Opc = MI.getOpcode(); switch (Opc) { case X86::VALIGNDZ128rri: case X86::VALIGNDZ128rmi: case X86::VALIGNQZ128rri: case X86::VALIGNQZ128rmi: { - assert((VexOpc == X86::VPALIGNRrri || VexOpc == X86::VPALIGNRrmi) && + assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) && "Unexpected new opcode!"); unsigned Scale = (Opc == X86::VALIGNQZ128rri || Opc == X86::VALIGNQZ128rmi) ? 
8 : 4; @@ -175,8 +184,8 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) { case X86::VSHUFI32X4Z256rri: case X86::VSHUFI64X2Z256rmi: case X86::VSHUFI64X2Z256rri: { - assert((VexOpc == X86::VPERM2F128rr || VexOpc == X86::VPERM2I128rr || - VexOpc == X86::VPERM2F128rm || VexOpc == X86::VPERM2I128rm) && + assert((NewOpc == X86::VPERM2F128rr || NewOpc == X86::VPERM2I128rr || + NewOpc == X86::VPERM2F128rm || NewOpc == X86::VPERM2I128rm) && "Unexpected new opcode!"); MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1); int64_t ImmVal = Imm.getImm(); @@ -200,7 +209,7 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) { case X86::VRNDSCALESDZm_Int: case X86::VRNDSCALESSZr_Int: case X86::VRNDSCALESSZm_Int: - const MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1); + const MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1); int64_t ImmVal = Imm.getImm(); // Ensure that only bits 3:0 of the immediate are used. if ((ImmVal & 0xf) != ImmVal) @@ -211,86 +220,77 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) { return true; } -// For EVEX instructions that can be encoded using VEX encoding -// replace them by the VEX encoding in order to reduce size. -static bool CompressEvexToVexImpl(MachineInstr &MI, const X86Subtarget &ST) { - // VEX format. - // # of bytes: 0,2,3 1 1 0,1 0,1,2,4 0,1 - // [Prefixes] [VEX] OPCODE ModR/M [SIB] [DISP] [IMM] - // - // EVEX format. - // # of bytes: 4 1 1 1 4 / 1 1 - // [Prefixes] EVEX Opcode ModR/M [SIB] [Disp32] / [Disp8*N] [Immediate] - const MCInstrDesc &Desc = MI.getDesc(); +static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) { + uint64_t TSFlags = MI.getDesc().TSFlags; // Check for EVEX instructions only. - if ((Desc.TSFlags & X86II::EncodingMask) != X86II::EVEX) + if ((TSFlags & X86II::EncodingMask) != X86II::EVEX) return false; - // Check for EVEX instructions with mask or broadcast as in these cases - // the EVEX prefix is needed in order to carry this information - // thus preventing the transformation to VEX encoding. - if (Desc.TSFlags & (X86II::EVEX_K | X86II::EVEX_B)) + // Instructions with mask or 512-bit vector can't be converted to VEX. + if (TSFlags & (X86II::EVEX_K | X86II::EVEX_L2)) return false; - // Check for EVEX instructions with L2 set. These instructions are 512-bits - // and can't be converted to VEX. - if (Desc.TSFlags & X86II::EVEX_L2) + // EVEX_B has several meanings. + // AVX512: + // register form: rounding control or SAE + // memory form: broadcast + // + // APX: + // MAP4: NDD + // + // For AVX512 cases, EVEX prefix is needed in order to carry this information + // thus preventing the transformation to VEX encoding. + if (TSFlags & X86II::EVEX_B) return false; - // Use the VEX.L bit to select the 128 or 256-bit table. - ArrayRef<X86EvexToVexCompressTableEntry> Table = - (Desc.TSFlags & X86II::VEX_L) ? 
ArrayRef(X86EvexToVex256CompressTable) - : ArrayRef(X86EvexToVex128CompressTable); + ArrayRef<X86CompressEVEXTableEntry> Table = ArrayRef(X86CompressEVEXTable); - unsigned EvexOpc = MI.getOpcode(); - const auto *I = llvm::lower_bound(Table, EvexOpc); - if (I == Table.end() || I->EvexOpc != EvexOpc) + unsigned Opc = MI.getOpcode(); + const auto *I = llvm::lower_bound(Table, Opc); + if (I == Table.end() || I->OldOpc != Opc) return false; - if (usesExtendedRegister(MI)) - return false; - if (!checkVEXInstPredicate(EvexOpc, ST)) - return false; - if (!performCustomAdjustments(MI, I->VexOpc)) + if (usesExtendedRegister(MI) || !checkVEXInstPredicate(Opc, ST) || + !performCustomAdjustments(MI, I->NewOpc)) return false; - MI.setDesc(ST.getInstrInfo()->get(I->VexOpc)); - MI.setAsmPrinterFlag(X86::AC_EVEX_2_VEX); + const MCInstrDesc &NewDesc = ST.getInstrInfo()->get(I->NewOpc); + MI.setDesc(NewDesc); + uint64_t Encoding = NewDesc.TSFlags & X86II::EncodingMask; + auto AsmComment = + (Encoding == X86II::VEX) ? X86::AC_EVEX_2_VEX : X86::AC_EVEX_2_LEGACY; + MI.setAsmPrinterFlag(AsmComment); return true; } -bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) { +bool CompressEVEXPass::runOnMachineFunction(MachineFunction &MF) { #ifndef NDEBUG // Make sure the tables are sorted. static std::atomic<bool> TableChecked(false); if (!TableChecked.load(std::memory_order_relaxed)) { - assert(llvm::is_sorted(X86EvexToVex128CompressTable) && - "X86EvexToVex128CompressTable is not sorted!"); - assert(llvm::is_sorted(X86EvexToVex256CompressTable) && - "X86EvexToVex256CompressTable is not sorted!"); + assert(llvm::is_sorted(X86CompressEVEXTable) && + "X86CompressEVEXTable is not sorted!"); TableChecked.store(true, std::memory_order_relaxed); } #endif const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); - if (!ST.hasAVX512()) + if (!ST.hasAVX512() && !ST.hasEGPR() && !ST.hasNDD()) return false; bool Changed = false; - /// Go over all basic blocks in function and replace - /// EVEX encoded instrs by VEX encoding when possible. for (MachineBasicBlock &MBB : MF) { // Traverse the basic block. for (MachineInstr &MI : MBB) - Changed |= CompressEvexToVexImpl(MI, ST); + Changed |= CompressEVEXImpl(MI, ST); } return Changed; } -INITIALIZE_PASS(EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC, false, false) +INITIALIZE_PASS(CompressEVEXPass, COMP_EVEX_NAME, COMP_EVEX_DESC, false, false) -FunctionPass *llvm::createX86EvexToVexInsts() { - return new EvexToVexInstPass(); +FunctionPass *llvm::createX86CompressEVEXPass() { + return new CompressEVEXPass(); } diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp index bdd86e48fa54..20dbaf797e32 100644 --- a/llvm/lib/Target/X86/X86DomainReassignment.cpp +++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp @@ -619,40 +619,30 @@ void X86DomainReassignment::initConverters() { std::make_unique<InstrReplacerDstCOPY>(From, To); }; - bool HasEGPR = STI->hasEGPR(); - createReplacerDstCOPY(X86::MOVZX32rm16, - HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm); - createReplacerDstCOPY(X86::MOVZX64rm16, - HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm); +#define GET_EGPR_IF_ENABLED(OPC) STI->hasEGPR() ? OPC##_EVEX : OPC + createReplacerDstCOPY(X86::MOVZX32rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); + createReplacerDstCOPY(X86::MOVZX64rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); - createReplacerDstCOPY(X86::MOVZX32rr16, - HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk); - createReplacerDstCOPY(X86::MOVZX64rr16, - HasEGPR ? 
X86::KMOVWkk_EVEX : X86::KMOVWkk); + createReplacerDstCOPY(X86::MOVZX32rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); + createReplacerDstCOPY(X86::MOVZX64rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); if (STI->hasDQI()) { - createReplacerDstCOPY(X86::MOVZX16rm8, - HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm); - createReplacerDstCOPY(X86::MOVZX32rm8, - HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm); - createReplacerDstCOPY(X86::MOVZX64rm8, - HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm); - - createReplacerDstCOPY(X86::MOVZX16rr8, - HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk); - createReplacerDstCOPY(X86::MOVZX32rr8, - HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk); - createReplacerDstCOPY(X86::MOVZX64rr8, - HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk); + createReplacerDstCOPY(X86::MOVZX16rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); + createReplacerDstCOPY(X86::MOVZX32rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); + createReplacerDstCOPY(X86::MOVZX64rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); + + createReplacerDstCOPY(X86::MOVZX16rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); + createReplacerDstCOPY(X86::MOVZX32rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); + createReplacerDstCOPY(X86::MOVZX64rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); } auto createReplacer = [&](unsigned From, unsigned To) { Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(From, To); }; - createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm); - createReplacer(X86::MOV16mr, HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk); - createReplacer(X86::MOV16rr, HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk); + createReplacer(X86::MOV16rm, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); + createReplacer(X86::MOV16mr, GET_EGPR_IF_ENABLED(X86::KMOVWmk)); + createReplacer(X86::MOV16rr, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); createReplacer(X86::SHR16ri, X86::KSHIFTRWri); createReplacer(X86::SHL16ri, X86::KSHIFTLWri); createReplacer(X86::NOT16r, X86::KNOTWrr); @@ -661,14 +651,14 @@ void X86DomainReassignment::initConverters() { createReplacer(X86::XOR16rr, X86::KXORWrr); if (STI->hasBWI()) { - createReplacer(X86::MOV32rm, HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm); - createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm); + createReplacer(X86::MOV32rm, GET_EGPR_IF_ENABLED(X86::KMOVDkm)); + createReplacer(X86::MOV64rm, GET_EGPR_IF_ENABLED(X86::KMOVQkm)); - createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk); - createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk); + createReplacer(X86::MOV32mr, GET_EGPR_IF_ENABLED(X86::KMOVDmk)); + createReplacer(X86::MOV64mr, GET_EGPR_IF_ENABLED(X86::KMOVQmk)); - createReplacer(X86::MOV32rr, HasEGPR ? X86::KMOVDkk_EVEX : X86::KMOVDkk); - createReplacer(X86::MOV64rr, HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk); + createReplacer(X86::MOV32rr, GET_EGPR_IF_ENABLED(X86::KMOVDkk)); + createReplacer(X86::MOV64rr, GET_EGPR_IF_ENABLED(X86::KMOVQkk)); createReplacer(X86::SHR32ri, X86::KSHIFTRDri); createReplacer(X86::SHR64ri, X86::KSHIFTRQri); @@ -696,8 +686,8 @@ void X86DomainReassignment::initConverters() { // TODO: KTEST is not a replacement for TEST due to flag differences. Need // to prove only Z flag is used. - //createReplacer(X86::TEST32rr, X86::KTESTDrr); - //createReplacer(X86::TEST64rr, X86::KTESTQrr); + // createReplacer(X86::TEST32rr, X86::KTESTDrr); + // createReplacer(X86::TEST64rr, X86::KTESTQrr); } if (STI->hasDQI()) { @@ -706,9 +696,9 @@ void X86DomainReassignment::initConverters() { createReplacer(X86::AND8rr, X86::KANDBrr); - createReplacer(X86::MOV8rm, HasEGPR ? 
X86::KMOVBkm_EVEX : X86::KMOVBkm); - createReplacer(X86::MOV8mr, HasEGPR ? X86::KMOVBmk_EVEX : X86::KMOVBmk); - createReplacer(X86::MOV8rr, HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk); + createReplacer(X86::MOV8rm, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); + createReplacer(X86::MOV8mr, GET_EGPR_IF_ENABLED(X86::KMOVBmk)); + createReplacer(X86::MOV8rr, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); createReplacer(X86::NOT8r, X86::KNOTBrr); @@ -719,11 +709,12 @@ void X86DomainReassignment::initConverters() { // TODO: KTEST is not a replacement for TEST due to flag differences. Need // to prove only Z flag is used. - //createReplacer(X86::TEST8rr, X86::KTESTBrr); - //createReplacer(X86::TEST16rr, X86::KTESTWrr); + // createReplacer(X86::TEST8rr, X86::KTESTBrr); + // createReplacer(X86::TEST16rr, X86::KTESTWrr); createReplacer(X86::XOR8rr, X86::KXORBrr); } +#undef GET_EGPR_IF_ENABLED } bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 0ba31e173a1a..1ce1e6f6a563 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -916,7 +916,7 @@ redo_gep: // A array/variable index is always of the form i*S where S is the // constant scale size. See if we can push the scale into immediates. - uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); + uint64_t S = GTI.getSequentialElementStride(DL); for (;;) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { // Constant-offset addressing. @@ -3046,22 +3046,24 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { switch (II->getIntrinsicID()) { default: llvm_unreachable("Unexpected intrinsic."); +#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC case Intrinsic::x86_sse42_crc32_32_8: - Opc = X86::CRC32r32r8; + Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r8); RC = &X86::GR32RegClass; break; case Intrinsic::x86_sse42_crc32_32_16: - Opc = X86::CRC32r32r16; + Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r16); RC = &X86::GR32RegClass; break; case Intrinsic::x86_sse42_crc32_32_32: - Opc = X86::CRC32r32r32; + Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r32); RC = &X86::GR32RegClass; break; case Intrinsic::x86_sse42_crc32_64_64: - Opc = X86::CRC32r64r64; + Opc = GET_EGPR_IF_ENABLED(X86::CRC32r64r64); RC = &X86::GR64RegClass; break; +#undef GET_EGPR_IF_ENABLED } const Value *LHS = II->getArgOperand(0); diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp index b13bf361ab79..aad839b83ee1 100644 --- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp +++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -173,7 +173,6 @@ static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) { #define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC) \ LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr) \ - LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV) \ LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm) \ LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr) \ case X86::MNEMONIC##8ri: \ diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 73b10cf3067e..53ce720be2da 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2852,7 +2852,7 @@ bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, SDValue &Index, SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; - AM.Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue(); + AM.Scale = ScaleOp->getAsZExtVal(); // Attempt to match index patterns, as long as we're not 
relying on implicit // sign-extension, which is performed BEFORE scale. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1e4b1361f98a..5a28240ea9e2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7371,7 +7371,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, /// index. static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec, SDValue ExtIdx) { - int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue(); + int Idx = ExtIdx->getAsZExtVal(); if (!isa<ShuffleVectorSDNode>(ExtractedFromVec)) return Idx; @@ -7475,10 +7475,12 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { static SDValue LowerBUILD_VECTORvXbf16(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); - MVT IVT = VT.changeVectorElementTypeToInteger(); + MVT IVT = + VT.changeVectorElementType(Subtarget.hasFP16() ? MVT::f16 : MVT::i16); SmallVector<SDValue, 16> NewOps; for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) - NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I))); + NewOps.push_back(DAG.getBitcast(Subtarget.hasFP16() ? MVT::f16 : MVT::i16, + Op.getOperand(I))); SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps); return DAG.getBitcast(VT, Res); } @@ -8793,7 +8795,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MachinePointerInfo MPI = MachinePointerInfo::getConstantPool(MF); SDValue Ld = DAG.getLoad(VT, dl, DAG.getEntryNode(), LegalDAGConstVec, MPI); - unsigned InsertC = cast<ConstantSDNode>(InsIndex)->getZExtValue(); + unsigned InsertC = InsIndex->getAsZExtVal(); unsigned NumEltsInLow128Bits = 128 / VT.getScalarSizeInBits(); if (InsertC < NumEltsInLow128Bits) return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex); @@ -14369,6 +14371,13 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { + if (VT == MVT::v8bf16) { + V1 = DAG.getBitcast(MVT::v8i16, V1); + V2 = DAG.getBitcast(MVT::v8i16, V2); + return DAG.getBitcast(VT, + DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, Mask)); + } + switch (VT.SimpleTy) { case MVT::v2i64: return lowerV2I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); @@ -17096,14 +17105,14 @@ static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG, /*SimpleOnly*/ false); } - if (VT == MVT::v32f16) { + if (VT == MVT::v32f16 || VT == MVT::v32bf16) { if (!Subtarget.hasBWI()) return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG, /*SimpleOnly*/ false); V1 = DAG.getBitcast(MVT::v32i16, V1); V2 = DAG.getBitcast(MVT::v32i16, V2); - return DAG.getBitcast(MVT::v32f16, + return DAG.getBitcast(VT, DAG.getVectorShuffle(MVT::v32i16, DL, V1, V2, Mask)); } @@ -17747,7 +17756,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Vec), Idx)); - unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + unsigned IdxVal = Idx->getAsZExtVal(); SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, Vec, DAG.getTargetConstant(IdxVal, dl, MVT::i8)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract); @@ -21515,9 +21524,8 @@ SDValue X86TargetLowering::LowerFP_TO_BF16(SDValue Op, RTLIB::Libcall LC = 
RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16); SDValue Res = - makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first; - return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, - DAG.getBitcast(MVT::i32, Res)); + makeLibCall(DAG, LC, MVT::f16, Op.getOperand(0), CallOptions, DL).first; + return DAG.getBitcast(MVT::i16, Res); } /// Depending on uarch and/or optimizing for size, we might prefer to use a @@ -24061,7 +24069,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // a >= b ? -1 : 0 -> RES = setcc_carry // a >= b ? 0 : -1 -> RES = ~setcc_carry if (Cond.getOpcode() == X86ISD::SUB) { - unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue(); + unsigned CondCode = CC->getAsZExtVal(); if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) && (isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && @@ -25359,8 +25367,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, if (IntrData->Type == INTR_TYPE_3OP_IMM8 && Src3.getValueType() != MVT::i8) { - Src3 = DAG.getTargetConstant( - cast<ConstantSDNode>(Src3)->getZExtValue() & 0xff, dl, MVT::i8); + Src3 = DAG.getTargetConstant(Src3->getAsZExtVal() & 0xff, dl, MVT::i8); } // We specify 2 possible opcodes for intrinsics with rounding modes. @@ -25385,8 +25392,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, assert(Op.getOperand(4)->getOpcode() == ISD::TargetConstant); SDValue Src4 = Op.getOperand(4); if (Src4.getValueType() != MVT::i8) { - Src4 = DAG.getTargetConstant( - cast<ConstantSDNode>(Src4)->getZExtValue() & 0xff, dl, MVT::i8); + Src4 = DAG.getTargetConstant(Src4->getAsZExtVal() & 0xff, dl, MVT::i8); } return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), @@ -26788,7 +26794,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, {Chain, Op1, Op2, Size}, VT, MMO); Chain = Res.getValue(1); Res = DAG.getZExtOrTrunc(getSETCC(X86::COND_B, Res, DL, DAG), DL, VT); - unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue(); + unsigned Imm = Op2->getAsZExtVal(); if (Imm) Res = DAG.getNode(ISD::SHL, DL, VT, Res, DAG.getShiftAmountConstant(Imm, VT, DL)); @@ -40221,6 +40227,34 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, } return SDValue(); } + case X86ISD::SHUF128: { + // If we're permuting the upper 256-bits subvectors of a concatenation, then + // see if we can peek through and access the subvector directly. + if (VT.is512BitVector()) { + // 512-bit mask uses 4 x i2 indices - if the msb is always set then only the + // upper subvector is used. + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + uint64_t Mask = N->getConstantOperandVal(2); + SmallVector<SDValue> LHSOps, RHSOps; + SDValue NewLHS, NewRHS; + if ((Mask & 0x0A) == 0x0A && + collectConcatOps(LHS.getNode(), LHSOps, DAG) && LHSOps.size() == 2) { + NewLHS = widenSubVector(LHSOps[1], false, Subtarget, DAG, DL, 512); + Mask &= ~0x0A; + } + if ((Mask & 0xA0) == 0xA0 && + collectConcatOps(RHS.getNode(), RHSOps, DAG) && RHSOps.size() == 2) { + NewRHS = widenSubVector(RHSOps[1], false, Subtarget, DAG, DL, 512); + Mask &= ~0xA0; + } + if (NewLHS || NewRHS) + return DAG.getNode(X86ISD::SHUF128, DL, VT, NewLHS ? NewLHS : LHS, + NewRHS ? NewRHS : RHS, + DAG.getTargetConstant(Mask, DL, MVT::i8)); + } + return SDValue(); + } case X86ISD::VPERM2X128: { // Fold vperm2x128(bitcast(x),bitcast(y),c) -> bitcast(vperm2x128(x,y,c)). 
SDValue LHS = N->getOperand(0); @@ -41320,6 +41354,20 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( return TLO.CombineTo(Op, Src); break; } + case X86ISD::VZEXT_LOAD: { + // If upper demanded elements are not demanded then simplify to a + // scalar_to_vector(load()). + MVT SVT = VT.getSimpleVT().getVectorElementType(); + if (DemandedElts == 1 && Op.getValue(1).use_empty() && isTypeLegal(SVT)) { + SDLoc DL(Op); + auto *Mem = cast<MemSDNode>(Op); + SDValue Elt = TLO.DAG.getLoad(SVT, DL, Mem->getChain(), Mem->getBasePtr(), + Mem->getMemOperand()); + SDValue Vec = TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Elt); + return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Vec)); + } + break; + } case X86ISD::VBROADCAST: { SDValue Src = Op.getOperand(0); MVT SrcVT = Src.getSimpleValueType(); @@ -41795,7 +41843,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); - unsigned ShAmt = cast<ConstantSDNode>(Op1)->getZExtValue(); + unsigned ShAmt = Op1->getAsZExtVal(); if (ShAmt >= BitWidth) break; @@ -42580,7 +42628,7 @@ static SDValue combinevXi1ConstantToInteger(SDValue Op, SelectionDAG &DAG) { APInt Imm(SrcVT.getVectorNumElements(), 0); for (unsigned Idx = 0, e = Op.getNumOperands(); Idx < e; ++Idx) { SDValue In = Op.getOperand(Idx); - if (!In.isUndef() && (cast<ConstantSDNode>(In)->getZExtValue() & 0x1)) + if (!In.isUndef() && (In->getAsZExtVal() & 0x1)) Imm.setBit(Idx); } EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), Imm.getBitWidth()); @@ -49931,18 +49979,17 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG, SDValue Ptr = Ld->getBasePtr(); SDValue Chain = Ld->getChain(); for (SDNode *User : Chain->uses()) { - if (User != N && + auto *UserLd = dyn_cast<MemSDNode>(User); + if (User != N && UserLd && (User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD || User->getOpcode() == X86ISD::VBROADCAST_LOAD || ISD::isNormalLoad(User)) && - cast<MemSDNode>(User)->getChain() == Chain && - !User->hasAnyUseOfValue(1) && + UserLd->getChain() == Chain && !User->hasAnyUseOfValue(1) && User->getValueSizeInBits(0).getFixedValue() > RegVT.getFixedSizeInBits()) { if (User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD && - cast<MemSDNode>(User)->getBasePtr() == Ptr && - cast<MemSDNode>(User)->getMemoryVT().getSizeInBits() == - MemVT.getSizeInBits()) { + UserLd->getBasePtr() == Ptr && + UserLd->getMemoryVT().getSizeInBits() == MemVT.getSizeInBits()) { SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N), RegVT.getSizeInBits()); Extract = DAG.getBitcast(RegVT, Extract); @@ -49961,7 +50008,7 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG, // See if we are loading a constant that matches in the lower // bits of a longer constant (but from a different constant pool ptr). 
EVT UserVT = User->getValueType(0); - SDValue UserPtr = cast<MemSDNode>(User)->getBasePtr(); + SDValue UserPtr = UserLd->getBasePtr(); const Constant *LdC = getTargetConstantFromBasePtr(Ptr); const Constant *UserC = getTargetConstantFromBasePtr(UserPtr); if (LdC && UserC && UserPtr != Ptr) { @@ -53258,7 +53305,7 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG, if (Index.getOpcode() == ISD::ADD && Index.getValueType().getVectorElementType() == PtrVT && isa<ConstantSDNode>(Scale)) { - uint64_t ScaleAmt = cast<ConstantSDNode>(Scale)->getZExtValue(); + uint64_t ScaleAmt = Scale->getAsZExtVal(); if (auto *BV = dyn_cast<BuildVectorSDNode>(Index.getOperand(1))) { BitVector UndefElts; if (ConstantSDNode *C = BV->getConstantSplatNode(&UndefElts)) { @@ -54572,6 +54619,14 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, Op0.getValueType() == cast<MemSDNode>(SrcVec)->getMemoryVT()) return Op0.getOperand(0); } + + // concat_vectors(permq(x),permq(x)) -> permq(concat_vectors(x,x)) + if (Op0.getOpcode() == X86ISD::VPERMI && Subtarget.useAVX512Regs() && + !X86::mayFoldLoad(Op0.getOperand(0), Subtarget)) + return DAG.getNode(Op0.getOpcode(), DL, VT, + DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, + Op0.getOperand(0), Op0.getOperand(0)), + Op0.getOperand(1)); } // concat(extract_subvector(v0,c0), extract_subvector(v1,c1)) -> vperm2x128. @@ -54979,6 +55034,19 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2)); } break; + case X86ISD::BLENDI: + if (NumOps == 2 && VT.is512BitVector() && Subtarget.useBWIRegs()) { + uint64_t Mask0 = Ops[0].getConstantOperandVal(2); + uint64_t Mask1 = Ops[1].getConstantOperandVal(2); + uint64_t Mask = (Mask1 << (VT.getVectorNumElements() / 2)) | Mask0; + MVT MaskSVT = MVT::getIntegerVT(VT.getVectorNumElements()); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + SDValue Sel = + DAG.getBitcast(MaskVT, DAG.getConstant(Mask, DL, MaskSVT)); + return DAG.getSelect(DL, VT, Sel, ConcatSubOperand(VT, Ops, 1), + ConcatSubOperand(VT, Ops, 0)); + } + break; case ISD::VSELECT: if (!IsSplat && Subtarget.hasAVX512() && (VT.is256BitVector() || @@ -57602,7 +57670,7 @@ X86TargetLowering::getStackProbeSize(const MachineFunction &MF) const { } Align X86TargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { - if (ML->isInnermost() && + if (ML && ML->isInnermost() && ExperimentalPrefInnermostLoopAlignment.getNumOccurrences()) return Align(1ULL << ExperimentalPrefInnermostLoopAlignment); return TargetLowering::getPrefLoopAlignment(); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 9bd1622cb0d3..32745400a38b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1714,16 +1714,6 @@ namespace llvm { MachineBasicBlock *Entry, const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; - bool splitValueIntoRegisterParts( - SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, - unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) - const override; - - SDValue joinRegisterPartsIntoValue( - SelectionDAG & DAG, const SDLoc &DL, const SDValue *Parts, - unsigned NumParts, MVT PartVT, EVT ValueVT, - std::optional<CallingConv::ID> CC) const override; - bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; bool mayBeEmittedAsTailCall(const CallInst *CI) const override; diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp 
b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp index b8b5421b9005..d75bd4171fde 100644 --- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp @@ -127,6 +127,9 @@ MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, return getRegisterTypeForCallingConv(Context, CC, VT.changeVectorElementType(MVT::f16)); + if (VT == MVT::bf16) + return MVT::f16; + return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); } @@ -421,40 +424,6 @@ unsigned X86TargetLowering::getJumpTableEncoding() const { return TargetLowering::getJumpTableEncoding(); } -bool X86TargetLowering::splitValueIntoRegisterParts( - SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, - unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { - bool IsABIRegCopy = CC.has_value(); - EVT ValueVT = Val.getValueType(); - if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) { - unsigned ValueBits = ValueVT.getSizeInBits(); - unsigned PartBits = PartVT.getSizeInBits(); - Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val); - Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val); - Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); - Parts[0] = Val; - return true; - } - return false; -} - -SDValue X86TargetLowering::joinRegisterPartsIntoValue( - SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { - bool IsABIRegCopy = CC.has_value(); - if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) { - unsigned ValueBits = ValueVT.getSizeInBits(); - unsigned PartBits = PartVT.getSizeInBits(); - SDValue Val = Parts[0]; - - Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val); - Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val); - Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - return Val; - } - return SDValue(); -} - bool X86TargetLowering::useSoftFloat() const { return Subtarget.useSoftFloat(); } diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index c3a673f97d34..fe7d90fbcdf7 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -448,7 +448,7 @@ multiclass vinsert_for_type<ValueType EltVT32, int Opcode128, X86VectorVTInfo< 2, EltVT64, VR128X>, X86VectorVTInfo< 4, EltVT64, VR256X>, null_frag, vinsert128_insert, sched>, - VEX_W1X, EVEX_V256; + EVEX_V256, REX_W; // Even with DQI we'd like to only use these instructions for masking. let Predicates = [HasDQI] in { @@ -750,7 +750,7 @@ multiclass vextract_for_type<ValueType EltVT32, int Opcode128, X86VectorVTInfo< 4, EltVT64, VR256X>, X86VectorVTInfo< 2, EltVT64, VR128X>, null_frag, vextract128_extract, SchedRR, SchedMR>, - VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>; + EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W; // Even with DQI we'd like to only use these instructions for masking. 
let Predicates = [HasDQI] in { @@ -1161,7 +1161,7 @@ multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr, defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss", avx512vl_f32_info>; defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd", - avx512vl_f64_info>, VEX_W1X; + avx512vl_f64_info>, REX_W; multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR, X86VectorVTInfo _, SDPatternOperator OpNode, @@ -1267,7 +1267,7 @@ defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw", defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd", avx512vl_i32_info, HasAVX512, 1>; defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq", - avx512vl_i64_info, HasAVX512, 1>, VEX_W1X; + avx512vl_i64_info, HasAVX512, 1>, REX_W; multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, @@ -1460,11 +1460,11 @@ let Predicates = [HasBF16, HasVLX] in let Predicates = [HasVLX, HasDQI] in { defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", - X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X, - EVEX_V256, EVEX_CD8<64, CD8VT2>; + X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, + EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W; defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2", - X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X, - EVEX_V256, EVEX_CD8<64, CD8VT2>; + X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, + EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W; // Patterns for selects of bitcasted operations. def : Pat<(vselect_mask VK4WM:$mask, @@ -3185,15 +3185,13 @@ defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>; multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name, X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload, - X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd, - bit NoRMPattern = 0, + X86SchedWriteMoveLS Sched, bit NoRMPattern = 0, SDPatternOperator SelectOprr = vselect> { let hasSideEffects = 0 in { let isMoveReg = 1 in def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], - _.ExeDomain>, EVEX, Sched<[Sched.RR]>, - EVEX2VEXOverride<EVEX2VEXOvrd#"rr">; + _.ExeDomain>, EVEX, Sched<[Sched.RR]>; def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src), !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", @@ -3209,8 +3207,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name, !if(NoRMPattern, [], [(set _.RC:$dst, (_.VT (ld_frag addr:$src)))]), - _.ExeDomain>, EVEX, Sched<[Sched.RM]>, - EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; + _.ExeDomain>, EVEX, Sched<[Sched.RM]>; let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in { def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), @@ -3253,53 +3250,48 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name, multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr, AVX512VLVectorVTInfo _, Predicate prd, X86SchedWriteMoveLSWidths Sched, - string EVEX2VEXOvrd, bit NoRMPattern = 0> { + bit NoRMPattern = 0> { let Predicates = [prd] in defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.AlignedLdFrag, masked_load_aligned, - Sched.ZMM, "", NoRMPattern>, EVEX_V512; + Sched.ZMM, NoRMPattern>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.AlignedLdFrag, masked_load_aligned, - Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256; + Sched.YMM, 
NoRMPattern>, EVEX_V256; defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.AlignedLdFrag, masked_load_aligned, - Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128; + Sched.XMM, NoRMPattern>, EVEX_V128; } } multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, AVX512VLVectorVTInfo _, Predicate prd, X86SchedWriteMoveLSWidths Sched, - string EVEX2VEXOvrd, bit NoRMPattern = 0, + bit NoRMPattern = 0, SDPatternOperator SelectOprr = vselect> { let Predicates = [prd] in defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag, - masked_load, Sched.ZMM, "", - NoRMPattern, SelectOprr>, EVEX_V512; + masked_load, Sched.ZMM, NoRMPattern, SelectOprr>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag, - masked_load, Sched.YMM, EVEX2VEXOvrd#"Y", - NoRMPattern, SelectOprr>, EVEX_V256; + masked_load, Sched.YMM, NoRMPattern, SelectOprr>, EVEX_V256; defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag, - masked_load, Sched.XMM, EVEX2VEXOvrd, - NoRMPattern, SelectOprr>, EVEX_V128; + masked_load, Sched.XMM, NoRMPattern, SelectOprr>, EVEX_V128; } } multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName, X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore, - X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd, - bit NoMRPattern = 0> { + X86SchedWriteMoveLS Sched, bit NoMRPattern = 0> { let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { let isMoveReg = 1 in def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src), OpcodeStr # "\t{$src, $dst|$dst, $src}", [], _.ExeDomain>, EVEX, - Sched<[Sched.RR]>, - EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">; + Sched<[Sched.RR]>; def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src), OpcodeStr # "\t{$src, ${dst} {${mask}}|"# @@ -3319,8 +3311,7 @@ multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), !if(NoMRPattern, [], [(st_frag (_.VT _.RC:$src), addr:$dst)]), - _.ExeDomain>, EVEX, Sched<[Sched.MR]>, - EVEX2VEXOverride<EVEX2VEXOvrd#"mr">; + _.ExeDomain>, EVEX, Sched<[Sched.MR]>; def mrk : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}", @@ -3344,102 +3335,92 @@ multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName, multiclass avx512_store_vl< bits<8> opc, string OpcodeStr, AVX512VLVectorVTInfo _, Predicate prd, X86SchedWriteMoveLSWidths Sched, - string EVEX2VEXOvrd, bit NoMRPattern = 0> { + bit NoMRPattern = 0> { let Predicates = [prd] in defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store, - masked_store, Sched.ZMM, "", - NoMRPattern>, EVEX_V512; + masked_store, Sched.ZMM, NoMRPattern>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store, - masked_store, Sched.YMM, - EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256; + masked_store, Sched.YMM, NoMRPattern>, EVEX_V256; defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store, - masked_store, Sched.XMM, EVEX2VEXOvrd, - NoMRPattern>, EVEX_V128; + masked_store, Sched.XMM, NoMRPattern>, EVEX_V128; } } multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr, AVX512VLVectorVTInfo _, Predicate prd, X86SchedWriteMoveLSWidths Sched, - string EVEX2VEXOvrd, bit NoMRPattern = 0> { + bit NoMRPattern = 0> { let Predicates = [prd] in defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, 
alignedstore, - masked_store_aligned, Sched.ZMM, "", - NoMRPattern>, EVEX_V512; + masked_store_aligned, Sched.ZMM, NoMRPattern>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore, - masked_store_aligned, Sched.YMM, - EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256; + masked_store_aligned, Sched.YMM, NoMRPattern>, EVEX_V256; defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore, - masked_store_aligned, Sched.XMM, EVEX2VEXOvrd, - NoMRPattern>, EVEX_V128; + masked_store_aligned, Sched.XMM, NoMRPattern>, EVEX_V128; } } defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, - HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, + HasAVX512, SchedWriteFMoveLS>, avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, - HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, + HasAVX512, SchedWriteFMoveLS>, TB, EVEX_CD8<32, CD8VF>; defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, - HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, + HasAVX512, SchedWriteFMoveLS>, avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, - HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, + HasAVX512, SchedWriteFMoveLS>, TB, PD, REX_W, EVEX_CD8<64, CD8VF>; defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512, - SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>, + SchedWriteFMoveLS, 0, null_frag>, avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512, - SchedWriteFMoveLS, "VMOVUPS">, + SchedWriteFMoveLS>, TB, EVEX_CD8<32, CD8VF>; defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, - SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>, + SchedWriteFMoveLS, 0, null_frag>, avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512, - SchedWriteFMoveLS, "VMOVUPD">, + SchedWriteFMoveLS>, TB, PD, REX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, - HasAVX512, SchedWriteVecMoveLS, - "VMOVDQA", 1>, + HasAVX512, SchedWriteVecMoveLS, 1>, avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, - HasAVX512, SchedWriteVecMoveLS, - "VMOVDQA", 1>, + HasAVX512, SchedWriteVecMoveLS, 1>, TB, PD, EVEX_CD8<32, CD8VF>; defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, - HasAVX512, SchedWriteVecMoveLS, - "VMOVDQA">, + HasAVX512, SchedWriteVecMoveLS>, avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, - HasAVX512, SchedWriteVecMoveLS, - "VMOVDQA">, + HasAVX512, SchedWriteVecMoveLS>, TB, PD, REX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, - SchedWriteVecMoveLS, "VMOVDQU", 1>, + SchedWriteVecMoveLS, 1>, avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI, - SchedWriteVecMoveLS, "VMOVDQU", 1>, + SchedWriteVecMoveLS, 1>, TB, XD, EVEX_CD8<8, CD8VF>; defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, - SchedWriteVecMoveLS, "VMOVDQU", 1>, + SchedWriteVecMoveLS, 1>, avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI, - SchedWriteVecMoveLS, "VMOVDQU", 1>, + SchedWriteVecMoveLS, 1>, TB, XD, REX_W, EVEX_CD8<16, CD8VF>; defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, - SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>, + SchedWriteVecMoveLS, 1, null_frag>, avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512, - SchedWriteVecMoveLS, "VMOVDQU", 1>, + SchedWriteVecMoveLS, 1>, TB, XS, EVEX_CD8<32, CD8VF>; defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, - 
SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>, + SchedWriteVecMoveLS, 0, null_frag>, avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512, - SchedWriteVecMoveLS, "VMOVDQU">, + SchedWriteVecMoveLS>, TB, XS, REX_W, EVEX_CD8<64, CD8VF>; // Special instructions to help with spilling when we don't have VLX. We need @@ -4844,8 +4825,7 @@ defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul, SchedWriteVecIMul, HasBWI, 1>; defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul, - SchedWriteVecIMul, HasDQI, 1>, T8, - NotEVEX2VEXConvertible; + SchedWriteVecIMul, HasDQI, 1>, T8; defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul, HasBWI, 1>; defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul, @@ -4989,8 +4969,7 @@ defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax, defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax, SchedWriteVecALU, HasAVX512, 1>, T8; defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax, - SchedWriteVecALU, HasAVX512, 1>, T8, - NotEVEX2VEXConvertible; + SchedWriteVecALU, HasAVX512, 1>, T8; defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax, SchedWriteVecALU, HasBWI, 1>; @@ -4999,8 +4978,7 @@ defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax, defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax, SchedWriteVecALU, HasAVX512, 1>, T8; defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax, - SchedWriteVecALU, HasAVX512, 1>, T8, - NotEVEX2VEXConvertible; + SchedWriteVecALU, HasAVX512, 1>, T8; defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin, SchedWriteVecALU, HasBWI, 1>, T8; @@ -5009,8 +4987,7 @@ defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin, defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin, SchedWriteVecALU, HasAVX512, 1>, T8; defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin, - SchedWriteVecALU, HasAVX512, 1>, T8, - NotEVEX2VEXConvertible; + SchedWriteVecALU, HasAVX512, 1>, T8; defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin, SchedWriteVecALU, HasBWI, 1>; @@ -5019,8 +4996,7 @@ defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin, defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin, SchedWriteVecALU, HasAVX512, 1>, T8; defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin, - SchedWriteVecALU, HasAVX512, 1>, T8, - NotEVEX2VEXConvertible; + SchedWriteVecALU, HasAVX512, 1>, T8; // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX. 
let Predicates = [HasDQI, NoVLX] in { @@ -5405,8 +5381,7 @@ multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo } multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, SDNode OpNode, SDNode VecNode, SDNode SaeNode, - X86FoldableSchedWrite sched, bit IsCommutable, - string EVEX2VexOvrd> { + X86FoldableSchedWrite sched, bit IsCommutable> { let ExeDomain = _.ExeDomain in { defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr, @@ -5427,8 +5402,7 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, - Sched<[sched]>, - EVEX2VEXOverride<EVEX2VexOvrd#"rr"> { + Sched<[sched]> { let isCommutable = IsCommutable; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), @@ -5436,8 +5410,7 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))]>, - Sched<[sched.Folded, sched.ReadAfterFold]>, - EVEX2VEXOverride<EVEX2VexOvrd#"rm">; + Sched<[sched.Folded, sched.ReadAfterFold]>; } let Uses = [MXCSR] in @@ -5474,19 +5447,15 @@ multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode VecNode, SDNode SaeNode, X86SchedWriteSizes sched, bit IsCommutable> { defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode, - VecNode, SaeNode, sched.PS.Scl, IsCommutable, - NAME#"SS">, + VecNode, SaeNode, sched.PS.Scl, IsCommutable>, TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode, - VecNode, SaeNode, sched.PD.Scl, IsCommutable, - NAME#"SD">, + VecNode, SaeNode, sched.PD.Scl, IsCommutable>, TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>; let Predicates = [HasFP16] in { defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode, - VecNode, SaeNode, sched.PH.Scl, IsCommutable, - NAME#"SH">, - T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, - NotEVEX2VEXConvertible; + VecNode, SaeNode, sched.PH.Scl, IsCommutable>, + T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>; } } defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds, @@ -5506,14 +5475,13 @@ defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs, // X86fminc and X86fmaxc instead of X86fmin and X86fmax multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, SDNode OpNode, - X86FoldableSchedWrite sched, - string EVEX2VEXOvrd> { + X86FoldableSchedWrite sched> { let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, - Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> { + Sched<[sched]> { let isCommutable = 1; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), @@ -5521,36 +5489,34 @@ multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr, OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))]>, - Sched<[sched.Folded, sched.ReadAfterFold]>, - EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } defm VMINCSSZ : 
avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc, - SchedWriteFCmp.Scl, "VMINCSS">, TB, XS, + SchedWriteFCmp.Scl>, TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC; defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc, - SchedWriteFCmp.Scl, "VMINCSD">, TB, XD, + SchedWriteFCmp.Scl>, TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, SIMD_EXC; defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc, - SchedWriteFCmp.Scl, "VMAXCSS">, TB, XS, + SchedWriteFCmp.Scl>, TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC; defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc, - SchedWriteFCmp.Scl, "VMAXCSD">, TB, XD, + SchedWriteFCmp.Scl>, TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, SIMD_EXC; defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc, - SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5, XS, - EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC, - NotEVEX2VEXConvertible; + SchedWriteFCmp.Scl>, T_MAP5, XS, + EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC; + defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc, - SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5, XS, - EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC, - NotEVEX2VEXConvertible; + SchedWriteFCmp.Scl>, T_MAP5, XS, + EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC; multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, SDPatternOperator MaskOpNode, @@ -5820,8 +5786,7 @@ multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PD; } } -defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", - SchedWriteFAdd>, NotEVEX2VEXConvertible; +defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", SchedWriteFAdd>; //===----------------------------------------------------------------------===// // AVX-512 VPTESTM instructions @@ -5985,11 +5950,9 @@ multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw, string OpcodeStr, SDNode OpNode, - X86SchedWriteWidths sched, - bit NotEVEX2VEXConvertibleQ = 0> { + X86SchedWriteWidths sched> { defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32, avx512vl_i32_info, HasAVX512>; - let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64, avx512vl_i64_info, HasAVX512>, REX_W; defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16, @@ -6034,11 +5997,9 @@ multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM, multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq, Format ImmFormR, Format ImmFormM, string OpcodeStr, SDNode OpNode, - X86SchedWriteWidths sched, - bit NotEVEX2VEXConvertibleQ = 0> { + X86SchedWriteWidths sched> { defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode, sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; - let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode, sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W; } @@ -6054,7 +6015,7 @@ defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli, SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV; defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai, - SchedWriteVecShiftImm, 1>, + SchedWriteVecShiftImm>, 
avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai, SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV; @@ -6066,7 +6027,7 @@ defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli, defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, SchedWriteVecShift>; defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, - SchedWriteVecShift, 1>; + SchedWriteVecShift>; defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, SchedWriteVecShift>; @@ -6435,7 +6396,7 @@ defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info, avx512vl_i32_info>; let ExeDomain = SSEPackedDouble in defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info, - avx512vl_i64_info>, VEX_W1X; + avx512vl_i64_info>, REX_W; //===----------------------------------------------------------------------===// // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW @@ -8443,9 +8404,9 @@ multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNo } let Predicates = [HasDQI, HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode, - MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible; + MaskOpNode, sched.XMM>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode, - MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible; + MaskOpNode, sched.YMM>, EVEX_V256; } } @@ -8524,11 +8485,10 @@ multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperato defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag, null_frag, sched.XMM, _src.info128.BroadcastStr, "{x}", i128mem, _src.info128.KRCWM>, - EVEX_V128, NotEVEX2VEXConvertible; + EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode, MaskOpNode, sched.YMM, _src.info256.BroadcastStr, - "{y}">, EVEX_V256, - NotEVEX2VEXConvertible; + "{y}">, EVEX_V256; // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction // patterns have been disabled with null_frag. 
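The hunks above strip the EVEX2VEXOverride / NotEVEX2VEXConvertible annotations that the old EVEX-to-VEX compression pass keyed on. Conceptually, an EVEX encoding can only be shrunk to a VEX equivalent when none of the EVEX-only capabilities are exercised: no opmask, no embedded broadcast or rounding/SAE, no 512-bit operands, and no XMM16-XMM31 / YMM16-YMM31 registers (the FP16 forms never qualify, since there is no VEX map for them). A minimal sketch of that eligibility test, assuming a hypothetical stand-in record rather than LLVM's real MCInst or TableGen-generated tables:

    // Hypothetical, simplified view of the fields that decide whether an
    // EVEX-encoded instruction could be re-encoded with a shorter VEX prefix.
    struct EncodedInsn {
      bool HasOpmask;        // {k1}..{k7} merging/zeroing mask
      bool HasBroadcast;     // {1to2}/{1to4}/... embedded broadcast
      bool HasRoundingOrSAE; // {rn-sae}, {sae}, ...
      bool Uses512BitVector; // any ZMM operand
      bool UsesHighRegs;     // any XMM16-XMM31 / YMM16-YMM31 operand
    };

    // Sketch only: true when no EVEX-only capability is in use, so a VEX
    // encoding could express the same operation.
    bool canCompressEvexToVex(const EncodedInsn &I) {
      return !I.HasOpmask && !I.HasBroadcast && !I.HasRoundingOrSAE &&
             !I.Uses512BitVector && !I.UsesHighRegs;
    }

With the per-instruction overrides removed from these multiclasses, that EVEX-to-VEX mapping presumably comes from generated compression tables rather than from the .td annotations deleted here.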
@@ -10882,8 +10842,7 @@ defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info, multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, - X86VectorVTInfo CastInfo, - string EVEX2VEXOvrd> { + X86VectorVTInfo CastInfo> { let ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), @@ -10891,7 +10850,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, (_.VT (bitconvert (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))))>, - Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">; + Sched<[sched]>; defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", @@ -10900,8 +10859,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, (CastInfo.VT (X86Shuf128 _.RC:$src1, (CastInfo.LdFrag addr:$src2), (i8 timm:$src3)))))>, - Sched<[sched.Folded, sched.ReadAfterFold]>, - EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; + Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", @@ -10918,45 +10876,40 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _, - AVX512VLVectorVTInfo CastInfo, bits<8> opc, - string EVEX2VEXOvrd>{ + AVX512VLVectorVTInfo CastInfo, bits<8> opc>{ let Predicates = [HasAVX512] in defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched, - _.info512, CastInfo.info512, "">, EVEX_V512; + _.info512, CastInfo.info512>, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched, - _.info256, CastInfo.info256, - EVEX2VEXOvrd>, EVEX_V256; + _.info256, CastInfo.info256>, EVEX_V256; } defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256, - avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>; + avx512vl_f32_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>; defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256, - avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W; + avx512vl_f64_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W; defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256, - avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>; + avx512vl_i32_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>; defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256, - avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W; + avx512vl_i64_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W; multiclass avx512_valign<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _>{ - // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the - // instantiation of this class. 
let ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>, - Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">; + Sched<[sched]>; defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (X86VAlign _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)), (i8 timm:$src3)))>, - Sched<[sched.Folded, sched.ReadAfterFold]>, - EVEX2VEXOverride<"VPALIGNRrmi">; + Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), @@ -10979,7 +10932,6 @@ multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched, defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>, AVX512AIi8Base, EVEX, VVVV, EVEX_V128; // We can't really override the 256-bit version so change it back to unset. - let EVEX2VEXOverride = ? in defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>, AVX512AIi8Base, EVEX, VVVV, EVEX_V256; } @@ -11111,7 +11063,7 @@ let Predicates = [HasVLX, HasBWI] in { defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw", SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>, - EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible; + EVEX_CD8<8, CD8VF>; multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { @@ -13088,12 +13040,10 @@ multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNo let Predicates = [HasFP16, HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info, null_frag, null_frag, sched.XMM, "{1to2}", "{x}", - i128mem, VK2WM>, - EVEX_V128, NotEVEX2VEXConvertible; + i128mem, VK2WM>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info, null_frag, null_frag, sched.YMM, "{1to4}", "{y}", - i256mem, VK4WM>, - EVEX_V256, NotEVEX2VEXConvertible; + i256mem, VK4WM>, EVEX_V256; } def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index 6b0c1b8c28c9..5cfa95e085e3 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -71,24 +71,60 @@ multiclass Mul<bits<8> o, string m, Format RegMRM, Format MemMRM, SDPatternOpera // FIXME: Used for 8-bit mul, ignore result upper 8 bits. // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. 
- let Defs = [AL,EFLAGS,AX], Uses = [AL] in - def 8r : MulDivOpR<o, RegMRM, m, Xi8, WriteIMul8, - [(set AL, (node AL, GR8:$src1)), (implicit EFLAGS)]>; - let Defs = [AX,DX,EFLAGS], Uses = [AX] in - def 16r : MulDivOpR<o, RegMRM, m, Xi16, WriteIMul16, []>, OpSize16; - let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in - def 32r : MulDivOpR<o, RegMRM, m, Xi32, WriteIMul32, []>, OpSize32; - let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in - def 64r : MulDivOpR<o, RegMRM, m, Xi64, WriteIMul64, []>; - let Defs = [AL,EFLAGS,AX], Uses = [AL] in - def 8m : MulDivOpM<o, MemMRM, m, Xi8, WriteIMul8, - [(set AL, (node AL, (loadi8 addr:$src1))), (implicit EFLAGS)]>; - let Defs = [AX,DX,EFLAGS], Uses = [AX] in - def 16m : MulDivOpM<o, MemMRM, m, Xi16, WriteIMul16, []>, OpSize16; - let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in - def 32m : MulDivOpM<o, MemMRM, m, Xi32, WriteIMul32, []>, OpSize32; - let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in - def 64m : MulDivOpM<o, MemMRM, m, Xi64, WriteIMul64, []>, Requires<[In64BitMode]>; + let Defs = [AL, EFLAGS, AX], Uses = [AL] in + def 8r : MulDivOpR<o, RegMRM, m, Xi8, WriteIMul8, + [(set AL, (node AL, GR8:$src1)), (implicit EFLAGS)]>; + let Defs = [AX, DX, EFLAGS], Uses = [AX] in + def 16r : MulDivOpR<o, RegMRM, m, Xi16, WriteIMul16, []>, OpSize16; + let Defs = [EAX, EDX, EFLAGS], Uses = [EAX] in + def 32r : MulDivOpR<o, RegMRM, m, Xi32, WriteIMul32, []>, OpSize32; + let Defs = [RAX, RDX, EFLAGS], Uses = [RAX] in + def 64r : MulDivOpR<o, RegMRM, m, Xi64, WriteIMul64, []>; + let Defs = [AL, EFLAGS, AX], Uses = [AL] in + def 8m : MulDivOpM<o, MemMRM, m, Xi8, WriteIMul8, + [(set AL, (node AL, (loadi8 addr:$src1))), (implicit EFLAGS)]>; + let Defs = [AX, DX, EFLAGS], Uses = [AX] in + def 16m : MulDivOpM<o, MemMRM, m, Xi16, WriteIMul16, []>, OpSize16; + let Defs = [EAX, EDX, EFLAGS], Uses = [EAX] in + def 32m : MulDivOpM<o, MemMRM, m, Xi32, WriteIMul32, []>, OpSize32; + let Defs = [RAX, RDX, EFLAGS], Uses = [RAX] in + def 64m : MulDivOpM<o, MemMRM, m, Xi64, WriteIMul64, []>, Requires<[In64BitMode]>; + + let Predicates = [In64BitMode] in { + let Defs = [AL, AX], Uses = [AL] in + def 8r_NF : MulDivOpR<o, RegMRM, m, Xi8, WriteIMul8, []>, NF; + let Defs = [AX, DX], Uses = [AX] in + def 16r_NF : MulDivOpR<o, RegMRM, m, Xi16, WriteIMul16, []>, NF, PD; + let Defs = [EAX, EDX], Uses = [EAX] in + def 32r_NF : MulDivOpR<o, RegMRM, m, Xi32, WriteIMul32, []>, NF; + let Defs = [RAX, RDX], Uses = [RAX] in + def 64r_NF : MulDivOpR<o, RegMRM, m, Xi64, WriteIMul64, []>, NF; + let Defs = [AL, AX], Uses = [AL] in + def 8m_NF : MulDivOpM<o, MemMRM, m, Xi8, WriteIMul8, []>, NF; + let Defs = [AX, DX], Uses = [AX] in + def 16m_NF : MulDivOpM<o, MemMRM, m, Xi16, WriteIMul16, []>, NF, PD; + let Defs = [EAX, EDX], Uses = [EAX] in + def 32m_NF : MulDivOpM<o, MemMRM, m, Xi32, WriteIMul32, []>, NF; + let Defs = [RAX, RDX], Uses = [RAX] in + def 64m_NF : MulDivOpM<o, MemMRM, m, Xi64, WriteIMul64, []>, NF; + + let Defs = [AL, EFLAGS, AX], Uses = [AL] in + def 8r_EVEX : MulDivOpR<o, RegMRM, m, Xi8, WriteIMul8, []>, PL; + let Defs = [AX, DX, EFLAGS], Uses = [AX] in + def 16r_EVEX : MulDivOpR<o, RegMRM, m, Xi16, WriteIMul16, []>, PL, PD; + let Defs = [EAX, EDX, EFLAGS], Uses = [EAX] in + def 32r_EVEX : MulDivOpR<o, RegMRM, m, Xi32, WriteIMul32, []>, PL; + let Defs = [RAX, RDX, EFLAGS], Uses = [RAX] in + def 64r_EVEX : MulDivOpR<o, RegMRM, m, Xi64, WriteIMul64, []>, PL; + let Defs = [AL, EFLAGS, AX], Uses = [AL] in + def 8m_EVEX : MulDivOpM<o, MemMRM, m, Xi8, WriteIMul8, []>, PL; + let Defs = [AX, DX, EFLAGS], 
Uses = [AX] in + def 16m_EVEX : MulDivOpM<o, MemMRM, m, Xi16, WriteIMul16, []>, PL, PD; + let Defs = [EAX, EDX, EFLAGS], Uses = [EAX] in + def 32m_EVEX : MulDivOpM<o, MemMRM, m, Xi32, WriteIMul32, []>, PL; + let Defs = [RAX, RDX, EFLAGS], Uses = [RAX] in + def 64m_EVEX : MulDivOpM<o, MemMRM, m, Xi64, WriteIMul64, []>, PL; + } } defm MUL : Mul<0xF7, "mul", MRM4r, MRM4m, mul>; @@ -99,137 +135,341 @@ multiclass Div<bits<8> o, string m, Format RegMRM, Format MemMRM> { defvar sched16 = !if(!eq(m, "div"), WriteDiv16, WriteIDiv16); defvar sched32 = !if(!eq(m, "div"), WriteDiv32, WriteIDiv32); defvar sched64 = !if(!eq(m, "div"), WriteDiv64, WriteIDiv64); - let Defs = [AL,AH,EFLAGS], Uses = [AX] in - def 8r : MulDivOpR<o, RegMRM, m, Xi8, sched8, []>; - let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in - def 16r : MulDivOpR<o, RegMRM, m, Xi16, sched16, []>, OpSize16; - let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in - def 32r : MulDivOpR<o, RegMRM, m, Xi32, sched32, []>, OpSize32; - let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in - def 64r : MulDivOpR<o, RegMRM, m, Xi64, sched64, []>; - let Defs = [AL,AH,EFLAGS], Uses = [AX] in - def 8m : MulDivOpM<o, MemMRM, m, Xi8, sched8, []>; - let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in - def 16m : MulDivOpM<o, MemMRM, m, Xi16, sched16, []>, OpSize16; - let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in - def 32m : MulDivOpM<o, MemMRM, m, Xi32, sched32, []>, OpSize32; - let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in - def 64m : MulDivOpM<o, MemMRM, m, Xi64, sched64, []>, Requires<[In64BitMode]>; + let Defs = [AL, AH, EFLAGS], Uses = [AX] in + def 8r : MulDivOpR<o, RegMRM, m, Xi8, sched8, []>; + let Defs = [AX, DX, EFLAGS], Uses = [AX, DX] in + def 16r : MulDivOpR<o, RegMRM, m, Xi16, sched16, []>, OpSize16; + let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EDX] in + def 32r : MulDivOpR<o, RegMRM, m, Xi32, sched32, []>, OpSize32; + let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RDX] in + def 64r : MulDivOpR<o, RegMRM, m, Xi64, sched64, []>; + let Defs = [AL, AH, EFLAGS], Uses = [AX] in + def 8m : MulDivOpM<o, MemMRM, m, Xi8, sched8, []>; + let Defs = [AX, DX, EFLAGS], Uses = [AX, DX] in + def 16m : MulDivOpM<o, MemMRM, m, Xi16, sched16, []>, OpSize16; + let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EDX] in + def 32m : MulDivOpM<o, MemMRM, m, Xi32, sched32, []>, OpSize32; + let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RDX] in + def 64m : MulDivOpM<o, MemMRM, m, Xi64, sched64, []>, Requires<[In64BitMode]>; + + let Predicates = [In64BitMode] in { + let Defs = [AL, AH], Uses = [AX] in + def 8r_NF : MulDivOpR<o, RegMRM, m, Xi8, sched8, []>, NF; + let Defs = [AX, DX], Uses = [AX, DX] in + def 16r_NF : MulDivOpR<o, RegMRM, m, Xi16, sched16, []>, NF, PD; + let Defs = [EAX, EDX], Uses = [EAX, EDX] in + def 32r_NF : MulDivOpR<o, RegMRM, m, Xi32, sched32, []>, NF; + let Defs = [RAX, RDX], Uses = [RAX, RDX] in + def 64r_NF : MulDivOpR<o, RegMRM, m, Xi64, sched64, []>, NF; + let Defs = [AL, AH], Uses = [AX] in + def 8m_NF : MulDivOpM<o, MemMRM, m, Xi8, sched8, []>, NF; + let Defs = [AX, DX], Uses = [AX, DX] in + def 16m_NF : MulDivOpM<o, MemMRM, m, Xi16, sched16, []>, NF, PD; + let Defs = [EAX, EDX], Uses = [EAX, EDX] in + def 32m_NF : MulDivOpM<o, MemMRM, m, Xi32, sched32, []>, NF; + let Defs = [RAX, RDX], Uses = [RAX, RDX] in + def 64m_NF : MulDivOpM<o, MemMRM, m, Xi64, sched64, []>, NF; + + let Defs = [AL, AH, EFLAGS], Uses = [AX] in + def 8r_EVEX : MulDivOpR<o, RegMRM, m, Xi8, sched8, []>, PL; + let Defs = [AX, DX, EFLAGS], Uses = [AX, DX] in + def 16r_EVEX : MulDivOpR<o, RegMRM, m, 
Xi16, sched16, []>, PL, PD; + let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EDX] in + def 32r_EVEX : MulDivOpR<o, RegMRM, m, Xi32, sched32, []>, PL; + let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RDX] in + def 64r_EVEX : MulDivOpR<o, RegMRM, m, Xi64, sched64, []>, PL; + let Defs = [AL, AH, EFLAGS], Uses = [AX] in + def 8m_EVEX : MulDivOpM<o, MemMRM, m, Xi8, sched8, []>, PL; + let Defs = [AX, DX, EFLAGS], Uses = [AX, DX] in + def 16m_EVEX : MulDivOpM<o, MemMRM, m, Xi16, sched16, []>, PL, PD; + let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EDX] in + def 32m_EVEX : MulDivOpM<o, MemMRM, m, Xi32, sched32, []>, PL; + let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RDX] in + def 64m_EVEX : MulDivOpM<o, MemMRM, m, Xi64, sched64, []>, PL; + } } + let hasSideEffects = 1 in { // so that we don't speculatively execute -defm DIV: Div<0xF7, "div", MRM6r, MRM6m>; -defm IDIV: Div<0xF7, "idiv", MRM7r, MRM7m>; + defm DIV: Div<0xF7, "div", MRM6r, MRM6m>; + defm IDIV: Div<0xF7, "idiv", MRM7r, MRM7m>; } -class IMulOpRR<X86TypeInfo t, X86FoldableSchedWrite sched> - : BinOpRR_RF<0xAF, "imul", t, X86smul_flag>, TB { +class IMulOpRR_R<X86TypeInfo t, X86FoldableSchedWrite sched, bit ndd = 0> + : BinOpRR_R<0xAF, "imul", t, ndd> { let Form = MRMSrcReg; let SchedRW = [sched]; // X = IMUL Y, Z --> X = IMUL Z, Y let isCommutable = 1; } -class IMulOpRM<X86TypeInfo t, X86FoldableSchedWrite sched> - : BinOpRM_RF<0xAF, "imul", t, X86smul_flag>, TB { -let Form = MRMSrcMem; -let SchedRW = [sched.Folded, sched.ReadAfterFold]; +class IMulOpRR_RF<X86TypeInfo t, X86FoldableSchedWrite sched, bit ndd = 0> + : BinOpRR_RF<0xAF, "imul", t, X86smul_flag, ndd> { + let Form = MRMSrcReg; + let SchedRW = [sched]; + // X = IMUL Y, Z --> X = IMUL Z, Y + let isCommutable = 1; +} +class IMulOpRM_R<X86TypeInfo t, X86FoldableSchedWrite sched, bit ndd = 0> + : BinOpRM_R<0xAF, "imul", t, ndd> { + let Form = MRMSrcMem; + let SchedRW = [sched.Folded, sched.ReadAfterFold]; +} +class IMulOpRM_RF<X86TypeInfo t, X86FoldableSchedWrite sched, bit ndd = 0> + : BinOpRM_RF<0xAF, "imul", t, X86smul_flag, ndd> { + let Form = MRMSrcMem; + let SchedRW = [sched.Folded, sched.ReadAfterFold]; +} + +let Predicates = [NoNDD] in { + def IMUL16rr : IMulOpRR_RF<Xi16, WriteIMul16Reg>, TB, OpSize16; + def IMUL32rr : IMulOpRR_RF<Xi32, WriteIMul32Reg>, TB, OpSize32; + def IMUL64rr : IMulOpRR_RF<Xi64, WriteIMul64Reg>, TB; + def IMUL16rm : IMulOpRM_RF<Xi16, WriteIMul16Reg>, TB, OpSize16; + def IMUL32rm : IMulOpRM_RF<Xi32, WriteIMul32Reg>, TB, OpSize32; + def IMUL64rm : IMulOpRM_RF<Xi64, WriteIMul64Reg>, TB; +} +let Predicates = [HasNDD, In64BitMode] in { + def IMUL16rr_ND : IMulOpRR_RF<Xi16, WriteIMul16Reg, 1>, PD; + def IMUL32rr_ND : IMulOpRR_RF<Xi32, WriteIMul32Reg, 1>; + def IMUL64rr_ND : IMulOpRR_RF<Xi64, WriteIMul64Reg, 1>; + def IMUL16rm_ND : IMulOpRM_RF<Xi16, WriteIMul16Reg, 1>, PD; + def IMUL32rm_ND : IMulOpRM_RF<Xi32, WriteIMul32Reg, 1>; + def IMUL64rm_ND : IMulOpRM_RF<Xi64, WriteIMul64Reg, 1>; } -def IMUL16rr : IMulOpRR<Xi16, WriteIMul16Reg>, OpSize16; -def IMUL32rr : IMulOpRR<Xi32, WriteIMul32Reg>, OpSize32; -def IMUL64rr : IMulOpRR<Xi64, WriteIMul64Reg>; -def IMUL16rm : IMulOpRM<Xi16, WriteIMul16Reg>, OpSize16; -def IMUL32rm : IMulOpRM<Xi32, WriteIMul32Reg>, OpSize32; -def IMUL64rm : IMulOpRM<Xi64, WriteIMul64Reg>; +let Predicates = [In64BitMode], Pattern = [(null_frag)] in { + def IMUL16rr_NF : IMulOpRR_R<Xi16, WriteIMul16Reg>, NF, PD; + def IMUL32rr_NF : IMulOpRR_R<Xi32, WriteIMul32Reg>, NF; + def IMUL64rr_NF : IMulOpRR_R<Xi64, WriteIMul64Reg>, NF; + def IMUL16rm_NF : 
IMulOpRM_R<Xi16, WriteIMul16Reg>, NF, PD; + def IMUL32rm_NF : IMulOpRM_R<Xi32, WriteIMul32Reg>, NF; + def IMUL64rm_NF : IMulOpRM_R<Xi64, WriteIMul64Reg>, NF; + + def IMUL16rr_NF_ND : IMulOpRR_R<Xi16, WriteIMul16Reg, 1>, EVEX_NF, PD; + def IMUL32rr_NF_ND : IMulOpRR_R<Xi32, WriteIMul32Reg, 1>, EVEX_NF; + def IMUL64rr_NF_ND : IMulOpRR_R<Xi64, WriteIMul64Reg, 1>, EVEX_NF; + def IMUL16rm_NF_ND : IMulOpRM_R<Xi16, WriteIMul16Reg, 1>, EVEX_NF, PD; + def IMUL32rm_NF_ND : IMulOpRM_R<Xi32, WriteIMul32Reg, 1>, EVEX_NF; + def IMUL64rm_NF_ND : IMulOpRM_R<Xi64, WriteIMul64Reg, 1>, EVEX_NF; + + def IMUL16rr_EVEX : IMulOpRR_RF<Xi16, WriteIMul16Reg>, PL, PD; + def IMUL32rr_EVEX : IMulOpRR_RF<Xi32, WriteIMul32Reg>, PL; + def IMUL64rr_EVEX : IMulOpRR_RF<Xi64, WriteIMul64Reg>, PL; + def IMUL16rm_EVEX : IMulOpRM_RF<Xi16, WriteIMul16Reg>, PL, PD; + def IMUL32rm_EVEX : IMulOpRM_RF<Xi32, WriteIMul32Reg>, PL; + def IMUL64rm_EVEX : IMulOpRM_RF<Xi64, WriteIMul64Reg>, PL; +} class IMulOpRI8_R<X86TypeInfo t, X86FoldableSchedWrite sched> : BinOpRI8<0x6B, "imul", binop_ndd_args, t, MRMSrcReg, - (outs t.RegClass:$dst)>, DefEFLAGS { + (outs t.RegClass:$dst)> { let SchedRW = [sched]; } class IMulOpRI_R<X86TypeInfo t, X86FoldableSchedWrite sched> : BinOpRI<0x69, "imul", binop_ndd_args, t, MRMSrcReg, + (outs t.RegClass:$dst), []> { + let SchedRW = [sched]; +} +class IMulOpRI_RF<X86TypeInfo t, X86FoldableSchedWrite sched> + : BinOpRI<0x69, "imul", binop_ndd_args, t, MRMSrcReg, (outs t.RegClass:$dst), [(set t.RegClass:$dst, EFLAGS, (X86smul_flag t.RegClass:$src1, t.ImmNoSuOperator:$src2))]>, DefEFLAGS { let SchedRW = [sched]; } class IMulOpMI8_R<X86TypeInfo t, X86FoldableSchedWrite sched> - : BinOpMI8<"imul", binop_ndd_args, t, MRMSrcMem, (outs t.RegClass:$dst)>, - DefEFLAGS { + : BinOpMI8<"imul", binop_ndd_args, t, MRMSrcMem, (outs t.RegClass:$dst)> { let Opcode = 0x6B; let SchedRW = [sched.Folded]; } class IMulOpMI_R<X86TypeInfo t, X86FoldableSchedWrite sched> : BinOpMI<0x69, "imul", binop_ndd_args, t, MRMSrcMem, + (outs t.RegClass:$dst), []> { + let SchedRW = [sched.Folded]; +} +class IMulOpMI_RF<X86TypeInfo t, X86FoldableSchedWrite sched> + : BinOpMI<0x69, "imul", binop_ndd_args, t, MRMSrcMem, (outs t.RegClass:$dst), [(set t.RegClass:$dst, EFLAGS, (X86smul_flag (t.LoadNode addr:$src1), t.ImmNoSuOperator:$src2))]>, DefEFLAGS { let SchedRW = [sched.Folded]; } -def IMUL16rri8 : IMulOpRI8_R<Xi16, WriteIMul16Imm>, OpSize16; -def IMUL32rri8 : IMulOpRI8_R<Xi32, WriteIMul32Imm>, OpSize32; -def IMUL64rri8 : IMulOpRI8_R<Xi64, WriteIMul64Imm>; -def IMUL16rri : IMulOpRI_R<Xi16, WriteIMul16Imm>, OpSize16; -def IMUL32rri : IMulOpRI_R<Xi32, WriteIMul32Imm>, OpSize32; -def IMUL64rri32 : IMulOpRI_R<Xi64, WriteIMul64Imm>; - -def IMUL16rmi8 : IMulOpMI8_R<Xi16, WriteIMul16Imm>, OpSize16; -def IMUL32rmi8 : IMulOpMI8_R<Xi32, WriteIMul32Imm>, OpSize32; -def IMUL64rmi8 : IMulOpMI8_R<Xi64, WriteIMul64Imm>; -def IMUL16rmi : IMulOpMI_R<Xi16, WriteIMul16Imm>, OpSize16; -def IMUL32rmi : IMulOpMI_R<Xi32, WriteIMul32Imm>, OpSize32; -def IMUL64rmi32 : IMulOpMI_R<Xi64, WriteIMul64Imm>; - +def IMUL16rri8 : IMulOpRI8_R<Xi16, WriteIMul16Imm>, DefEFLAGS, OpSize16; +def IMUL32rri8 : IMulOpRI8_R<Xi32, WriteIMul32Imm>, DefEFLAGS, OpSize32; +def IMUL64rri8 : IMulOpRI8_R<Xi64, WriteIMul64Imm>, DefEFLAGS; +def IMUL16rri : IMulOpRI_RF<Xi16, WriteIMul16Imm>, OpSize16; +def IMUL32rri : IMulOpRI_RF<Xi32, WriteIMul32Imm>, OpSize32; +def IMUL64rri32 : IMulOpRI_RF<Xi64, WriteIMul64Imm>; +def IMUL16rmi8 : IMulOpMI8_R<Xi16, WriteIMul16Imm>, DefEFLAGS, OpSize16; +def 
IMUL32rmi8 : IMulOpMI8_R<Xi32, WriteIMul32Imm>, DefEFLAGS, OpSize32; +def IMUL64rmi8 : IMulOpMI8_R<Xi64, WriteIMul64Imm>, DefEFLAGS; +def IMUL16rmi : IMulOpMI_RF<Xi16, WriteIMul16Imm>, OpSize16; +def IMUL32rmi : IMulOpMI_RF<Xi32, WriteIMul32Imm>, OpSize32; +def IMUL64rmi32 : IMulOpMI_RF<Xi64, WriteIMul64Imm>; + +let Predicates = [In64BitMode] in { + def IMUL16rri8_NF : IMulOpRI8_R<Xi16, WriteIMul16Imm>, NF, PD; + def IMUL32rri8_NF : IMulOpRI8_R<Xi32, WriteIMul32Imm>, NF; + def IMUL64rri8_NF : IMulOpRI8_R<Xi64, WriteIMul64Imm>, NF; + def IMUL16rri_NF : IMulOpRI_R<Xi16, WriteIMul16Imm>, NF, PD; + def IMUL32rri_NF : IMulOpRI_R<Xi32, WriteIMul32Imm>, NF; + def IMUL64rri32_NF : IMulOpRI_R<Xi64, WriteIMul64Imm>, NF; + def IMUL16rmi8_NF : IMulOpMI8_R<Xi16, WriteIMul16Imm>, NF, PD; + def IMUL32rmi8_NF : IMulOpMI8_R<Xi32, WriteIMul32Imm>, NF; + def IMUL64rmi8_NF : IMulOpMI8_R<Xi64, WriteIMul64Imm>, NF; + def IMUL16rmi_NF : IMulOpMI_R<Xi16, WriteIMul16Imm>, NF, PD; + def IMUL32rmi_NF : IMulOpMI_R<Xi32, WriteIMul32Imm>, NF; + def IMUL64rmi32_NF : IMulOpMI_R<Xi64, WriteIMul64Imm>, NF; + + def IMUL16rri8_EVEX : IMulOpRI8_R<Xi16, WriteIMul16Imm>, DefEFLAGS, PL, PD; + def IMUL32rri8_EVEX : IMulOpRI8_R<Xi32, WriteIMul32Imm>, DefEFLAGS, PL; + def IMUL64rri8_EVEX : IMulOpRI8_R<Xi64, WriteIMul64Imm>, DefEFLAGS, PL; + def IMUL16rri_EVEX : IMulOpRI_RF<Xi16, WriteIMul16Imm>, PL, PD; + def IMUL32rri_EVEX : IMulOpRI_RF<Xi32, WriteIMul32Imm>, PL; + def IMUL64rri32_EVEX : IMulOpRI_RF<Xi64, WriteIMul64Imm>, PL; + def IMUL16rmi8_EVEX : IMulOpMI8_R<Xi16, WriteIMul16Imm>, DefEFLAGS, PL, PD; + def IMUL32rmi8_EVEX : IMulOpMI8_R<Xi32, WriteIMul32Imm>, DefEFLAGS, PL; + def IMUL64rmi8_EVEX : IMulOpMI8_R<Xi64, WriteIMul64Imm>, DefEFLAGS, PL; + def IMUL16rmi_EVEX : IMulOpMI_RF<Xi16, WriteIMul16Imm>, PL, PD; + def IMUL32rmi_EVEX : IMulOpMI_RF<Xi32, WriteIMul32Imm>, PL; + def IMUL64rmi32_EVEX : IMulOpMI_RF<Xi64, WriteIMul64Imm>, PL; +} //===----------------------------------------------------------------------===// // INC and DEC Instructions // -class IncOpR_RF<X86TypeInfo t> : UnaryOpR_RF<0xFF, MRM0r, "inc", t, null_frag> { +class IncOpR_RF<X86TypeInfo t, bit ndd = 0> : UnaryOpR_RF<0xFF, MRM0r, "inc", t, null_frag, ndd> { let Pattern = [(set t.RegClass:$dst, EFLAGS, (X86add_flag_nocf t.RegClass:$src1, 1))]; } -class DecOpR_RF<X86TypeInfo t> : UnaryOpR_RF<0xFF, MRM1r, "dec", t, null_frag> { +class DecOpR_RF<X86TypeInfo t, bit ndd = 0> : UnaryOpR_RF<0xFF, MRM1r, "dec", t, null_frag, ndd> { let Pattern = [(set t.RegClass:$dst, EFLAGS, (X86sub_flag_nocf t.RegClass:$src1, 1))]; } -class IncOpM_M<X86TypeInfo t> : UnaryOpM_MF<0xFF, MRM0m, "inc", t, null_frag> { +class IncOpR_R<X86TypeInfo t, bit ndd = 0> : UnaryOpR_R<0xFF, MRM0r, "inc", t, null_frag, ndd>; +class DecOpR_R<X86TypeInfo t, bit ndd = 0> : UnaryOpR_R<0xFF, MRM1r, "dec", t, null_frag, ndd>; +class IncOpM_MF<X86TypeInfo t> : UnaryOpM_MF<0xFF, MRM0m, "inc", t, null_frag> { let Pattern = [(store (add (t.LoadNode addr:$src1), 1), addr:$src1), (implicit EFLAGS)]; } -class DecOpM_M<X86TypeInfo t> : UnaryOpM_MF<0xFF, MRM1m, "dec", t, null_frag> { +class DecOpM_MF<X86TypeInfo t> : UnaryOpM_MF<0xFF, MRM1m, "dec", t, null_frag> { let Pattern = [(store (add (t.LoadNode addr:$src1), -1), addr:$src1), (implicit EFLAGS)]; } +class IncOpM_RF<X86TypeInfo t> : UnaryOpM_RF<0xFF, MRM0m, "inc", t, null_frag> { + let Pattern = [(set t.RegClass:$dst, EFLAGS, (add (t.LoadNode addr:$src1), 1))]; +} +class DecOpM_RF<X86TypeInfo t> : UnaryOpM_RF<0xFF, MRM1m, "dec", t, null_frag> { + let Pattern = 
[(set t.RegClass:$dst, EFLAGS, (add (t.LoadNode addr:$src1), -1))]; +} +class IncOpM_M<X86TypeInfo t> : UnaryOpM_M<0xFF, MRM0m, "inc", t, null_frag>; +class DecOpM_M<X86TypeInfo t> : UnaryOpM_M<0xFF, MRM1m, "dec", t, null_frag>; +class IncOpM_R<X86TypeInfo t> : UnaryOpM_R<0xFF, MRM0m, "inc", t, null_frag>; +class DecOpM_R<X86TypeInfo t> : UnaryOpM_R<0xFF, MRM1m, "dec", t, null_frag>; + // IncDec_Alt - Instructions like "inc reg" short forms. // Short forms only valid in 32-bit mode. Selected during MCInst lowering. class IncDec_Alt<bits<8> o, string m, X86TypeInfo t> : UnaryOpR_RF<o, AddRegFrm, m, t, null_frag>, Requires<[Not64BitMode]>; let isConvertibleToThreeAddress = 1 in { -def INC16r_alt : IncDec_Alt<0x40, "inc", Xi16>, OpSize16; -def INC32r_alt : IncDec_Alt<0x40, "inc", Xi32>, OpSize32; -def DEC16r_alt : IncDec_Alt<0x48, "dec", Xi16>, OpSize16; -def DEC32r_alt : IncDec_Alt<0x48, "dec", Xi32>, OpSize32; -def INC8r : IncOpR_RF<Xi8>; -def INC16r : IncOpR_RF<Xi16>, OpSize16; -def INC32r : IncOpR_RF<Xi32>, OpSize32; -def INC64r : IncOpR_RF<Xi64>; -def DEC8r : DecOpR_RF<Xi8>; -def DEC16r : DecOpR_RF<Xi16>, OpSize16; -def DEC32r : DecOpR_RF<Xi32>, OpSize32; -def DEC64r : DecOpR_RF<Xi64>; + def INC16r_alt : IncDec_Alt<0x40, "inc", Xi16>, OpSize16; + def INC32r_alt : IncDec_Alt<0x40, "inc", Xi32>, OpSize32; + def DEC16r_alt : IncDec_Alt<0x48, "dec", Xi16>, OpSize16; + def DEC32r_alt : IncDec_Alt<0x48, "dec", Xi32>, OpSize32; + let Predicates = [NoNDD] in { + def INC8r : IncOpR_RF<Xi8>; + def INC16r : IncOpR_RF<Xi16>, OpSize16; + def INC32r : IncOpR_RF<Xi32>, OpSize32; + def INC64r : IncOpR_RF<Xi64>; + def DEC8r : DecOpR_RF<Xi8>; + def DEC16r : DecOpR_RF<Xi16>, OpSize16; + def DEC32r : DecOpR_RF<Xi32>, OpSize32; + def DEC64r : DecOpR_RF<Xi64>; + } + let Predicates = [HasNDD, In64BitMode] in { + def INC8r_ND : IncOpR_RF<Xi8, 1>; + def INC16r_ND : IncOpR_RF<Xi16, 1>, PD; + def INC32r_ND : IncOpR_RF<Xi32, 1>; + def INC64r_ND : IncOpR_RF<Xi64, 1>; + def DEC8r_ND : DecOpR_RF<Xi8, 1>; + def DEC16r_ND : DecOpR_RF<Xi16, 1>, PD; + def DEC32r_ND : DecOpR_RF<Xi32, 1>; + def DEC64r_ND : DecOpR_RF<Xi64, 1>; + } + let Predicates = [In64BitMode], Pattern = [(null_frag)] in { + def INC8r_NF : IncOpR_R<Xi8>, NF; + def INC16r_NF : IncOpR_R<Xi16>, NF, PD; + def INC32r_NF : IncOpR_R<Xi32>, NF; + def INC64r_NF : IncOpR_R<Xi64>, NF; + def DEC8r_NF : DecOpR_R<Xi8>, NF; + def DEC16r_NF : DecOpR_R<Xi16>, NF, PD; + def DEC32r_NF : DecOpR_R<Xi32>, NF; + def DEC64r_NF : DecOpR_R<Xi64>, NF; + def INC8r_NF_ND : IncOpR_R<Xi8, 1>, NF; + def INC16r_NF_ND : IncOpR_R<Xi16, 1>, NF, PD; + def INC32r_NF_ND : IncOpR_R<Xi32, 1>, NF; + def INC64r_NF_ND : IncOpR_R<Xi64, 1>, NF; + def DEC8r_NF_ND : DecOpR_R<Xi8, 1>, NF; + def DEC16r_NF_ND : DecOpR_R<Xi16, 1>, NF, PD; + def DEC32r_NF_ND : DecOpR_R<Xi32, 1>, NF; + def DEC64r_NF_ND : DecOpR_R<Xi64, 1>, NF; + def INC8r_EVEX : IncOpR_RF<Xi8>, PL; + def INC16r_EVEX : IncOpR_RF<Xi16>, PL, PD; + def INC32r_EVEX : IncOpR_RF<Xi32>, PL; + def INC64r_EVEX : IncOpR_RF<Xi64>, PL; + def DEC8r_EVEX : DecOpR_RF<Xi8>, PL; + def DEC16r_EVEX : DecOpR_RF<Xi16>, PL, PD; + def DEC32r_EVEX : DecOpR_RF<Xi32>, PL; + def DEC64r_EVEX : DecOpR_RF<Xi64>, PL; + } } let Predicates = [UseIncDec] in { -def INC8m : IncOpM_M<Xi8>; -def INC16m : IncOpM_M<Xi16>, OpSize16; -def INC32m : IncOpM_M<Xi32>, OpSize32; -def DEC8m : DecOpM_M<Xi8>; -def DEC16m : DecOpM_M<Xi16>, OpSize16; -def DEC32m : DecOpM_M<Xi32>, OpSize32; + def INC8m : IncOpM_MF<Xi8>; + def INC16m : IncOpM_MF<Xi16>, OpSize16; + def INC32m : IncOpM_MF<Xi32>, 
OpSize32; + def DEC8m : DecOpM_MF<Xi8>; + def DEC16m : DecOpM_MF<Xi16>, OpSize16; + def DEC32m : DecOpM_MF<Xi32>, OpSize32; } let Predicates = [UseIncDec, In64BitMode] in { -def INC64m : IncOpM_M<Xi64>; -def DEC64m : DecOpM_M<Xi64>; + def INC64m : IncOpM_MF<Xi64>; + def DEC64m : DecOpM_MF<Xi64>; +} +let Predicates = [HasNDD, In64BitMode, UseIncDec] in { + def INC8m_ND : IncOpM_RF<Xi8>; + def INC16m_ND : IncOpM_RF<Xi16>, PD; + def INC32m_ND : IncOpM_RF<Xi32>; + def DEC8m_ND : DecOpM_RF<Xi8>; + def DEC16m_ND : DecOpM_RF<Xi16>, PD; + def DEC32m_ND : DecOpM_RF<Xi32>; + def INC64m_ND : IncOpM_RF<Xi64>; + def DEC64m_ND : DecOpM_RF<Xi64>; +} +let Predicates = [In64BitMode], Pattern = [(null_frag)] in { + def INC8m_NF : IncOpM_M<Xi8>, NF; + def INC16m_NF : IncOpM_M<Xi16>, NF, PD; + def INC32m_NF : IncOpM_M<Xi32>, NF; + def INC64m_NF : IncOpM_M<Xi64>, NF; + def DEC8m_NF : DecOpM_M<Xi8>, NF; + def DEC16m_NF : DecOpM_M<Xi16>, NF, PD; + def DEC32m_NF : DecOpM_M<Xi32>, NF; + def DEC64m_NF : DecOpM_M<Xi64>, NF; + def INC8m_NF_ND : IncOpM_R<Xi8>, NF; + def INC16m_NF_ND : IncOpM_R<Xi16>, NF, PD; + def INC32m_NF_ND : IncOpM_R<Xi32>, NF; + def INC64m_NF_ND : IncOpM_R<Xi64>, NF; + def DEC8m_NF_ND : DecOpM_R<Xi8>, NF; + def DEC16m_NF_ND : DecOpM_R<Xi16>, NF, PD; + def DEC32m_NF_ND : DecOpM_R<Xi32>, NF; + def DEC64m_NF_ND : DecOpM_R<Xi64>, NF; + def INC8m_EVEX : IncOpM_MF<Xi8>, PL; + def INC16m_EVEX : IncOpM_MF<Xi16>, PL, PD; + def INC32m_EVEX : IncOpM_MF<Xi32>, PL; + def INC64m_EVEX : IncOpM_MF<Xi64>, PL; + def DEC8m_EVEX : DecOpM_MF<Xi8>, PL; + def DEC16m_EVEX : DecOpM_MF<Xi16>, PL, PD; + def DEC32m_EVEX : DecOpM_MF<Xi32>, PL; + def DEC64m_EVEX : DecOpM_MF<Xi64>, PL; } //===----------------------------------------------------------------------===// @@ -350,212 +590,212 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, let isCommutable = CommutableRR, isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in { let Predicates = [NoNDD] in { - def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>; - def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>, OpSize16; - def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>, OpSize32; - def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>; + def 8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>; + def 16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>, OpSize16; + def 32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>, OpSize32; + def 64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>; } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag, 1>; - def NAME#16rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag, 1>, PD; - def NAME#32rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag, 1>; - def NAME#64rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag, 1>; - def NAME#8rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi8, 1>, EVEX_NF; - def NAME#16rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi16, 1>, EVEX_NF, PD; - def NAME#32rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi32, 1>, EVEX_NF; - def NAME#64rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi64, 1>, EVEX_NF; + def 8rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag, 1>; + def 16rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag, 1>, PD; + def 32rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag, 1>; + def 64rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag, 1>; + def 8rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi8, 1>, EVEX_NF; + def 16rr_NF_ND : BinOpRR_R<BaseOpc, 
mnemonic, Xi16, 1>, EVEX_NF, PD; + def 32rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi32, 1>, EVEX_NF; + def 64rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi64, 1>, EVEX_NF; } let Predicates = [In64BitMode] in { - def NAME#8rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi8>, NF; - def NAME#16rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi16>, NF, PD; - def NAME#32rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi32>, NF; - def NAME#64rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi64>, NF; - def NAME#8rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; - def NAME#16rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; - def NAME#64rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + def 8rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi8>, NF; + def 16rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi16>, NF, PD; + def 32rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi32>, NF; + def 64rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi64>, NF; + def 8rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def 16rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def 32rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def 64rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; } } - def NAME#8rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>; - def NAME#16rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; - def NAME#32rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; - def NAME#64rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>; + def 8rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>; + def 16rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; + def 32rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; + def 64rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>; let Predicates = [In64BitMode] in { - def NAME#8rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; - def NAME#16rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; - def NAME#32rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; - def NAME#64rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; - def NAME#8rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; - def NAME#16rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; - def NAME#32rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; - def NAME#64rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; - def NAME#8rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8>, NF; - def NAME#16rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16>, NF, PD; - def NAME#32rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32>, NF; - def NAME#64rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64>, NF; - def NAME#8rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; - def NAME#16rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD; - def NAME#32rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; - def NAME#64rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; + def 8rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; + def 16rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; + def 32rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; + def 64rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; + def 8rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; + def 16rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; + def 32rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; + def 64rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; + 
def 8rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8>, NF; + def 16rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16>, NF, PD; + def 32rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32>, NF; + def 64rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64>, NF; + def 8rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; + def 16rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD; + def 32rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; + def 64rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; } let Predicates = [NoNDD] in { - def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>; - def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>, OpSize16; - def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>, OpSize32; - def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>; + def 8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>; + def 16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>, OpSize16; + def 32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>, OpSize32; + def 64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>; } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag, 1>; - def NAME#16rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag, 1>, PD; - def NAME#32rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag, 1>; - def NAME#64rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag, 1>; - def NAME#8rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; - def NAME#16rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD; - def NAME#32rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; - def NAME#64rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; + def 8rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag, 1>; + def 16rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag, 1>, PD; + def 32rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag, 1>; + def 64rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag, 1>; + def 8rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; + def 16rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD; + def 32rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; + def 64rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; } let Predicates = [In64BitMode] in { - def NAME#8rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi8>, NF; - def NAME#16rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi16>, NF, PD; - def NAME#32rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi32>, NF; - def NAME#64rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi64>, NF; - def NAME#8rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , null_frag>, PL; - def NAME#16rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, null_frag>, PL; - def NAME#64rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, null_frag>, PL; + def 8rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi8>, NF; + def 16rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi16>, NF, PD; + def 32rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi32>, NF; + def 64rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi64>, NF; + def 8rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , null_frag>, PL; + def 16rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, null_frag>, PL, PD; + def 32rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, null_frag>, PL; + def 64rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, null_frag>, PL; } let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { let Predicates = 
[NoNDD] in { // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. - def NAME#16ri8 : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; - def NAME#32ri8 : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; - def NAME#64ri8 : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>; - def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; - def NAME#16ri : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM>, OpSize16; - def NAME#32ri : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM>, OpSize32; - def NAME#64ri32: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM>; + def 16ri8 : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; + def 32ri8 : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; + def 64ri8 : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>; + def 8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; + def 16ri : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM>, OpSize16; + def 32ri : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM>, OpSize32; + def 64ri32: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM>; } let Predicates = [HasNDD, In64BitMode] in { - def NAME#16ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; - def NAME#32ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM, 1>; - def NAME#64ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM, 1>; - def NAME#8ri_ND : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM, 1>; - def NAME#16ri_ND : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM, 1>, PD; - def NAME#32ri_ND : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM, 1>; - def NAME#64ri32_ND: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM, 1>; - def NAME#16ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; - def NAME#32ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; - def NAME#64ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; - def NAME#8ri_NF_ND : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM, 1>, EVEX_NF; - def NAME#16ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; - def NAME#32ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; - def NAME#64ri32_NF_ND : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; + def 16ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; + def 32ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM, 1>; + def 64ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM, 1>; + def 8ri_ND : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM, 1>; + def 16ri_ND : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM, 1>, PD; + def 32ri_ND : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM, 1>; + def 64ri32_ND: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM, 1>; + def 16ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; + def 32ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; + def 64ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; + def 8ri_NF_ND : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM, 1>, EVEX_NF; + def 16ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; + def 32ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; + def 64ri32_NF_ND : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; } let Predicates = [In64BitMode] in { - def NAME#16ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM>, NF, PD; - def NAME#32ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM>, NF; - def NAME#64ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM>, NF; - def NAME#8ri_NF : BinOpRI_R<0x80, 
mnemonic, Xi8, RegMRM>, NF; - def NAME#16ri_NF : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM>, NF, PD; - def NAME#32ri_NF : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM>, NF; - def NAME#64ri32_NF : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM>, NF; - def NAME#16ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; - def NAME#32ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, PL; - def NAME#64ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>, PL; - def NAME#8ri_EVEX : BinOpRI_RF<0x80, mnemonic, Xi8 , null_frag, RegMRM>, PL; - def NAME#16ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi16, null_frag, RegMRM>, PL, PD; - def NAME#32ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi32, null_frag, RegMRM>, PL; - def NAME#64ri32_EVEX: BinOpRI_RF<0x81, mnemonic, Xi64, null_frag, RegMRM>, PL; + def 16ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM>, NF, PD; + def 32ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM>, NF; + def 64ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM>, NF; + def 8ri_NF : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM>, NF; + def 16ri_NF : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM>, NF, PD; + def 32ri_NF : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM>, NF; + def 64ri32_NF : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM>, NF; + def 16ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; + def 32ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, PL; + def 64ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>, PL; + def 8ri_EVEX : BinOpRI_RF<0x80, mnemonic, Xi8 , null_frag, RegMRM>, PL; + def 16ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi16, null_frag, RegMRM>, PL, PD; + def 32ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi32, null_frag, RegMRM>, PL; + def 64ri32_EVEX: BinOpRI_RF<0x81, mnemonic, Xi64, null_frag, RegMRM>, PL; } } - def NAME#8mr : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr : BinOpMR_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32mr : BinOpMR_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64mr : BinOpMR_MF<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr : BinOpMR_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32mr : BinOpMR_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64mr : BinOpMR_MF<BaseOpc, mnemonic, Xi64, opnode>; let Predicates = [HasNDD, In64BitMode] in { - def NAME#8mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; - def NAME#32mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi32, opnode>; - def NAME#64mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi64, opnode>; - def NAME#8mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi8>, EVEX_NF; - def NAME#16mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi16>, EVEX_NF, PD; - def NAME#32mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi32>, EVEX_NF; - def NAME#64mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi64>, EVEX_NF; + def 8mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; + def 32mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi32, opnode>; + def 64mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi8>, EVEX_NF; + def 16mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi16>, EVEX_NF, PD; + def 32mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi32>, EVEX_NF; + def 64mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi64>, EVEX_NF; } let Predicates = [In64BitMode] in { - def NAME#8mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi8>, NF; - def NAME#16mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi16>, NF, PD; - def NAME#32mr_NF : BinOpMR_M<BaseOpc, mnemonic, 
Xi32>, NF; - def NAME#64mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi64>, NF; - def NAME#8mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; - def NAME#16mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; - def NAME#64mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + def 8mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi8>, NF; + def 16mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi16>, NF, PD; + def 32mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi32>, NF; + def 64mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi64>, NF; + def 8mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def 16mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def 32mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def 64mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; } // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. - def NAME#16mi8 : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, OpSize16; - def NAME#32mi8 : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, OpSize32; + def 16mi8 : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, OpSize16; + def 32mi8 : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi8 : BinOpMI8_MF<mnemonic, Xi64, MemMRM>; - def NAME#8mi : BinOpMI_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi : BinOpMI_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; - def NAME#32mi : BinOpMI_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; + def 64mi8 : BinOpMI8_MF<mnemonic, Xi64, MemMRM>; + def 8mi : BinOpMI_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi : BinOpMI_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; + def 32mi : BinOpMI_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi32 : BinOpMI_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 64mi32 : BinOpMI_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; let Predicates = [HasNDD, In64BitMode] in { - def NAME#16mi8_ND : BinOpMI8_RF<mnemonic, Xi16, MemMRM>, PD; - def NAME#32mi8_ND : BinOpMI8_RF<mnemonic, Xi32, MemMRM>; - def NAME#64mi8_ND : BinOpMI8_RF<mnemonic, Xi64, MemMRM>; - def NAME#8mi_ND : BinOpMI_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi_ND : BinOpMI_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; - def NAME#32mi_ND : BinOpMI_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; - def NAME#64mi32_ND : BinOpMI_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; - def NAME#16mi8_NF_ND : BinOpMI8_R<mnemonic, Xi16, MemMRM>, NF, PD; - def NAME#32mi8_NF_ND : BinOpMI8_R<mnemonic, Xi32, MemMRM>, NF; - def NAME#64mi8_NF_ND : BinOpMI8_R<mnemonic, Xi64, MemMRM>, NF; - def NAME#8mi_NF_ND : BinOpMI_R<0x80, mnemonic, Xi8, MemMRM>, NF; - def NAME#16mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi16, MemMRM>, NF, PD; - def NAME#32mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi32, MemMRM>, NF; - def NAME#64mi32_NF_ND : BinOpMI_R<0x81, mnemonic, Xi64, MemMRM>, NF; + def 16mi8_ND : BinOpMI8_RF<mnemonic, Xi16, MemMRM>, PD; + def 32mi8_ND : BinOpMI8_RF<mnemonic, Xi32, MemMRM>; + def 64mi8_ND : BinOpMI8_RF<mnemonic, Xi64, MemMRM>; + def 8mi_ND : BinOpMI_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi_ND : BinOpMI_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; + def 32mi_ND : BinOpMI_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; + def 64mi32_ND : BinOpMI_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 16mi8_NF_ND : BinOpMI8_R<mnemonic, Xi16, MemMRM>, NF, PD; + def 32mi8_NF_ND : BinOpMI8_R<mnemonic, Xi32, 
MemMRM>, NF; + def 64mi8_NF_ND : BinOpMI8_R<mnemonic, Xi64, MemMRM>, NF; + def 8mi_NF_ND : BinOpMI_R<0x80, mnemonic, Xi8, MemMRM>, NF; + def 16mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi16, MemMRM>, NF, PD; + def 32mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi32, MemMRM>, NF; + def 64mi32_NF_ND : BinOpMI_R<0x81, mnemonic, Xi64, MemMRM>, NF; } let Predicates = [In64BitMode] in { - def NAME#16mi8_NF : BinOpMI8_M<mnemonic, Xi16, MemMRM>, NF, PD; - def NAME#32mi8_NF : BinOpMI8_M<mnemonic, Xi32, MemMRM>, NF; - def NAME#64mi8_NF : BinOpMI8_M<mnemonic, Xi64, MemMRM>, NF; - def NAME#8mi_NF : BinOpMI_M<0x80, mnemonic, Xi8, MemMRM>, NF; - def NAME#16mi_NF : BinOpMI_M<0x81, mnemonic, Xi16, MemMRM>, NF, PD; - def NAME#32mi_NF : BinOpMI_M<0x81, mnemonic, Xi32, MemMRM>, NF; - def NAME#64mi32_NF : BinOpMI_M<0x81, mnemonic, Xi64, MemMRM>, NF; - def NAME#16mi8_EVEX : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, PL, PD; - def NAME#32mi8_EVEX : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, PL; - def NAME#64mi8_EVEX : BinOpMI8_MF<mnemonic, Xi64, MemMRM>, PL; - def NAME#8mi_EVEX : BinOpMI_MF<0x80, mnemonic, Xi8 , null_frag, MemMRM>, PL; - def NAME#16mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi16, null_frag, MemMRM>, PL, PD; - def NAME#32mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi32, null_frag, MemMRM>, PL; - def NAME#64mi32_EVEX : BinOpMI_MF<0x81, mnemonic, Xi64, null_frag, MemMRM>, PL; + def 16mi8_NF : BinOpMI8_M<mnemonic, Xi16, MemMRM>, NF, PD; + def 32mi8_NF : BinOpMI8_M<mnemonic, Xi32, MemMRM>, NF; + def 64mi8_NF : BinOpMI8_M<mnemonic, Xi64, MemMRM>, NF; + def 8mi_NF : BinOpMI_M<0x80, mnemonic, Xi8, MemMRM>, NF; + def 16mi_NF : BinOpMI_M<0x81, mnemonic, Xi16, MemMRM>, NF, PD; + def 32mi_NF : BinOpMI_M<0x81, mnemonic, Xi32, MemMRM>, NF; + def 64mi32_NF : BinOpMI_M<0x81, mnemonic, Xi64, MemMRM>, NF; + def 16mi8_EVEX : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, PL, PD; + def 32mi8_EVEX : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, PL; + def 64mi8_EVEX : BinOpMI8_MF<mnemonic, Xi64, MemMRM>, PL; + def 8mi_EVEX : BinOpMI_MF<0x80, mnemonic, Xi8 , null_frag, MemMRM>, PL; + def 16mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi16, null_frag, MemMRM>, PL, PD; + def 32mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi32, null_frag, MemMRM>, PL; + def 64mi32_EVEX : BinOpMI_MF<0x81, mnemonic, Xi64, null_frag, MemMRM>, PL; } // These are for the disassembler since 0x82 opcode behaves like 0x80, but // not in 64-bit mode. 
let Predicates = [Not64BitMode] in { - def NAME#8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; - def NAME#8mi8 : BinOpMI8_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; + def 8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; + def 8mi8 : BinOpMI8_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; } - def NAME#8i8 : BinOpAI_AF<BaseOpc4, mnemonic, Xi8 , AL, + def 8i8 : BinOpAI_AF<BaseOpc4, mnemonic, Xi8 , AL, "{$src, %al|al, $src}">; - def NAME#16i16 : BinOpAI_AF<BaseOpc4, mnemonic, Xi16, AX, + def 16i16 : BinOpAI_AF<BaseOpc4, mnemonic, Xi16, AX, "{$src, %ax|ax, $src}">, OpSize16; - def NAME#32i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi32, EAX, + def 32i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi32, EAX, "{$src, %eax|eax, $src}">, OpSize32; - def NAME#64i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi64, RAX, + def 64i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi64, RAX, "{$src, %rax|rax, $src}">; } @@ -571,162 +811,162 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, bit ConvertibleToThreeAddress> { let isCommutable = CommutableRR in { let Predicates = [NoNDD] in { - def NAME#8rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; + def 8rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode>; + def 16rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode>; } } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode, 1>; + def 8rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode, 1>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode, 1>, PD; - def NAME#32rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode, 1>; - def NAME#64rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode, 1>; + def 16rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode, 1>, PD; + def 32rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode, 1>; + def 64rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode, 1>; } } } // isCommutable let Predicates = [In64BitMode] in { - def NAME#8rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; - def NAME#16rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; - def NAME#64rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + def 8rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def 16rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def 32rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def 64rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; } - def NAME#8rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>; - def NAME#16rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; - def NAME#32rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; - def NAME#64rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>; + def 8rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>; + def 16rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; + def 32rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; + def 64rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, 
Xi64>; let Predicates = [In64BitMode] in { - def NAME#8rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; - def NAME#16rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; - def NAME#32rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; - def NAME#64rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; - def NAME#8rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; - def NAME#16rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; - def NAME#32rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; - def NAME#64rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; + def 8rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; + def 16rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; + def 32rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; + def 64rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; + def 8rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; + def 16rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; + def 32rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; + def 64rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; } let Predicates = [NoNDD] in { - def NAME#8rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>; - def NAME#16rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>; + def 8rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>; + def 16rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; + def 32rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; + def 64rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>; } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode, 1>; - def NAME#16rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode, 1>, PD; - def NAME#32rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode, 1>; - def NAME#64rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode, 1>; + def 8rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode, 1>; + def 16rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode, 1>, PD; + def 32rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode, 1>; + def 64rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode, 1>; } let Predicates = [In64BitMode] in { - def NAME#8rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>, PL; - def NAME#16rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, PL, PD; - def NAME#32rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, PL; - def NAME#64rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>, PL; + def 8rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>, PL; + def 16rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, PL, PD; + def 32rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, PL; + def 64rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>, PL; } let Predicates = [NoNDD] in { - def NAME#8ri : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>; + def 8ri : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. 
- def NAME#16ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; - def NAME#32ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; - def NAME#64ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>; + def 16ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; + def 32ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; + def 64ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>; - def NAME#16ri : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; - def NAME#32ri : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; - def NAME#64ri32: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>; + def 16ri : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; + def 32ri : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; + def 64ri32: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>; } } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8ri_ND : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM, 1>; + def 8ri_ND : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM, 1>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; - def NAME#32ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM, 1>; - def NAME#64ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM, 1>; - def NAME#16ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM, 1>, PD; - def NAME#32ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM, 1>; - def NAME#64ri32_ND: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM, 1>; + def 16ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; + def 32ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM, 1>; + def 64ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM, 1>; + def 16ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM, 1>, PD; + def 32ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM, 1>; + def 64ri32_ND: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM, 1>; } } let Predicates = [In64BitMode] in { - def NAME#8ri_EVEX : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>, PL; - def NAME#16ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; - def NAME#32ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, PL; - def NAME#64ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>, PL; - def NAME#16ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, PL, PD; - def NAME#32ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, PL; - def NAME#64ri32_EVEX: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>, PL; + def 8ri_EVEX : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>, PL; + def 16ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; + def 32ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, PL; + def 64ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>, PL; + def 16ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, PL, PD; + def 32ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, PL; + def 64ri32_EVEX: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>, PL; } - def NAME#8mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, 
opnode>; let Predicates = [HasNDD, In64BitMode] in { - def NAME#8mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; - def NAME#32mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi32, opnode>; - def NAME#64mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; + def 32mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi32, opnode>; + def 64mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi64, opnode>; } let Predicates = [In64BitMode] in { - def NAME#8mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; - def NAME#16mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; - def NAME#64mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + def 8mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def 16mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def 32mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def 64mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; } // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. - def NAME#8mi : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi8 : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, OpSize16; - def NAME#32mi8 : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, OpSize32; + def 8mi : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi8 : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, OpSize16; + def 32mi8 : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi8 : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>; - def NAME#16mi : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; - def NAME#32mi : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; + def 64mi8 : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>; + def 16mi : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; + def 32mi : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi32 : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 64mi32 : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; let Predicates = [HasNDD, In64BitMode] in { - def NAME#8mi_ND : BinOpMIF_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi8_ND : BinOpMI8F_RF<mnemonic, Xi16, MemMRM>, PD; - def NAME#32mi8_ND : BinOpMI8F_RF<mnemonic, Xi32, MemMRM>; - def NAME#64mi8_ND : BinOpMI8F_RF<mnemonic, Xi64, MemMRM>; - def NAME#16mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; - def NAME#32mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; - def NAME#64mi32_ND : BinOpMIF_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 8mi_ND : BinOpMIF_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi8_ND : BinOpMI8F_RF<mnemonic, Xi16, MemMRM>, PD; + def 32mi8_ND : BinOpMI8F_RF<mnemonic, Xi32, MemMRM>; + def 64mi8_ND : BinOpMI8F_RF<mnemonic, Xi64, MemMRM>; + def 16mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; + def 32mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; + def 64mi32_ND : BinOpMIF_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; } let Predicates = [In64BitMode] in { - def NAME#8mi_EVEX : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>, PL; - def NAME#16mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, PL, PD; - def NAME#32mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi32, 
MemMRM>, PL; - def NAME#64mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>, PL; - def NAME#16mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, PL, PD; - def NAME#32mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, PL; - def NAME#64mi32_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>, PL; + def 8mi_EVEX : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>, PL; + def 16mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, PL, PD; + def 32mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, PL; + def 64mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>, PL; + def 16mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, PL, PD; + def 32mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, PL; + def 64mi32_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>, PL; } // These are for the disassembler since 0x82 opcode behaves like 0x80, but // not in 64-bit mode. let Predicates = [Not64BitMode] in { - def NAME#8ri8 : BinOpRI8F_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; - def NAME#8mi8 : BinOpMI8F_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; + def 8ri8 : BinOpRI8F_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; + def 8mi8 : BinOpMI8F_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; } - def NAME#8i8 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi8 , AL, + def 8i8 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi8 , AL, "{$src, %al|al, $src}">; - def NAME#16i16 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi16, AX, + def 16i16 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi16, AX, "{$src, %ax|ax, $src}">, OpSize16; - def NAME#32i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi32, EAX, + def 32i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi32, EAX, "{$src, %eax|eax, $src}">, OpSize32; - def NAME#64i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi64, RAX, + def 64i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi64, RAX, "{$src, %rax|rax, $src}">; } @@ -739,71 +979,71 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, SDNode opnode, bit CommutableRR, bit ConvertibleToThreeAddress> { let isCommutable = CommutableRR in { - def NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>; + def 8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>; + def 16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>; } // isConvertibleToThreeAddress } // isCommutable - def NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>; - def NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; - def NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; - def NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>; + def 8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>; + def 16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; + def 32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; + def 64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>; - def NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>; - def NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>; + def 8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>; + def 16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, 
opnode>, OpSize16; + def 32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; + def 64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>; - def NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>; + def 8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. - def NAME#16ri8 : BinOpRI8_F<0x83, mnemonic, Xi16, RegMRM>, OpSize16; - def NAME#32ri8 : BinOpRI8_F<0x83, mnemonic, Xi32, RegMRM>, OpSize32; - def NAME#64ri8 : BinOpRI8_F<0x83, mnemonic, Xi64, RegMRM>; + def 16ri8 : BinOpRI8_F<0x83, mnemonic, Xi16, RegMRM>, OpSize16; + def 32ri8 : BinOpRI8_F<0x83, mnemonic, Xi32, RegMRM>, OpSize32; + def 64ri8 : BinOpRI8_F<0x83, mnemonic, Xi64, RegMRM>; - def NAME#16ri : BinOpRI_F<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; - def NAME#32ri : BinOpRI_F<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; - def NAME#64ri32: BinOpRI_F<0x81, mnemonic, Xi64, opnode, RegMRM>; + def 16ri : BinOpRI_F<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; + def 32ri : BinOpRI_F<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; + def 64ri32: BinOpRI_F<0x81, mnemonic, Xi64, opnode, RegMRM>; } - def NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>; // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. - def NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, MemMRM>, OpSize16; - def NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, MemMRM>, OpSize32; + def 16mi8 : BinOpMI8_F<mnemonic, Xi16, MemMRM>, OpSize16; + def 32mi8 : BinOpMI8_F<mnemonic, Xi32, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, MemMRM>; + def 64mi8 : BinOpMI8_F<mnemonic, Xi64, MemMRM>; - def NAME#8mi : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi : BinOpMI_F<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; - def NAME#32mi : BinOpMI_F<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; + def 8mi : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi : BinOpMI_F<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; + def 32mi : BinOpMI_F<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi32 : BinOpMI_F<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 64mi32 : BinOpMI_F<0x81, mnemonic, Xi64, opnode, MemMRM>; // These are for the disassembler since 0x82 opcode behaves like 0x80, but // not in 64-bit mode. 
let Predicates = [Not64BitMode] in { - def NAME#8ri8 : BinOpRI8_F<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; + def 8ri8 : BinOpRI8_F<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; let mayLoad = 1 in - def NAME#8mi8 : BinOpMI8_F<mnemonic, Xi8, MemMRM>; + def 8mi8 : BinOpMI8_F<mnemonic, Xi8, MemMRM>; } - def NAME#8i8 : BinOpAI_F<BaseOpc4, mnemonic, Xi8 , AL, + def 8i8 : BinOpAI_F<BaseOpc4, mnemonic, Xi8 , AL, "{$src, %al|al, $src}">; - def NAME#16i16 : BinOpAI_F<BaseOpc4, mnemonic, Xi16, AX, + def 16i16 : BinOpAI_F<BaseOpc4, mnemonic, Xi16, AX, "{$src, %ax|ax, $src}">, OpSize16; - def NAME#32i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi32, EAX, + def 32i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi32, EAX, "{$src, %eax|eax, $src}">, OpSize32; - def NAME#64i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi64, RAX, + def 64i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi64, RAX, "{$src, %rax|rax, $src}">; } @@ -1119,14 +1359,34 @@ defm MULX64 : MulX<Xi64, WriteMULX64>, REX_W; // We don't have patterns for these as there is no advantage over ADC for // most code. let Form = MRMSrcReg in { -def ADCX32rr : BinOpRRF_RF<0xF6, "adcx", Xi32, null_frag>, T8, PD; -def ADCX64rr : BinOpRRF_RF<0xF6, "adcx", Xi64, null_frag>, T8, PD; -def ADOX32rr : BinOpRRF_RF<0xF6, "adox", Xi32, null_frag>, T8, XS; -def ADOX64rr : BinOpRRF_RF<0xF6, "adox", Xi64, null_frag>, T8, XS; + def ADCX32rr : BinOpRRF_RF<0xF6, "adcx", Xi32>, T8, PD; + def ADCX64rr : BinOpRRF_RF<0xF6, "adcx", Xi64>, T8, PD; + def ADOX32rr : BinOpRRF_RF<0xF6, "adox", Xi32>, T8, XS; + def ADOX64rr : BinOpRRF_RF<0xF6, "adox", Xi64>, T8, XS; + let Predicates =[In64BitMode] in { + def ADCX32rr_EVEX : BinOpRRF_RF<0x66, "adcx", Xi32>, EVEX, T_MAP4, PD; + def ADCX64rr_EVEX : BinOpRRF_RF<0x66, "adcx", Xi64>, EVEX, T_MAP4, PD; + def ADOX32rr_EVEX : BinOpRRF_RF<0x66, "adox", Xi32>, EVEX, T_MAP4, XS; + def ADOX64rr_EVEX : BinOpRRF_RF<0x66, "adox", Xi64>, EVEX, T_MAP4, XS; + def ADCX32rr_ND : BinOpRRF_RF<0x66, "adcx", Xi32, null_frag, 1>, PD; + def ADCX64rr_ND : BinOpRRF_RF<0x66, "adcx", Xi64, null_frag, 1>, PD; + def ADOX32rr_ND : BinOpRRF_RF<0x66, "adox", Xi32, null_frag, 1>, XS; + def ADOX64rr_ND : BinOpRRF_RF<0x66, "adox", Xi64, null_frag, 1>, XS; + } } let Form = MRMSrcMem in { -def ADCX32rm : BinOpRMF_RF<0xF6, "adcx", Xi32, null_frag>, T8, PD; -def ADCX64rm : BinOpRMF_RF<0xF6, "adcx", Xi64, null_frag>, T8, PD; -def ADOX32rm : BinOpRMF_RF<0xF6, "adox", Xi32, null_frag>, T8, XS; -def ADOX64rm : BinOpRMF_RF<0xF6, "adox", Xi64, null_frag>, T8, XS; + def ADCX32rm : BinOpRMF_RF<0xF6, "adcx", Xi32>, T8, PD; + def ADCX64rm : BinOpRMF_RF<0xF6, "adcx", Xi64>, T8, PD; + def ADOX32rm : BinOpRMF_RF<0xF6, "adox", Xi32>, T8, XS; + def ADOX64rm : BinOpRMF_RF<0xF6, "adox", Xi64>, T8, XS; + let Predicates =[In64BitMode] in { + def ADCX32rm_EVEX : BinOpRMF_RF<0x66, "adcx", Xi32>, EVEX, T_MAP4, PD; + def ADCX64rm_EVEX : BinOpRMF_RF<0x66, "adcx", Xi64>, EVEX, T_MAP4, PD; + def ADOX32rm_EVEX : BinOpRMF_RF<0x66, "adox", Xi32>, EVEX, T_MAP4, XS; + def ADOX64rm_EVEX : BinOpRMF_RF<0x66, "adox", Xi64>, EVEX, T_MAP4, XS; + def ADCX32rm_ND : BinOpRMF_RF<0x66, "adcx", Xi32, null_frag, 1>, PD; + def ADCX64rm_ND : BinOpRMF_RF<0x66, "adcx", Xi64, null_frag, 1>, PD; + def ADOX32rm_ND : BinOpRMF_RF<0x66, "adox", Xi32, null_frag, 1>, XS; + def ADOX64rm_ND : BinOpRMF_RF<0x66, "adox", Xi64, null_frag, 1>, XS; + } } diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td index 6e76b44b66a3..8798b13a1761 100644 --- a/llvm/lib/Target/X86/X86InstrFormats.td +++ 
b/llvm/lib/Target/X86/X86InstrFormats.td @@ -247,8 +247,6 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, bit hasREPPrefix = 0; // Does this inst have a REP prefix? bits<2> OpEncBits = OpEnc.Value; bit IgnoresW = 0; // Does this inst ignore REX_W field? - bit EVEX_W1_VEX_W0 = 0; // This EVEX inst with VEX.W==1 can become a VEX - // instruction with VEX.W == 0. bit hasVEX_4V = 0; // Does this inst require the VEX.VVVV field? bit hasVEX_L = 0; // Does this inst use large (256-bit) registers? bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit @@ -279,10 +277,6 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, CD8_EltSize, !srl(VectSize, CD8_Form{1-0}))), 0); - // Used to prevent an explicit EVEX2VEX override for this instruction. - string EVEX2VEXOverride = ?; - - bit notEVEX2VEXConvertible = 0; // Prevent EVEX->VEX conversion. ExplicitOpPrefix explicitOpPrefix = NoExplicitOpPrefix; bits<2> explicitOpPrefixBits = explicitOpPrefix.Value; // TSFlags layout should be kept in sync with X86BaseInfo.h. diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index eac8d79eb8a3..eb0734f9a618 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -29,8 +29,10 @@ class X86Subtarget; namespace X86 { enum AsmComments { + // For instr that was compressed from EVEX to LEGACY. + AC_EVEX_2_LEGACY = MachineInstr::TAsmComments, // For instr that was compressed from EVEX to VEX. - AC_EVEX_2_VEX = MachineInstr::TAsmComments + AC_EVEX_2_VEX = AC_EVEX_2_LEGACY << 1 }; /// Return a pair of condition code for the given predicate and whether diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td index 305bd74f7bd7..97c625a64cfc 100644 --- a/llvm/lib/Target/X86/X86InstrMisc.td +++ b/llvm/lib/Target/X86/X86InstrMisc.td @@ -1212,36 +1212,33 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in { (implicit EFLAGS)]>, TB, XS, Sched<[WriteTZCNTLd]>; } -multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM, - RegisterClass RC, X86MemOperand x86memop, - X86FoldableSchedWrite sched, string Suffix = ""> { -let hasSideEffects = 0 in { - def rr#Suffix : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src), - !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>, - T8, VEX, VVVV, Sched<[sched]>; - let mayLoad = 1 in - def rm#Suffix : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src), - !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>, - T8, VEX, VVVV, Sched<[sched.Folded]>; -} +multiclass Bls<string m, Format RegMRM, Format MemMRM, X86TypeInfo t, string Suffix = ""> { + let SchedRW = [WriteBLS] in { + def rr#Suffix : UnaryOpR<0xF3, RegMRM, m, unaryop_ndd_args, t, + (outs t.RegClass:$dst), []>, T8, VVVV; + } + + let SchedRW = [WriteBLS.Folded] in + def rm#Suffix : UnaryOpM<0xF3, MemMRM, m, unaryop_ndd_args, t, + (outs t.RegClass:$dst), []>, T8, VVVV; } -let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in { - defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS>; - defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS>, REX_W; - defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS>; - defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS>, REX_W; - defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS>; - defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS>, REX_W; +let Predicates = [HasBMI], Defs = [EFLAGS] in { + defm BLSR32 : Bls<"blsr", MRM1r, MRM1m, Xi32>, 
VEX; + defm BLSR64 : Bls<"blsr", MRM1r, MRM1m, Xi64>, VEX; + defm BLSMSK32 : Bls<"blsmsk", MRM2r, MRM2m, Xi32>, VEX; + defm BLSMSK64 : Bls<"blsmsk", MRM2r, MRM2m, Xi64>, VEX; + defm BLSI32 : Bls<"blsi", MRM3r, MRM3m, Xi32>, VEX; + defm BLSI64 : Bls<"blsi", MRM3r, MRM3m, Xi64>, VEX; } -let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in { - defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX; - defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX; - defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX; - defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX; - defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX; - defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX; +let Predicates = [HasBMI, In64BitMode], Defs = [EFLAGS] in { + defm BLSR32 : Bls<"blsr", MRM1r, MRM1m, Xi32, "_EVEX">, EVEX; + defm BLSR64 : Bls<"blsr", MRM1r, MRM1m, Xi64, "_EVEX">, EVEX; + defm BLSMSK32 : Bls<"blsmsk", MRM2r, MRM2m, Xi32, "_EVEX">, EVEX; + defm BLSMSK64 : Bls<"blsmsk", MRM2r, MRM2m, Xi64, "_EVEX">, EVEX; + defm BLSI32 : Bls<"blsi", MRM3r, MRM3m, Xi32, "_EVEX">, EVEX; + defm BLSI64 : Bls<"blsi", MRM3r, MRM3m, Xi64, "_EVEX">, EVEX; } let Predicates = [HasBMI] in { @@ -1281,50 +1278,35 @@ let Predicates = [HasBMI] in { (BLSI64rr GR64:$src)>; } -multiclass bmi4VOp3_base<bits<8> opc, string mnemonic, RegisterClass RC, - X86MemOperand x86memop, SDPatternOperator OpNode, - PatFrag ld_frag, X86FoldableSchedWrite Sched, - string Suffix = ""> { - def rr#Suffix : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2), - !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (OpNode RC:$src1, RC:$src2)), (implicit EFLAGS)]>, - T8, VEX, Sched<[Sched]>; -let mayLoad = 1 in - def rm#Suffix : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2), - !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (OpNode (ld_frag addr:$src1), RC:$src2)), - (implicit EFLAGS)]>, T8, VEX, - Sched<[Sched.Folded, - // x86memop:$src1 - ReadDefault, ReadDefault, ReadDefault, ReadDefault, - ReadDefault, - // RC:$src2 - Sched.ReadAfterFold]>; +multiclass Bmi4VOp3<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, + X86FoldableSchedWrite sched, string Suffix = ""> { + let SchedRW = [sched], Form = MRMSrcReg4VOp3 in + def rr#Suffix : BinOpRR<o, m, binop_ndd_args, t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, + (node t.RegClass:$src1, t.RegClass:$src2))]>, T8; + let SchedRW = [sched.Folded, + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + sched.ReadAfterFold], Form = MRMSrcMem4VOp3 in + def rm#Suffix : BinOpMR<o, m, binop_ndd_args, t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, (node (t.LoadNode addr:$src1), + t.RegClass:$src2))]>, T8; } let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in { - defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem, - X86bextr, loadi32, WriteBEXTR>; - defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem, - X86bextr, loadi64, WriteBEXTR>, REX_W; + defm BEXTR32 : Bmi4VOp3<0xF7, "bextr", Xi32, X86bextr, WriteBEXTR>, VEX; + defm BEXTR64 : Bmi4VOp3<0xF7, "bextr", Xi64, X86bextr, WriteBEXTR>, VEX; } let Predicates = [HasBMI2, NoEGPR], Defs = [EFLAGS] in { - defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem, - X86bzhi, loadi32, 
WriteBZHI>; - defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem, - X86bzhi, loadi64, WriteBZHI>, REX_W; + defm BZHI32 : Bmi4VOp3<0xF5, "bzhi", Xi32, X86bzhi, WriteBZHI>, VEX; + defm BZHI64 : Bmi4VOp3<0xF5, "bzhi", Xi64, X86bzhi, WriteBZHI>, VEX; } -let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in { - defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem, - X86bextr, loadi32, WriteBEXTR, "_EVEX">, EVEX; - defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem, - X86bextr, loadi64, WriteBEXTR, "_EVEX">, EVEX, REX_W; +let Predicates = [HasBMI, HasEGPR, In64BitMode], Defs = [EFLAGS] in { + defm BEXTR32 : Bmi4VOp3<0xF7, "bextr", Xi32, X86bextr, WriteBEXTR, "_EVEX">, EVEX; + defm BEXTR64 : Bmi4VOp3<0xF7, "bextr", Xi64, X86bextr, WriteBEXTR, "_EVEX">, EVEX; } -let Predicates = [HasBMI2, HasEGPR], Defs = [EFLAGS] in { - defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem, - X86bzhi, loadi32, WriteBZHI, "_EVEX">, EVEX; - defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem, - X86bzhi, loadi64, WriteBZHI, "_EVEX">, EVEX, REX_W; +let Predicates = [HasBMI2, HasEGPR, In64BitMode], Defs = [EFLAGS] in { + defm BZHI32 : Bmi4VOp3<0xF5, "bzhi", Xi32, X86bzhi, WriteBZHI, "_EVEX">, EVEX; + defm BZHI64 : Bmi4VOp3<0xF5, "bzhi", Xi64, X86bzhi, WriteBZHI, "_EVEX">, EVEX; } def CountTrailingOnes : SDNodeXForm<imm, [{ @@ -1371,22 +1353,22 @@ multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC, def rr#Suffix : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>, - VEX, VVVV, Sched<[WriteALU]>; + NoCD8, VVVV, Sched<[WriteALU]>; def rm#Suffix : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (OpNode RC:$src1, (ld_frag addr:$src2)))]>, - VEX, VVVV, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>; + NoCD8, VVVV, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>; } let Predicates = [HasBMI2, NoEGPR] in { defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem, - X86pdep, loadi32>, T8, XD; + X86pdep, loadi32>, T8, XD, VEX; defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem, - X86pdep, loadi64>, T8, XD, REX_W; + X86pdep, loadi64>, T8, XD, REX_W, VEX; defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem, - X86pext, loadi32>, T8, XS; + X86pext, loadi32>, T8, XS, VEX; defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem, - X86pext, loadi64>, T8, XS, REX_W; + X86pext, loadi64>, T8, XS, REX_W, VEX; } let Predicates = [HasBMI2, HasEGPR] in { diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td index 94fa6e45ded9..cb751639a057 100644 --- a/llvm/lib/Target/X86/X86InstrPredicates.td +++ b/llvm/lib/Target/X86/X86InstrPredicates.td @@ -8,8 +8,41 @@ def TruePredicate : Predicate<"true">; +// Intel x86 instructions have three separate encoding spaces: legacy, VEX, and +// EVEX. Not all X86 instructions are extended for EGPR. The following is an +// overview of which instructions are extended and how we implement them. +// +// * Legacy space +// All instructions in legacy maps 0 and 1 that have explicit GPR or memory +// operands can use the REX2 prefix to access the EGPR, except XSAVE*/XRSTOR. +// +// * EVEX space +// All instructions in the EVEX space can access the EGPR in their +// register/memory operands. 
+// +// For the above instructions, the only difference in encoding is reflected in +// the REX2/EVEX prefix when EGPR is used, i.e. the opcode and opcode name are +// unchanged. We don't add new entries in TD, and instead we extend GPR with +// R16-R31 and make them allocatable only when the feature EGPR is available. +// +// In addition, some instructions in legacy space with map 2/3 and VEX space are +// promoted into EVEX space. The encoding space changes after the promotion, and the opcode +// and opcode map may sometimes change too. For these instructions, we add new +// entries in TD to avoid overcomplicating the assembler and disassembler. +// +// HasEGPR is for the new entries and NoEGPR is for the entries before +// promotion, so that the promoted variant can be selected first to benefit RA. def HasEGPR : Predicate<"Subtarget->hasEGPR()">; def NoEGPR : Predicate<"!Subtarget->hasEGPR()">; + +// APX extends some instructions with a new form that has an extra register +// operand called a new data destination (NDD). In such forms, NDD is the new +// destination register receiving the result of the computation, and all other +// operands (including the original destination operand) become read-only source +// operands. +// +// HasNDD is for the new NDD entries and NoNDD is for the legacy 2-address +// entries, so that the NDD variant can be selected first to benefit RA. def HasNDD : Predicate<"Subtarget->hasNDD()">; def NoNDD : Predicate<"!Subtarget->hasNDD()">; def HasCMOV : Predicate<"Subtarget->canUseCMOV()">; diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td index d13e3b7af69a..f951894db189 100644 --- a/llvm/lib/Target/X86/X86InstrShiftRotate.td +++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td @@ -868,7 +868,7 @@ let Predicates = [HasBMI2, NoEGPR] in { defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8, PD, REX_W; } -let Predicates = [HasBMI2, HasEGPR] in { +let Predicates = [HasBMI2, HasEGPR, In64BitMode] in { defm RORX32 : bmi_rotate<"rorx{l}", GR32, i32mem, "_EVEX">, EVEX; defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem, "_EVEX">, REX_W, EVEX; defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem, "_EVEX">, T8, XS, EVEX; diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td index 699e5847e63f..b1be4739617d 100644 --- a/llvm/lib/Target/X86/X86InstrSystem.td +++ b/llvm/lib/Target/X86/X86InstrSystem.td @@ -695,14 +695,14 @@ def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), Requires<[Not64BitMode, HasINVPCID]>; def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8, PD, - Requires<[In64BitMode, HasINVPCID]>; + Requires<[In64BitMode]>; def INVPCID64_EVEX : I<0xF2, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invpcid\t{$src2, $src1|$src1, $src2}", []>, - EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID]>; + EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>; } // SchedRW -let Predicates = [In64BitMode, HasINVPCID] in { +let Predicates = [HasINVPCID, NoEGPR] in { // The instruction can only use a 64 bit register as the register argument // in 64 bit mode, while the intrinsic only accepts a 32 bit argument // corresponding to it.
@@ -714,6 +714,13 @@ let Predicates = [In64BitMode, HasINVPCID] in { addr:$src2)>; } +let Predicates = [HasINVPCID, HasEGPR] in { + def : Pat<(int_x86_invpcid GR32:$src1, addr:$src2), + (INVPCID64_EVEX + (SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src1), sub_32bit), + addr:$src2)>; +} + //===----------------------------------------------------------------------===// // SMAP Instruction diff --git a/llvm/lib/Target/X86/X86InstrUtils.td b/llvm/lib/Target/X86/X86InstrUtils.td index da85922a018d..f4ae15837fbf 100644 --- a/llvm/lib/Target/X86/X86InstrUtils.td +++ b/llvm/lib/Target/X86/X86InstrUtils.td @@ -43,8 +43,6 @@ class XOP { Encoding OpEnc = EncXOP; } class VEX { Encoding OpEnc = EncVEX; } class EVEX { Encoding OpEnc = EncEVEX; } class WIG { bit IgnoresW = 1; } -// Special version of REX_W that can be changed to VEX.W==0 for EVEX2VEX. -class VEX_W1X { bit hasREX_W = 1; bit EVEX_W1_VEX_W0 = 1; } class VEX_L { bit hasVEX_L = 1; } class VEX_LIG { bit ignoresVEX_L = 1; } class VVVV { bit hasVEX_4V = 1; } @@ -66,9 +64,6 @@ class EVEX_CD8<int esize, CD8VForm form> { } class NoCD8 { bits<7> CD8_Scale = 0; } -class EVEX2VEXOverride<string VEXInstrName> { - string EVEX2VEXOverride = VEXInstrName; -} class AVX512BIi8Base : TB, PD { Domain ExeDomain = SSEPackedInt; ImmType ImmT = Imm8; @@ -89,7 +84,6 @@ class AVX512PDIi8Base : TB, PD { Domain ExeDomain = SSEPackedDouble; ImmType ImmT = Imm8; } -class NotEVEX2VEXConvertible { bit notEVEX2VEXConvertible = 1; } class ExplicitREX2Prefix { ExplicitOpPrefix explicitOpPrefix = ExplicitREX2; } class ExplicitVEXPrefix { ExplicitOpPrefix explicitOpPrefix = ExplicitVEX; } class ExplicitEVEXPrefix { ExplicitOpPrefix explicitOpPrefix = ExplicitEVEX; } @@ -1005,7 +999,7 @@ class BinOpRR_RF_Rev<bits<8> o, string m, X86TypeInfo t, bit ndd = 0> } // BinOpRRF_RF - Instructions that read "reg, reg", write "reg" and read/write // EFLAGS. -class BinOpRRF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0> +class BinOpRRF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node = null_frag, bit ndd = 0> : BinOpRR<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, (outs t.RegClass:$dst), [(set t.RegClass:$dst, EFLAGS, (node t.RegClass:$src1, t.RegClass:$src2, @@ -1041,7 +1035,7 @@ class BinOpRM_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit (t.LoadNode addr:$src2)))]>, DefEFLAGS, NDD<ndd>; // BinOpRMF_RF - Instructions that read "reg, [mem]", write "reg" and read/write // EFLAGS. -class BinOpRMF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0> +class BinOpRMF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node = null_frag, bit ndd = 0> : BinOpRM<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, (outs t.RegClass:$dst), [(set t.RegClass:$dst, EFLAGS, (node t.RegClass:$src1, (t.LoadNode addr:$src2), EFLAGS))]>, diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index e1a67f61e766..133ee2041565 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -2055,10 +2055,11 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) { } } - // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that - // are compressed from EVEX encoding to VEX encoding. 
+ // Add a comment about EVEX compression if (TM.Options.MCOptions.ShowMCEncoding) { - if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX) + if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY) + OutStreamer->AddComment("EVEX TO LEGACY Compression ", false); + else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX) OutStreamer->AddComment("EVEX TO VEX Compression ", false); } diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 5668b514d6de..b92bffbe6239 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -75,7 +75,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() { initializeGlobalISel(PR); initializeWinEHStatePassPass(PR); initializeFixupBWInstPassPass(PR); - initializeEvexToVexInstPassPass(PR); + initializeCompressEVEXPassPass(PR); initializeFixupLEAPassPass(PR); initializeFPSPass(PR); initializeX86FixupSetCCPassPass(PR); @@ -575,7 +575,7 @@ void X86PassConfig::addPreEmitPass() { addPass(createX86FixupInstTuning()); addPass(createX86FixupVectorConstants()); } - addPass(createX86EvexToVexInsts()); + addPass(createX86CompressEVEXPass()); addPass(createX86DiscriminateMemOpsPass()); addPass(createX86InsertPrefetchPass()); addPass(createX86InsertX87waitPass()); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 49631f38017a..cd40b1d3b093 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2232,6 +2232,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, static const TypeConversionCostTblEntry AVX512FConversionTbl[] = { { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 }, { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 }, + { ISD::FP_EXTEND, MVT::v16f64, MVT::v16f32, 4 }, // 2*vcvtps2pd+vextractf64x4 { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 }, { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, // sext+vpslld+vptestmd |
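For reference, a minimal TableGen sketch of the predicate pairing described in the comments added to X86InstrPredicates.td above. It only juxtaposes entries that already appear in the hunks of this patch (the BEXTR32 defms from X86InstrMisc.td, and the 8rr defs excerpted from inside the ArithBinOp_RFF multiclass in X86InstrArith.td, where BaseOpc, mnemonic and opnode are multiclass parameters); it is a sketch of the pattern, not additional patch content.

  // EGPR: un-promoted VEX entry vs. promoted EVEX entry.
  let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in
    defm BEXTR32 : Bmi4VOp3<0xF7, "bextr", Xi32, X86bextr, WriteBEXTR>, VEX;
  let Predicates = [HasBMI, HasEGPR, In64BitMode], Defs = [EFLAGS] in
    defm BEXTR32 : Bmi4VOp3<0xF7, "bextr", Xi32, X86bextr, WriteBEXTR, "_EVEX">, EVEX;

  // NDD: legacy 2-address entry vs. new-data-destination entry.
  let Predicates = [NoNDD] in
    def 8rr    : BinOpRRF_RF<BaseOpc, mnemonic, Xi8, opnode>;
  let Predicates = [HasNDD, In64BitMode] in
    def 8rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi8, opnode, 1>;

Because each pair is guarded by complementary predicates, only one variant is legal for a given subtarget, and listing the promoted or NDD entries under HasEGPR/HasNDD lets them be selected first when the features are present, as the comments note.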