diff options
Diffstat (limited to 'contrib/llvm/lib/Target/X86/X86InstrInfo.td')
-rw-r--r-- | contrib/llvm/lib/Target/X86/X86InstrInfo.td | 393 |
1 files changed, 245 insertions, 148 deletions
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td index 4ec4d566ca99..8e05dd8ec5c1 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td @@ -1,9 +1,8 @@ //===-- X86InstrInfo.td - Main X86 Instruction Definition --*- tablegen -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -64,6 +63,10 @@ def SDTX86sahf : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i8>]>; def SDTX86rdrand : SDTypeProfile<2, 0, [SDTCisInt<0>, SDTCisVT<1, i32>]>; +def SDTX86rdpkru : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +def SDTX86wrpkru : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>]>; + def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>, SDTCisVT<2, i8>]>; def SDTX86caspair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; @@ -124,6 +127,9 @@ def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>; +def SDT_X86ENQCMD : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, + SDTCisPtrTy<1>, SDTCisSameAs<1, 2>]>; + def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER, [SDNPHasChain,SDNPSideEffect]>; def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER, @@ -152,6 +158,11 @@ def X86rdrand : SDNode<"X86ISD::RDRAND", SDTX86rdrand, def X86rdseed : SDNode<"X86ISD::RDSEED", SDTX86rdrand, [SDNPHasChain, SDNPSideEffect]>; +def X86rdpkru : SDNode<"X86ISD::RDPKRU", SDTX86rdpkru, + [SDNPHasChain, SDNPSideEffect]>; +def X86wrpkru : SDNode<"X86ISD::WRPKRU", SDTX86wrpkru, + [SDNPHasChain, SDNPSideEffect]>; + def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; @@ -206,13 +217,6 @@ def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad]>; -def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void, - [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; -def X86rdtscp : SDNode<"X86ISD::RDTSCP_DAG", SDTX86Void, - [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; -def X86rdpmc : SDNode<"X86ISD::RDPMC_DAG", SDTX86Void, - [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; - def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>; def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>; @@ -306,6 +310,11 @@ def X86tpause : SDNode<"X86ISD::TPAUSE", SDTCisVT<2, i32>, SDTCisVT<3, i32>]>, [SDNPHasChain, SDNPSideEffect]>; +def X86enqcmd : SDNode<"X86ISD::ENQCMD", SDT_X86ENQCMD, + [SDNPHasChain, SDNPSideEffect]>; +def X86enqcmds : SDNode<"X86ISD::ENQCMDS", SDT_X86ENQCMD, + [SDNPHasChain, SDNPSideEffect]>; + //===----------------------------------------------------------------------===// // X86 Operand Definitions. // @@ -371,37 +380,35 @@ def anymem : X86MemOperand<"printanymem">; // restrict to only unsized memory. def opaquemem : X86MemOperand<"printopaquemem">; -def i8mem : X86MemOperand<"printi8mem", X86Mem8AsmOperand>; -def i16mem : X86MemOperand<"printi16mem", X86Mem16AsmOperand>; -def i32mem : X86MemOperand<"printi32mem", X86Mem32AsmOperand>; -def i64mem : X86MemOperand<"printi64mem", X86Mem64AsmOperand>; -def i128mem : X86MemOperand<"printi128mem", X86Mem128AsmOperand>; -def i256mem : X86MemOperand<"printi256mem", X86Mem256AsmOperand>; -def i512mem : X86MemOperand<"printi512mem", X86Mem512AsmOperand>; -def f32mem : X86MemOperand<"printf32mem", X86Mem32AsmOperand>; -def f64mem : X86MemOperand<"printf64mem", X86Mem64AsmOperand>; -def f80mem : X86MemOperand<"printf80mem", X86Mem80AsmOperand>; -def f128mem : X86MemOperand<"printf128mem", X86Mem128AsmOperand>; -def f256mem : X86MemOperand<"printf256mem", X86Mem256AsmOperand>; -def f512mem : X86MemOperand<"printf512mem", X86Mem512AsmOperand>; - -def v512mem : X86VMemOperand<VR512, "printf512mem", X86Mem512AsmOperand>; +def i8mem : X86MemOperand<"printbytemem", X86Mem8AsmOperand>; +def i16mem : X86MemOperand<"printwordmem", X86Mem16AsmOperand>; +def i32mem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>; +def i64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>; +def i128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand>; +def i256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand>; +def i512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand>; +def f32mem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>; +def f64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>; +def f80mem : X86MemOperand<"printtbytemem", X86Mem80AsmOperand>; +def f128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand>; +def f256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand>; +def f512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand>; // Gather mem operands -def vx64mem : X86VMemOperand<VR128, "printi64mem", X86Mem64_RC128Operand>; -def vx128mem : X86VMemOperand<VR128, "printi128mem", X86Mem128_RC128Operand>; -def vx256mem : X86VMemOperand<VR128, "printi256mem", X86Mem256_RC128Operand>; -def vy128mem : X86VMemOperand<VR256, "printi128mem", X86Mem128_RC256Operand>; -def vy256mem : X86VMemOperand<VR256, "printi256mem", X86Mem256_RC256Operand>; - -def vx64xmem : X86VMemOperand<VR128X, "printi64mem", X86Mem64_RC128XOperand>; -def vx128xmem : X86VMemOperand<VR128X, "printi128mem", X86Mem128_RC128XOperand>; -def vx256xmem : X86VMemOperand<VR128X, "printi256mem", X86Mem256_RC128XOperand>; -def vy128xmem : X86VMemOperand<VR256X, "printi128mem", X86Mem128_RC256XOperand>; -def vy256xmem : X86VMemOperand<VR256X, "printi256mem", X86Mem256_RC256XOperand>; -def vy512xmem : X86VMemOperand<VR256X, "printi512mem", X86Mem512_RC256XOperand>; -def vz256mem : X86VMemOperand<VR512, "printi256mem", X86Mem256_RC512Operand>; -def vz512mem : X86VMemOperand<VR512, "printi512mem", X86Mem512_RC512Operand>; +def vx64mem : X86VMemOperand<VR128, "printqwordmem", X86Mem64_RC128Operand>; +def vx128mem : X86VMemOperand<VR128, "printxmmwordmem", X86Mem128_RC128Operand>; +def vx256mem : X86VMemOperand<VR128, "printymmwordmem", X86Mem256_RC128Operand>; +def vy128mem : X86VMemOperand<VR256, "printxmmwordmem", X86Mem128_RC256Operand>; +def vy256mem : X86VMemOperand<VR256, "printymmwordmem", X86Mem256_RC256Operand>; + +def vx64xmem : X86VMemOperand<VR128X, "printqwordmem", X86Mem64_RC128XOperand>; +def vx128xmem : X86VMemOperand<VR128X, "printxmmwordmem", X86Mem128_RC128XOperand>; +def vx256xmem : X86VMemOperand<VR128X, "printymmwordmem", X86Mem256_RC128XOperand>; +def vy128xmem : X86VMemOperand<VR256X, "printxmmwordmem", X86Mem128_RC256XOperand>; +def vy256xmem : X86VMemOperand<VR256X, "printymmwordmem", X86Mem256_RC256XOperand>; +def vy512xmem : X86VMemOperand<VR256X, "printzmmwordmem", X86Mem512_RC256XOperand>; +def vz256mem : X86VMemOperand<VR512, "printymmwordmem", X86Mem256_RC512Operand>; +def vz512mem : X86VMemOperand<VR512, "printzmmwordmem", X86Mem512_RC512Operand>; // A version of i8mem for use on x86-64 and x32 that uses a NOREX GPR instead // of a plain GPR, so that it doesn't potentially require a REX prefix. @@ -409,7 +416,7 @@ def ptr_rc_norex : PointerLikeRegClass<2>; def ptr_rc_norex_nosp : PointerLikeRegClass<3>; def i8mem_NOREX : Operand<iPTR> { - let PrintMethod = "printi8mem"; + let PrintMethod = "printbytemem"; let MIOperandInfo = (ops ptr_rc_norex, i8imm, ptr_rc_norex_nosp, i32imm, SEGMENT_REG); let ParserMatchClass = X86Mem8AsmOperand; @@ -424,7 +431,7 @@ def ptr_rc_tailcall : PointerLikeRegClass<4>; // allowed to use callee-saved registers since they must be scheduled // after callee-saved register are popped. def i32mem_TC : Operand<i32> { - let PrintMethod = "printi32mem"; + let PrintMethod = "printdwordmem"; let MIOperandInfo = (ops ptr_rc_tailcall, i8imm, ptr_rc_tailcall, i32imm, SEGMENT_REG); let ParserMatchClass = X86Mem32AsmOperand; @@ -435,7 +442,7 @@ def i32mem_TC : Operand<i32> { // allowed to use callee-saved registers since they must be scheduled // after callee-saved register are popped. def i64mem_TC : Operand<i64> { - let PrintMethod = "printi64mem"; + let PrintMethod = "printqwordmem"; let MIOperandInfo = (ops ptr_rc_tailcall, i8imm, ptr_rc_tailcall, i32imm, SEGMENT_REG); let ParserMatchClass = X86Mem64AsmOperand; @@ -603,24 +610,10 @@ def offset64_32 : X86MemOffsOperand<i64imm, "printMemOffs32", def offset64_64 : X86MemOffsOperand<i64imm, "printMemOffs64", X86MemOffs64_64AsmOperand>; -def SSECC : Operand<i8> { - let PrintMethod = "printSSEAVXCC"; - let OperandType = "OPERAND_IMMEDIATE"; -} - -def AVXCC : Operand<i8> { - let PrintMethod = "printSSEAVXCC"; - let OperandType = "OPERAND_IMMEDIATE"; -} - -def AVX512ICC : Operand<i8> { - let PrintMethod = "printSSEAVXCC"; - let OperandType = "OPERAND_IMMEDIATE"; -} - -def XOPCC : Operand<i8> { - let PrintMethod = "printXOPCC"; - let OperandType = "OPERAND_IMMEDIATE"; +def ccode : Operand<i8> { + let PrintMethod = "printCondCode"; + let OperandNamespace = "X86"; + let OperandType = "OPERAND_COND_CODE"; } class ImmSExtAsmOperandClass : AsmOperandClass { @@ -640,7 +633,8 @@ def AVX512RCOperand : AsmOperandClass { } def AVX512RC : Operand<i32> { let PrintMethod = "printRoundingControl"; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandNamespace = "X86"; + let OperandType = "OPERAND_ROUNDING_CONTROL"; let ParserMatchClass = AVX512RCOperand; } @@ -718,6 +712,14 @@ def u8imm : Operand<i8> { let OperandType = "OPERAND_IMMEDIATE"; } +// 16-bit immediate but only 8-bits are significant and they are unsigned. +// Used by BT instructions. +def i16u8imm : Operand<i16> { + let PrintMethod = "printU8Imm"; + let ParserMatchClass = ImmUnsignedi8AsmOperand; + let OperandType = "OPERAND_IMMEDIATE"; +} + // 32-bit immediate but only 8-bits are significant and they are unsigned. // Used by some SSE/AVX instructions that use intrinsics. def i32u8imm : Operand<i32> { @@ -726,6 +728,14 @@ def i32u8imm : Operand<i32> { let OperandType = "OPERAND_IMMEDIATE"; } +// 64-bit immediate but only 8-bits are significant and they are unsigned. +// Used by BT instructions. +def i64u8imm : Operand<i64> { + let PrintMethod = "printU8Imm"; + let ParserMatchClass = ImmUnsignedi8AsmOperand; + let OperandType = "OPERAND_IMMEDIATE"; +} + // 64-bits but only 32 bits are significant, and those bits are treated as being // pc relative. def i64i32imm_pcrel : Operand<i64> { @@ -747,6 +757,33 @@ def lea64mem : Operand<i64> { let ParserMatchClass = X86MemAsmOperand; } +let RenderMethod = "addMaskPairOperands" in { + def VK1PairAsmOperand : AsmOperandClass { let Name = "VK1Pair"; } + def VK2PairAsmOperand : AsmOperandClass { let Name = "VK2Pair"; } + def VK4PairAsmOperand : AsmOperandClass { let Name = "VK4Pair"; } + def VK8PairAsmOperand : AsmOperandClass { let Name = "VK8Pair"; } + def VK16PairAsmOperand : AsmOperandClass { let Name = "VK16Pair"; } +} + +def VK1Pair : RegisterOperand<VK1PAIR, "printVKPair"> { + let ParserMatchClass = VK1PairAsmOperand; +} + +def VK2Pair : RegisterOperand<VK2PAIR, "printVKPair"> { + let ParserMatchClass = VK2PairAsmOperand; +} + +def VK4Pair : RegisterOperand<VK4PAIR, "printVKPair"> { + let ParserMatchClass = VK4PairAsmOperand; +} + +def VK8Pair : RegisterOperand<VK8PAIR, "printVKPair"> { + let ParserMatchClass = VK8PairAsmOperand; +} + +def VK16Pair : RegisterOperand<VK16PAIR, "printVKPair"> { + let ParserMatchClass = VK16PairAsmOperand; +} //===----------------------------------------------------------------------===// // X86 Complex Pattern Definitions. @@ -833,6 +870,8 @@ def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">; def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">; def PKU : Predicate<"Subtarget->hasPKU()">; def HasVNNI : Predicate<"Subtarget->hasVNNI()">; +def HasVP2INTERSECT : Predicate<"Subtarget->hasVP2INTERSECT()">; +def HasBF16 : Predicate<"Subtarget->hasBF16()">; def HasBITALG : Predicate<"Subtarget->hasBITALG()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; @@ -894,8 +933,10 @@ def HasWBNOINVD : Predicate<"Subtarget->hasWBNOINVD()">; def HasRDPID : Predicate<"Subtarget->hasRDPID()">; def HasWAITPKG : Predicate<"Subtarget->hasWAITPKG()">; def HasINVPCID : Predicate<"Subtarget->hasINVPCID()">; +def HasCmpxchg8b : Predicate<"Subtarget->hasCmpxchg8b()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; def HasPCONFIG : Predicate<"Subtarget->hasPCONFIG()">; +def HasENQCMD : Predicate<"Subtarget->hasENQCMD()">; def Not64BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">; def In64BitMode : Predicate<"Subtarget->is64Bit()">, @@ -928,12 +969,12 @@ def IsNotPIC : Predicate<"!TM.isPositionIndependent()">; // the Function object through the <Target>Subtarget and objections were raised // to that (see post-commit review comments for r301750). let RecomputePerFunction = 1 in { - def OptForSize : Predicate<"MF->getFunction().optForSize()">; - def OptForMinSize : Predicate<"MF->getFunction().optForMinSize()">; - def OptForSpeed : Predicate<"!MF->getFunction().optForSize()">; + def OptForSize : Predicate<"MF->getFunction().hasOptSize()">; + def OptForMinSize : Predicate<"MF->getFunction().hasMinSize()">; + def OptForSpeed : Predicate<"!MF->getFunction().hasOptSize()">; def UseIncDec : Predicate<"!Subtarget->slowIncDec() || " - "MF->getFunction().optForSize()">; - def NoSSE41_Or_OptForSize : Predicate<"MF->getFunction().optForSize() || " + "MF->getFunction().hasOptSize()">; + def NoSSE41_Or_OptForSize : Predicate<"MF->getFunction().hasOptSize() || " "!Subtarget->hasSSE41()">; } @@ -959,22 +1000,22 @@ include "X86InstrFormats.td" // X86 specific condition code. These correspond to CondCode in // X86InstrInfo.h. They must be kept in synch. -def X86_COND_A : PatLeaf<(i8 0)>; // alt. COND_NBE -def X86_COND_AE : PatLeaf<(i8 1)>; // alt. COND_NC +def X86_COND_O : PatLeaf<(i8 0)>; +def X86_COND_NO : PatLeaf<(i8 1)>; def X86_COND_B : PatLeaf<(i8 2)>; // alt. COND_C -def X86_COND_BE : PatLeaf<(i8 3)>; // alt. COND_NA +def X86_COND_AE : PatLeaf<(i8 3)>; // alt. COND_NC def X86_COND_E : PatLeaf<(i8 4)>; // alt. COND_Z -def X86_COND_G : PatLeaf<(i8 5)>; // alt. COND_NLE -def X86_COND_GE : PatLeaf<(i8 6)>; // alt. COND_NL -def X86_COND_L : PatLeaf<(i8 7)>; // alt. COND_NGE -def X86_COND_LE : PatLeaf<(i8 8)>; // alt. COND_NG -def X86_COND_NE : PatLeaf<(i8 9)>; // alt. COND_NZ -def X86_COND_NO : PatLeaf<(i8 10)>; +def X86_COND_NE : PatLeaf<(i8 5)>; // alt. COND_NZ +def X86_COND_BE : PatLeaf<(i8 6)>; // alt. COND_NA +def X86_COND_A : PatLeaf<(i8 7)>; // alt. COND_NBE +def X86_COND_S : PatLeaf<(i8 8)>; +def X86_COND_NS : PatLeaf<(i8 9)>; +def X86_COND_P : PatLeaf<(i8 10)>; // alt. COND_PE def X86_COND_NP : PatLeaf<(i8 11)>; // alt. COND_PO -def X86_COND_NS : PatLeaf<(i8 12)>; -def X86_COND_O : PatLeaf<(i8 13)>; -def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE -def X86_COND_S : PatLeaf<(i8 15)>; +def X86_COND_L : PatLeaf<(i8 12)>; // alt. COND_NGE +def X86_COND_GE : PatLeaf<(i8 13)>; // alt. COND_NL +def X86_COND_LE : PatLeaf<(i8 14)>; // alt. COND_NG +def X86_COND_G : PatLeaf<(i8 15)>; // alt. COND_NLE def i16immSExt8 : ImmLeaf<i16, [{ return isInt<8>(Imm); }]>; def i32immSExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>; @@ -1007,16 +1048,13 @@ def i64relocImmSExt32 : PatLeaf<(i64 relocImm), [{ // Eventually, it would be nice to allow ConstantHoisting to merge constants // globally for potentially added savings. // -def imm8_su : PatLeaf<(i8 relocImm), [{ +def relocImm8_su : PatLeaf<(i8 relocImm), [{ return !shouldAvoidImmediateInstFormsForSize(N); }]>; -def imm16_su : PatLeaf<(i16 relocImm), [{ +def relocImm16_su : PatLeaf<(i16 relocImm), [{ return !shouldAvoidImmediateInstFormsForSize(N); }]>; -def imm32_su : PatLeaf<(i32 relocImm), [{ - return !shouldAvoidImmediateInstFormsForSize(N); -}]>; -def i64immSExt32_su : PatLeaf<(i64immSExt32), [{ +def relocImm32_su : PatLeaf<(i32 relocImm), [{ return !shouldAvoidImmediateInstFormsForSize(N); }]>; @@ -1121,7 +1159,19 @@ def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>; def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>; def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>; def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>; -def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>; + +// We can treat an i8/i16 extending load to i64 as a 32 bit load if its known +// to be 4 byte aligned or better. +def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (unindexedload node:$ptr)), [{ + LoadSDNode *LD = cast<LoadSDNode>(N); + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (ExtType != ISD::EXTLOAD) + return false; + if (LD->getMemoryVT() == MVT::i32) + return true; + + return LD->getAlignment() >= 4 && !LD->isVolatile(); +}]>; // An 'and' node with a single use. @@ -1517,16 +1567,16 @@ def MOV32ri_alt : Ii32<0xC7, MRM0r, (outs GR32:$dst), (ins i32imm:$src), let SchedRW = [WriteStore] in { def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", - [(store (i8 imm8_su:$src), addr:$dst)]>; + [(store (i8 relocImm8_su:$src), addr:$dst)]>; def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src), "mov{w}\t{$src, $dst|$dst, $src}", - [(store (i16 imm16_su:$src), addr:$dst)]>, OpSize16; + [(store (i16 relocImm16_su:$src), addr:$dst)]>, OpSize16; def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", - [(store (i32 imm32_su:$src), addr:$dst)]>, OpSize32; + [(store (i32 relocImm32_su:$src), addr:$dst)]>, OpSize32; def MOV64mi32 : RIi32S<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", - [(store i64immSExt32_su:$src, addr:$dst)]>, + [(store i64relocImmSExt32_su:$src, addr:$dst)]>, Requires<[In64BitMode]>; } // SchedRW @@ -1773,36 +1823,36 @@ let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteBitTestRegLd] in { } let SchedRW = [WriteBitTest] in { -def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2), +def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16u8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>, + [(set EFLAGS, (X86bt GR16:$src1, imm:$src2))]>, OpSize16, TB; -def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2), +def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32u8imm:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))]>, + [(set EFLAGS, (X86bt GR32:$src1, imm:$src2))]>, OpSize32, TB; -def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), +def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64u8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB; + [(set EFLAGS, (X86bt GR64:$src1, imm:$src2))]>, TB; } // SchedRW // Note that these instructions aren't slow because that only applies when the // other operand is in a register. When it's an immediate, bt is still fast. let SchedRW = [WriteBitTestImmLd] in { -def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2), +def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16u8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi16 addr:$src1), - i16immSExt8:$src2))]>, + imm:$src2))]>, OpSize16, TB; -def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2), +def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32u8imm:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi32 addr:$src1), - i32immSExt8:$src2))]>, + imm:$src2))]>, OpSize32, TB; -def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), +def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64u8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi64 addr:$src1), - i64immSExt8:$src2))]>, TB, + imm:$src2))]>, TB, Requires<[In64BitMode]>; } // SchedRW @@ -1832,20 +1882,20 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), } let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in { -def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), +def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16u8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB; -def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), +def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32u8imm:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB; -def BTC64ri8 : RIi8<0xBA, MRM7r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), +def BTC64ri8 : RIi8<0xBA, MRM7r, (outs GR64:$dst), (ins GR64:$src1, i64u8imm:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; } // SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in { -def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2), +def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16u8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB; -def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2), +def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32u8imm:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB; -def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), +def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64u8imm:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB, Requires<[In64BitMode]>; } @@ -1875,24 +1925,24 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), } let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in { -def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), +def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16u8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB; -def BTR32ri8 : Ii8<0xBA, MRM6r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), +def BTR32ri8 : Ii8<0xBA, MRM6r, (outs GR32:$dst), (ins GR32:$src1, i32u8imm:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB; -def BTR64ri8 : RIi8<0xBA, MRM6r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), +def BTR64ri8 : RIi8<0xBA, MRM6r, (outs GR64:$dst), (ins GR64:$src1, i64u8imm:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; } // SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in { -def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2), +def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16u8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB; -def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2), +def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32u8imm:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB; -def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2), +def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64u8imm:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB, Requires<[In64BitMode]>; } @@ -1922,20 +1972,20 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), } let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in { -def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), +def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16u8imm:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB; -def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), +def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32u8imm:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB; -def BTS64ri8 : RIi8<0xBA, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), +def BTS64ri8 : RIi8<0xBA, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64u8imm:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB; } // SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in { -def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2), +def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16u8imm:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB; -def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2), +def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32u8imm:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB; -def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2), +def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64u8imm:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB, Requires<[In64BitMode]>; } @@ -2090,12 +2140,13 @@ def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), - "cmpxchg8b\t$dst", []>, TB; + "cmpxchg8b\t$dst", []>, TB, Requires<[HasCmpxchg8b]>; let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in +// NOTE: In64BitMode check needed for the AssemblerPredicate. def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), "cmpxchg16b\t$dst", []>, - TB, Requires<[HasCmpxchg16b, In64BitMode]>; + TB, Requires<[HasCmpxchg16b,In64BitMode]>; } // SchedRW, mayLoad, mayStore, hasSideEffects @@ -2388,6 +2439,11 @@ def xor_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs), return hasNoCarryFlagUses(SDValue(N, 1)); }]>; +def and_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs), + (X86and_flag node:$lhs, node:$rhs), [{ + return hasNoCarryFlagUses(SDValue(N, 1)); +}]>; + let Predicates = [HasBMI] in { // FIXME: patterns for the load versions are not implemented def : Pat<(and GR32:$src, (add GR32:$src, -1)), @@ -2406,12 +2462,20 @@ let Predicates = [HasBMI] in { (BLSI64rr GR64:$src)>; // Versions to match flag producing ops. - // X86and_flag nodes are rarely created. Those should use CMP+AND. We do - // TESTrr matching in PostProcessISelDAG to allow BLSR/BLSI to be formed. + def : Pat<(and_flag_nocf GR32:$src, (add GR32:$src, -1)), + (BLSR32rr GR32:$src)>; + def : Pat<(and_flag_nocf GR64:$src, (add GR64:$src, -1)), + (BLSR64rr GR64:$src)>; + def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, -1)), (BLSMSK32rr GR32:$src)>; def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, -1)), (BLSMSK64rr GR64:$src)>; + + def : Pat<(and_flag_nocf GR32:$src, (ineg GR32:$src)), + (BLSI32rr GR32:$src)>; + def : Pat<(and_flag_nocf GR64:$src, (ineg GR64:$src)), + (BLSI64rr GR64:$src)>; } multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC, @@ -2653,16 +2717,12 @@ defm LWPVAL64 : lwpval_intr<GR64, int_x86_lwpval64>, VEX_W; // MONITORX/MWAITX Instructions // let SchedRW = [ WriteSystem ] in { - let usesCustomInserter = 1 in { - def MONITORX : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), - [(int_x86_monitorx addr:$src1, GR32:$src2, GR32:$src3)]>, - Requires<[ HasMWAITX ]>; - } - - let Uses = [ EAX, ECX, EDX ] in { - def MONITORXrrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", []>, - TB, Requires<[ HasMWAITX ]>; - } + let Uses = [ EAX, ECX, EDX ] in + def MONITORX32rrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", []>, + TB, Requires<[ HasMWAITX, Not64BitMode ]>; + let Uses = [ RAX, ECX, EDX ] in + def MONITORX64rrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", []>, + TB, Requires<[ HasMWAITX, In64BitMode ]>; let Uses = [ ECX, EAX, EBX ] in { def MWAITXrrr : I<0x01, MRM_FB, (outs), (ins), "mwaitx", @@ -2676,9 +2736,9 @@ def : InstAlias<"mwaitx\t{%eax, %ecx, %ebx|ebx, ecx, eax}", (MWAITXrrr)>, def : InstAlias<"mwaitx\t{%rax, %rcx, %rbx|rbx, rcx, rax}", (MWAITXrrr)>, Requires<[ In64BitMode ]>; -def : InstAlias<"monitorx\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORXrrr)>, +def : InstAlias<"monitorx\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORX32rrr)>, Requires<[ Not64BitMode ]>; -def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORXrrr)>, +def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORX64rrr)>, Requires<[ In64BitMode ]>; //===----------------------------------------------------------------------===// @@ -2738,21 +2798,50 @@ def MOVDIR64B64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src), } // SchedRW //===----------------------------------------------------------------------===// +// ENQCMD/S - Enqueue 64-byte command as user with 64-byte write atomicity +// +let SchedRW = [WriteStore], Defs = [EFLAGS] in { + def ENQCMD16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src), + "enqcmd\t{$src, $dst|$dst, $src}", + [(set EFLAGS, (X86enqcmd GR16:$dst, addr:$src))]>, + T8XD, AdSize16, Requires<[HasENQCMD, Not64BitMode]>; + def ENQCMD32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src), + "enqcmd\t{$src, $dst|$dst, $src}", + [(set EFLAGS, (X86enqcmd GR32:$dst, addr:$src))]>, + T8XD, AdSize32, Requires<[HasENQCMD]>; + def ENQCMD64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src), + "enqcmd\t{$src, $dst|$dst, $src}", + [(set EFLAGS, (X86enqcmd GR64:$dst, addr:$src))]>, + T8XD, AdSize64, Requires<[HasENQCMD, In64BitMode]>; + + def ENQCMDS16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src), + "enqcmds\t{$src, $dst|$dst, $src}", + [(set EFLAGS, (X86enqcmds GR16:$dst, addr:$src))]>, + T8XS, AdSize16, Requires<[HasENQCMD, Not64BitMode]>; + def ENQCMDS32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src), + "enqcmds\t{$src, $dst|$dst, $src}", + [(set EFLAGS, (X86enqcmds GR32:$dst, addr:$src))]>, + T8XS, AdSize32, Requires<[HasENQCMD]>; + def ENQCMDS64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src), + "enqcmds\t{$src, $dst|$dst, $src}", + [(set EFLAGS, (X86enqcmds GR64:$dst, addr:$src))]>, + T8XS, AdSize64, Requires<[HasENQCMD, In64BitMode]>; +} + +//===----------------------------------------------------------------------===// // CLZERO Instruction // let SchedRW = [WriteSystem] in { let Uses = [EAX] in - def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", []>, - TB, Requires<[HasCLZERO]>; - - let usesCustomInserter = 1 in { - def CLZERO : PseudoI<(outs), (ins i32mem:$src1), - [(int_x86_clzero addr:$src1)]>, Requires<[HasCLZERO]>; - } + def CLZERO32r : I<0x01, MRM_FC, (outs), (ins), "clzero", []>, + TB, Requires<[HasCLZERO, Not64BitMode]>; + let Uses = [RAX] in + def CLZERO64r : I<0x01, MRM_FC, (outs), (ins), "clzero", []>, + TB, Requires<[HasCLZERO, In64BitMode]>; } // SchedRW -def : InstAlias<"clzero\t{%eax|eax}", (CLZEROr)>, Requires<[Not64BitMode]>; -def : InstAlias<"clzero\t{%rax|rax}", (CLZEROr)>, Requires<[In64BitMode]>; +def : InstAlias<"clzero\t{%eax|eax}", (CLZERO32r)>, Requires<[Not64BitMode]>; +def : InstAlias<"clzero\t{%rax|rax}", (CLZERO64r)>, Requires<[In64BitMode]>; //===----------------------------------------------------------------------===// // Pattern fragments to auto generate TBM instructions. @@ -2812,8 +2901,6 @@ let Predicates = [HasTBM] in { (TZMSK64rr GR64:$src)>; // Patterns to match flag producing ops. - // X86and_flag nodes are rarely created. Those should use CMP+AND. We do - // TESTrr matching in PostProcessISelDAG to allow BLSR/BLSI to be formed. def : Pat<(or_flag_nocf GR32:$src, (not (add GR32:$src, 1))), (BLCI32rr GR32:$src)>; def : Pat<(or_flag_nocf GR64:$src, (not (add GR64:$src, 1))), @@ -2825,6 +2912,11 @@ let Predicates = [HasTBM] in { def : Pat<(or_flag_nocf GR64:$src, (sub -2, GR64:$src)), (BLCI64rr GR64:$src)>; + def : Pat<(and_flag_nocf (not GR32:$src), (add GR32:$src, 1)), + (BLCIC32rr GR32:$src)>; + def : Pat<(and_flag_nocf (not GR64:$src), (add GR64:$src, 1)), + (BLCIC64rr GR64:$src)>; + def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, 1)), (BLCMSK32rr GR32:$src)>; def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, 1)), @@ -2849,6 +2941,11 @@ let Predicates = [HasTBM] in { (T1MSKC32rr GR32:$src)>; def : Pat<(or_flag_nocf (not GR64:$src), (add GR64:$src, 1)), (T1MSKC64rr GR64:$src)>; + + def : Pat<(and_flag_nocf (not GR32:$src), (add GR32:$src, -1)), + (TZMSK32rr GR32:$src)>; + def : Pat<(and_flag_nocf (not GR64:$src), (add GR64:$src, -1)), + (TZMSK64rr GR64:$src)>; } // HasTBM //===----------------------------------------------------------------------===// |