diff options
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/AsmParser/X86AsmParser.cpp | 112 | ||||
-rw-r--r-- | lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp | 20 | ||||
-rw-r--r-- | lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp | 2 | ||||
-rw-r--r-- | lib/Target/X86/X86.td | 30 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 106 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrAVX512.td | 645 | ||||
-rw-r--r-- | lib/Target/X86/X86InstructionSelector.cpp | 53 | ||||
-rw-r--r-- | lib/Target/X86/X86LegalizerInfo.cpp | 20 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.cpp | 55 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.h | 2 | ||||
-rw-r--r-- | lib/Target/X86/X86TargetMachine.cpp | 47 | ||||
-rw-r--r-- | lib/Target/X86/X86TargetMachine.h | 1 |
12 files changed, 959 insertions, 134 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index d30cc724c203..825f23dc52d9 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -49,8 +49,11 @@ static const char OpPrecedence[] = { 4, // IC_MINUS 5, // IC_MULTIPLY 5, // IC_DIVIDE - 6, // IC_RPAREN - 7, // IC_LPAREN + 5, // IC_MOD + 6, // IC_NOT + 7, // IC_NEG + 8, // IC_RPAREN + 9, // IC_LPAREN 0, // IC_IMM 0 // IC_REGISTER }; @@ -92,6 +95,9 @@ private: IC_MINUS, IC_MULTIPLY, IC_DIVIDE, + IC_MOD, + IC_NOT, + IC_NEG, IC_RPAREN, IC_LPAREN, IC_IMM, @@ -111,6 +117,10 @@ private: SmallVector<InfixCalculatorTok, 4> InfixOperatorStack; SmallVector<ICToken, 4> PostfixStack; + bool isUnaryOperator(const InfixCalculatorTok Op) { + return Op == IC_NEG || Op == IC_NOT; + } + public: int64_t popOperand() { assert (!PostfixStack.empty() && "Poped an empty stack!"); @@ -192,6 +202,22 @@ private: ICToken Op = PostfixStack[i]; if (Op.first == IC_IMM || Op.first == IC_REGISTER) { OperandStack.push_back(Op); + } else if (isUnaryOperator(Op.first)) { + assert (OperandStack.size() > 0 && "Too few operands."); + ICToken Operand = OperandStack.pop_back_val(); + assert (Operand.first == IC_IMM && + "Unary operation with a register!"); + switch (Op.first) { + default: + report_fatal_error("Unexpected operator!"); + break; + case IC_NEG: + OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second)); + break; + case IC_NOT: + OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second)); + break; + } } else { assert (OperandStack.size() > 1 && "Too few operands."); int64_t Val; @@ -222,6 +248,12 @@ private: Val = Op1.second / Op2.second; OperandStack.push_back(std::make_pair(IC_IMM, Val)); break; + case IC_MOD: + assert (Op1.first == IC_IMM && Op2.first == IC_IMM && + "Modulo operation with an immediate and a register!"); + Val = Op1.second % Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; case 
IC_OR: assert (Op1.first == IC_IMM && Op2.first == IC_IMM && "Or operation with an immediate and a register!"); @@ -271,6 +303,7 @@ private: IES_NOT, IES_MULTIPLY, IES_DIVIDE, + IES_MOD, IES_LBRAC, IES_RBRAC, IES_LPAREN, @@ -421,10 +454,16 @@ private: default: State = IES_ERROR; break; + case IES_OR: + case IES_XOR: + case IES_AND: + case IES_LSHIFT: + case IES_RSHIFT: case IES_PLUS: case IES_NOT: case IES_MULTIPLY: case IES_DIVIDE: + case IES_MOD: case IES_LPAREN: case IES_RPAREN: case IES_LBRAC: @@ -432,11 +471,12 @@ private: case IES_INTEGER: case IES_REGISTER: State = IES_MINUS; - // Only push the minus operator if it is not a unary operator. - if (!(CurrState == IES_PLUS || CurrState == IES_MINUS || - CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE || - CurrState == IES_LPAREN || CurrState == IES_LBRAC)) + // push minus operator if it is not a negate operator + if (CurrState == IES_REGISTER || CurrState == IES_RPAREN || + CurrState == IES_INTEGER || CurrState == IES_RBRAC) IC.pushOperator(IC_MINUS); + else + IC.pushOperator(IC_NEG); if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { // If we already have a BaseReg, then assume this is the IndexReg with // a scale of 1. @@ -458,9 +498,21 @@ private: default: State = IES_ERROR; break; + case IES_OR: + case IES_XOR: + case IES_AND: + case IES_LSHIFT: + case IES_RSHIFT: case IES_PLUS: + case IES_MINUS: case IES_NOT: + case IES_MULTIPLY: + case IES_DIVIDE: + case IES_MOD: + case IES_LPAREN: + case IES_LBRAC: State = IES_NOT; + IC.pushOperator(IC_NOT); break; } PrevState = CurrState; @@ -525,6 +577,7 @@ private: case IES_LSHIFT: case IES_RSHIFT: case IES_DIVIDE: + case IES_MOD: case IES_MULTIPLY: case IES_LPAREN: State = IES_INTEGER; @@ -539,26 +592,6 @@ private: } // Get the scale and replace the 'Register * Scale' with '0'. 
IC.popOperator(); - } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || - PrevState == IES_OR || PrevState == IES_AND || - PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || - PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || - PrevState == IES_LPAREN || PrevState == IES_LBRAC || - PrevState == IES_NOT || PrevState == IES_XOR) && - CurrState == IES_MINUS) { - // Unary minus. No need to pop the minus operand because it was never - // pushed. - IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm. - } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || - PrevState == IES_OR || PrevState == IES_AND || - PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || - PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || - PrevState == IES_LPAREN || PrevState == IES_LBRAC || - PrevState == IES_NOT || PrevState == IES_XOR) && - CurrState == IES_NOT) { - // Unary not. No need to pop the not operand because it was never - // pushed. - IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm. } else { IC.pushOperand(IC_IMM, TmpInt); } @@ -594,6 +627,19 @@ private: break; } } + void onMod() { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_RPAREN: + State = IES_MOD; + IC.pushOperator(IC_MOD); + break; + } + } void onLBrac() { PrevState = State; switch (State) { @@ -647,18 +693,8 @@ private: case IES_RSHIFT: case IES_MULTIPLY: case IES_DIVIDE: + case IES_MOD: case IES_LPAREN: - // FIXME: We don't handle this type of unary minus or not, yet. 
- if ((PrevState == IES_PLUS || PrevState == IES_MINUS || - PrevState == IES_OR || PrevState == IES_AND || - PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || - PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || - PrevState == IES_LPAREN || PrevState == IES_LBRAC || - PrevState == IES_NOT || PrevState == IES_XOR) && - (CurrState == IES_MINUS || CurrState == IES_NOT)) { - State = IES_ERROR; - break; - } State = IES_LPAREN; IC.pushOperator(IC_LPAREN); break; @@ -1302,6 +1338,8 @@ bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine SM.onXor(); else if (Name.equals_lower("and")) SM.onAnd(); + else if (Name.equals_lower("mod")) + SM.onMod(); else return false; return true; diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index caf98bffb80d..8f2017e990c5 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -396,7 +396,7 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer, if (!SB->getFragment()) { Asm.getContext().reportError( Fixup.getLoc(), - "symbol '" + B->getSymbol().getName() + + "symbol '" + SB->getName() + "' can not be undefined in a subtraction expression"); return false; } @@ -408,7 +408,7 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer, // pedantic compatibility with 'as'. Type = A->isExternal() ? 
(unsigned)MachO::GENERIC_RELOC_SECTDIFF : (unsigned)MachO::GENERIC_RELOC_LOCAL_SECTDIFF; - Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout); + Value2 = Writer->getSymbolAddress(*SB, Layout); FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent()); } @@ -468,8 +468,8 @@ void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter *Writer, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { - assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP && - !is64Bit() && + const MCSymbolRefExpr *SymA = Target.getSymA(); + assert(SymA->getKind() == MCSymbolRefExpr::VK_TLVP && !is64Bit() && "Should only be called with a 32-bit TLVP relocation!"); unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); @@ -480,15 +480,14 @@ void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter *Writer, // subtraction from the picbase. For 32-bit pic the addend is the difference // between the picbase and the next address. For 32-bit static the addend is // zero. - if (Target.getSymB()) { + if (auto *SymB = Target.getSymB()) { // If this is a subtraction then we're pcrel. 
uint32_t FixupAddress = Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); IsPCRel = 1; - FixedValue = - FixupAddress - - Writer->getSymbolAddress(Target.getSymB()->getSymbol(), Layout) + - Target.getConstant(); + FixedValue = FixupAddress - + Writer->getSymbolAddress(SymB->getSymbol(), Layout) + + Target.getConstant(); FixedValue += 1ULL << Log2Size; } else { FixedValue = 0; @@ -499,8 +498,7 @@ void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter *Writer, MRE.r_word0 = Value; MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (MachO::GENERIC_RELOC_TLV << 28); - Writer->addRelocation(&Target.getSymA()->getSymbol(), Fragment->getParent(), - MRE); + Writer->addRelocation(&SymA->getSymbol(), Fragment->getParent(), MRE); } void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index 5892f1de33ee..807f7a6ddb19 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -44,7 +44,7 @@ unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx, const MCAsmBackend &MAB) const { unsigned FixupKind = Fixup.getKind(); if (IsCrossSection) { - if (FixupKind != FK_Data_4) { + if (FixupKind != FK_Data_4 && FixupKind != llvm::X86::reloc_signed_4byte) { Ctx.reportError(Fixup.getLoc(), "Cannot represent this expression"); return COFF::IMAGE_REL_AMD64_ADDR32; } diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index fe105298f5c1..7437ebacfac3 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -300,6 +300,8 @@ def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom", "Intel Atom processors">; def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM", "Intel Silvermont processors">; +def ProcIntelGLM : SubtargetFeature<"glm", "X86ProcFamily", "IntelGLM", + "Intel Goldmont processors">; class 
Proc<string Name, list<SubtargetFeature> Features> : ProcessorModel<Name, GenericModel, Features>; @@ -430,6 +432,34 @@ class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [ def : SilvermontProc<"silvermont">; def : SilvermontProc<"slm">; // Legacy alias. +class GoldmontProc<string Name> : ProcessorModel<Name, SLMModel, [ + ProcIntelGLM, + FeatureX87, + FeatureMMX, + FeatureSSE42, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureMOVBE, + FeaturePOPCNT, + FeaturePCLMUL, + FeatureAES, + FeaturePRFCHW, + FeatureCallRegIndirect, + FeatureSlowLEA, + FeatureSlowIncDec, + FeatureSlowBTMem, + FeatureLAHFSAHF, + FeatureMPX, + FeatureSHA, + FeatureRDSEED, + FeatureXSAVE, + FeatureXSAVEOPT, + FeatureXSAVEC, + FeatureXSAVES, + FeatureCLFLUSHOPT +]>; +def : GoldmontProc<"goldmont">; + // "Arrandale" along with corei3 and corei5 class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [ FeatureX87, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f777e5628988..b89914f8893e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5065,6 +5065,20 @@ static SDValue insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, return insertSubVector(Result, Vec, IdxVal, DAG, dl, 256); } +// Return true if the instruction zeroes the unused upper part of the +// destination and accepts mask. +static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) { + switch (Opcode) { + default: + return false; + case X86ISD::PCMPEQM: + case X86ISD::PCMPGTM: + case X86ISD::CMPM: + case X86ISD::CMPMU: + return true; + } +} + /// Insert i1-subvector to i1-vector. static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -5097,6 +5111,22 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, // 3. 
Subvector should be inserted in the middle (for example v2i1 // to v16i1, index 2) + // If this node widens - by concatenating zeroes - the type of the result + // of a node with instruction that zeroes all upper (irrelevant) bits of the + // output register, mark this node as legal to enable replacing them with + // the v8i1 version of the previous instruction during instruction selection. + // For example, VPCMPEQDZ128rr instruction stores its v4i1 result in a k-reg, + // while zeroing all the upper remaining 60 bits of the register. If the + // result of such instruction is inserted into an allZeroVector, then we can + // safely remove insert_vector (in instruction selection) as the cmp instr + // already zeroed the rest of the register. + if (ISD::isBuildVectorAllZeros(Vec.getNode()) && IdxVal == 0 && + (isMaskedZeroUpperBitsvXi1(SubVec.getOpcode()) || + (SubVec.getOpcode() == ISD::AND && + (isMaskedZeroUpperBitsvXi1(SubVec.getOperand(0).getOpcode()) || + isMaskedZeroUpperBitsvXi1(SubVec.getOperand(1).getOpcode()))))) + return Op; + // extend to natively supported kshift MVT MinVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; MVT WideOpVT = OpVT; @@ -7919,6 +7949,60 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { return concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl); } +// Return true if all the operands of the given CONCAT_VECTORS node are zeros +// except for the first one. (CONCAT_VECTORS Op, 0, 0,...,0) +static bool isExpandWithZeros(const SDValue &Op) { + assert(Op.getOpcode() == ISD::CONCAT_VECTORS && + "Expand with zeros only possible in CONCAT_VECTORS nodes!"); + + for (unsigned i = 1; i < Op.getNumOperands(); i++) + if (!ISD::isBuildVectorAllZeros(Op.getOperand(i).getNode())) + return false; + + return true; +} + +// If the given node is a type promotion (by concatenating i1 zeros) of the +// result of a node that already zeros all upper bits of the k-register, +// returns that node; otherwise returns an empty SDValue.
+static SDValue isTypePromotionOfi1ZeroUpBits(SDValue Op) { + unsigned Opc = Op.getOpcode(); + + assert(Opc == ISD::CONCAT_VECTORS && + Op.getSimpleValueType().getVectorElementType() == MVT::i1 && + "Unexpected node to check for type promotion!"); + + // As long as we are concatenating zeros to the upper part of a previous node + // result, climb up the tree until a node with different opcode is + // encountered + while (Opc == ISD::INSERT_SUBVECTOR || Opc == ISD::CONCAT_VECTORS) { + if (Opc == ISD::INSERT_SUBVECTOR) { + if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()) && + Op.getConstantOperandVal(2) == 0) + Op = Op.getOperand(1); + else + return SDValue(); + } else { // Opc == ISD::CONCAT_VECTORS + if (isExpandWithZeros(Op)) + Op = Op.getOperand(0); + else + return SDValue(); + } + Opc = Op.getOpcode(); + } + + // Check if the first inserted node zeroes the upper bits, or an 'and' result + // of a node that zeros the upper bits (its masked version). + if (isMaskedZeroUpperBitsvXi1(Op.getOpcode()) || + (Op.getOpcode() == ISD::AND && + (isMaskedZeroUpperBitsvXi1(Op.getOperand(0).getOpcode()) || + isMaskedZeroUpperBitsvXi1(Op.getOperand(1).getOpcode())))) { + return Op; + } + + return SDValue(); +} + static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG & DAG) { @@ -7929,6 +8013,17 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, assert(isPowerOf2_32(NumOfOperands) && "Unexpected number of operands in CONCAT_VECTORS"); + // If this node promotes - by concatenating zeroes - the type of the result + // of a node with instruction that zeroes all upper (irrelevant) bits of the + // output register, mark it as legal and catch the pattern in instruction + // selection to avoid emitting extra instructions (for zeroing upper bits).
+ if (SDValue Promoted = isTypePromotionOfi1ZeroUpBits(Op)) { + SDValue ZeroC = DAG.getConstant(0, dl, MVT::i64); + SDValue AllZeros = DAG.getSplatBuildVector(ResVT, dl, ZeroC); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, AllZeros, Promoted, + ZeroC); + } + SDValue Undef = DAG.getUNDEF(ResVT); if (NumOfOperands > 2) { // Specialize the cases when all, or all but one, of the operands are undef. @@ -27012,6 +27107,9 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, unsigned &Shuffle, MVT &ShuffleVT, unsigned &PermuteImm) { unsigned NumMaskElts = Mask.size(); + unsigned InputSizeInBits = MaskVT.getSizeInBits(); + unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts; + MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits); bool ContainsZeros = false; APInt Zeroable(NumMaskElts, false); @@ -27027,7 +27125,7 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) || (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) { int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle, - MaskVT.getScalarSizeInBits(), Mask, + MaskScalarSizeInBits, Mask, 0, Zeroable, Subtarget); if (0 < ShiftAmt) { PermuteImm = (unsigned)ShiftAmt; @@ -27043,10 +27141,6 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, return SM_SentinelUndef <= M && M < (int)NumMaskElts; }) && "Expected unary shuffle"); - unsigned InputSizeInBits = MaskVT.getSizeInBits(); - unsigned MaskScalarSizeInBits = InputSizeInBits / Mask.size(); - MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits); - // Handle PSHUFLW/PSHUFHW repeated patterns. if (MaskScalarSizeInBits == 16) { SmallVector<int, 4> RepeatedMask; @@ -35072,7 +35166,7 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, /// that is commonly recognized as an idiom (has no register dependency), so /// that's better/smaller than loading a splat 1 constant. 
static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) { - assert(N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB && + assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && "Unexpected opcode for increment/decrement transform"); // Pseudo-legality check: getOnesVector() expects one of these types, so bail diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 01a70323224c..cc5c09cbf0e5 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -185,6 +185,20 @@ def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info, def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info, v2f64x_info>; +class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm, + ValueType _vt> { + RegisterClass KRC = _krc; + RegisterClass KRCWM = _krcwm; + ValueType KVT = _vt; +} + +def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>; +def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>; +def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>; +def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>; +def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>; +def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>; + // This multiclass generates the masking variants from the non-masking // variant. It only provides the assembly pieces for the masking variants. 
// It assumes custom ISel patterns for masking which can be provided as @@ -1735,17 +1749,217 @@ defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, avx512vl_i64_info, HasAVX512>, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; -let Predicates = [HasAVX512, NoVLX] in { -def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), - (COPY_TO_REGCLASS (VPCMPGTDZrr - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>; -def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), - (COPY_TO_REGCLASS (VPCMPEQDZrr - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>; -} +multiclass avx512_icmp_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf, + SDNode OpNode, string InstrStr, + list<Predicate> Preds> { +let Predicates = Preds in { + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rr) _.RC:$src1, _.RC:$src2), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (OpNode (_.VT _.RC:$src1), + (_.VT (bitconvert (_.LdFrag addr:$src2))))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rm) _.RC:$src1, addr:$src2), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (and _.KRCWM:$mask, + (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrk) _.KRCWM:$mask, + _.RC:$src1, _.RC:$src2), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (and (_.KVT _.KRCWM:$mask), + (_.KVT (OpNode (_.VT _.RC:$src1), + (_.VT (bitconvert + (_.LdFrag addr:$src2))))))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmk) _.KRCWM:$mask, + _.RC:$src1, addr:$src2), + NewInf.KRC)>; +} +} + 
+multiclass avx512_icmp_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf, + SDNode OpNode, string InstrStr, + list<Predicate> Preds> + : avx512_icmp_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> { +let Predicates = Preds in { + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (OpNode (_.VT _.RC:$src1), + (X86VBroadcast (_.ScalarLdFrag addr:$src2)))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmb) _.RC:$src1, addr:$src2), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (and (_.KVT _.KRCWM:$mask), + (_.KVT (OpNode (_.VT _.RC:$src1), + (X86VBroadcast + (_.ScalarLdFrag addr:$src2)))))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbk) _.KRCWM:$mask, + _.RC:$src1, addr:$src2), + NewInf.KRC)>; +} +} + +// VPCMPEQB - i8 +defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpeqm, + "VPCMPEQBZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQBZ128", [HasBWI, HasVLX]>; + +defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQBZ256", [HasBWI, HasVLX]>; + +// VPCMPEQW - i16 +defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpeqm, + "VPCMPEQWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpeqm, + "VPCMPEQWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQWZ128", [HasBWI, HasVLX]>; + +defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpeqm, + "VPCMPEQWZ256", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQWZ256", [HasBWI, HasVLX]>; + +defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpeqm, + "VPCMPEQWZ", [HasBWI]>; + +// VPCMPEQD - i32 +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpeqm, + "VPCMPEQDZ128", [HasAVX512, HasVLX]>; +defm : 
avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpeqm, + "VPCMPEQDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpeqm, + "VPCMPEQDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQDZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpeqm, + "VPCMPEQDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpeqm, + "VPCMPEQDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQDZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpeqm, + "VPCMPEQDZ", [HasAVX512]>; +defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpeqm, + "VPCMPEQDZ", [HasAVX512]>; + +// VPCMPEQQ - i64 +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpeqm, + "VPCMPEQQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpeqm, + "VPCMPEQQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpeqm, + "VPCMPEQQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpeqm, + "VPCMPEQQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQQZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpeqm, + "VPCMPEQQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpeqm, + "VPCMPEQQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpeqm, + "VPCMPEQQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQQZ256", [HasAVX512, HasVLX]>; + +defm : 
avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpeqm, + "VPCMPEQQZ", [HasAVX512]>; +defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpeqm, + "VPCMPEQQZ", [HasAVX512]>; +defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpeqm, + "VPCMPEQQZ", [HasAVX512]>; + +// VPCMPGTB - i8 +defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpgtm, + "VPCMPGTBZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTBZ128", [HasBWI, HasVLX]>; + +defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTBZ256", [HasBWI, HasVLX]>; + +// VPCMPGTW - i16 +defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpgtm, + "VPCMPGTWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpgtm, + "VPCMPGTWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTWZ128", [HasBWI, HasVLX]>; + +defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpgtm, + "VPCMPGTWZ256", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTWZ256", [HasBWI, HasVLX]>; + +defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpgtm, + "VPCMPGTWZ", [HasBWI]>; + +// VPCMPGTD - i32 +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpgtm, + "VPCMPGTDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpgtm, + "VPCMPGTDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpgtm, + "VPCMPGTDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTDZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpgtm, + "VPCMPGTDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, 
v32i1_info, X86pcmpgtm, + "VPCMPGTDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTDZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpgtm, + "VPCMPGTDZ", [HasAVX512]>; +defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpgtm, + "VPCMPGTDZ", [HasAVX512]>; + +// VPCMPGTQ - i64 +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpgtm, + "VPCMPGTQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpgtm, + "VPCMPGTQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpgtm, + "VPCMPGTQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpgtm, + "VPCMPGTQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTQZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpgtm, + "VPCMPGTQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpgtm, + "VPCMPGTQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpgtm, + "VPCMPGTQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTQZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpgtm, + "VPCMPGTQZ", [HasAVX512]>; +defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpgtm, + "VPCMPGTQZ", [HasAVX512]>; +defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpgtm, + "VPCMPGTQZ", [HasAVX512]>; multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode, X86VectorVTInfo _> { @@ -1908,6 +2122,237 @@ defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info, defm VPCMPUQ : 
avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info, HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; +multiclass avx512_icmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf, + SDNode OpNode, string InstrStr, + list<Predicate> Preds> { +let Predicates = Preds in { + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc)), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1, + _.RC:$src2, + imm:$cc), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (OpNode (_.VT _.RC:$src1), + (_.VT (bitconvert (_.LdFrag addr:$src2))), + imm:$cc)), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1, + addr:$src2, + imm:$cc), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (and _.KRCWM:$mask, + (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrik) _.KRCWM:$mask, + _.RC:$src1, + _.RC:$src2, + imm:$cc), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (and (_.KVT _.KRCWM:$mask), + (_.KVT (OpNode (_.VT _.RC:$src1), + (_.VT (bitconvert + (_.LdFrag addr:$src2))), + imm:$cc)))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmik) _.KRCWM:$mask, + _.RC:$src1, + addr:$src2, + imm:$cc), + NewInf.KRC)>; +} +} + +multiclass avx512_icmp_cc_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf, + SDNode OpNode, string InstrStr, + list<Predicate> Preds> + : avx512_icmp_cc_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> { +let Predicates = Preds in { + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (OpNode (_.VT _.RC:$src1), + (X86VBroadcast (_.ScalarLdFrag addr:$src2)), + imm:$cc)), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmib) _.RC:$src1, + addr:$src2, + imm:$cc), + NewInf.KRC)>; + + def : Pat<(insert_subvector 
(NewInf.KVT immAllZerosV), + (_.KVT (and (_.KVT _.KRCWM:$mask), + (_.KVT (OpNode (_.VT _.RC:$src1), + (X86VBroadcast + (_.ScalarLdFrag addr:$src2)), + imm:$cc)))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmibk) _.KRCWM:$mask, + _.RC:$src1, + addr:$src2, + imm:$cc), + NewInf.KRC)>; +} +} + +// VPCMPB - i8 +defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpm, + "VPCMPBZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpm, + "VPCMPBZ128", [HasBWI, HasVLX]>; + +defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpm, + "VPCMPBZ256", [HasBWI, HasVLX]>; + +// VPCMPW - i16 +defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpm, + "VPCMPWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpm, + "VPCMPWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpm, + "VPCMPWZ128", [HasBWI, HasVLX]>; + +defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpm, + "VPCMPWZ256", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpm, + "VPCMPWZ256", [HasBWI, HasVLX]>; + +defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpm, + "VPCMPWZ", [HasBWI]>; + +// VPCMPD - i32 +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpm, + "VPCMPDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpm, + "VPCMPDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpm, + "VPCMPDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpm, + "VPCMPDZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpm, + "VPCMPDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpm, + 
"VPCMPDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpm, + "VPCMPDZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpm, + "VPCMPDZ", [HasAVX512]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpm, + "VPCMPDZ", [HasAVX512]>; + +// VPCMPQ - i64 +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpm, + "VPCMPQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpm, + "VPCMPQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpm, + "VPCMPQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpm, + "VPCMPQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpm, + "VPCMPQZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpm, + "VPCMPQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpm, + "VPCMPQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpm, + "VPCMPQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpm, + "VPCMPQZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpm, + "VPCMPQZ", [HasAVX512]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpm, + "VPCMPQZ", [HasAVX512]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpm, + "VPCMPQZ", [HasAVX512]>; + +// VPCMPUB - i8 +defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpmu, + "VPCMPUBZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpmu, + "VPCMPUBZ128", [HasBWI, HasVLX]>; + +defm : 
avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpmu, + "VPCMPUBZ256", [HasBWI, HasVLX]>; + +// VPCMPUW - i16 +defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpmu, + "VPCMPUWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpmu, + "VPCMPUWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpmu, + "VPCMPUWZ128", [HasBWI, HasVLX]>; + +defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpmu, + "VPCMPUWZ256", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpmu, + "VPCMPUWZ256", [HasBWI, HasVLX]>; + +defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpmu, + "VPCMPUWZ", [HasBWI]>; + +// VPCMPUD - i32 +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpmu, + "VPCMPUDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpmu, + "VPCMPUDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpmu, + "VPCMPUDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpmu, + "VPCMPUDZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpmu, + "VPCMPUDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpmu, + "VPCMPUDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpmu, + "VPCMPUDZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpmu, + "VPCMPUDZ", [HasAVX512]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpmu, + "VPCMPUDZ", [HasAVX512]>; + +// VPCMPUQ - i64 +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpmu, + "VPCMPUQZ128", [HasAVX512, HasVLX]>; +defm : 
avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpmu, + "VPCMPUQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpmu, + "VPCMPUQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpmu, + "VPCMPUQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpmu, + "VPCMPUQZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpmu, + "VPCMPUQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpmu, + "VPCMPUQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpmu, + "VPCMPUQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpmu, + "VPCMPUQZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpmu, + "VPCMPUQZ", [HasAVX512]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpmu, + "VPCMPUQZ", [HasAVX512]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpmu, + "VPCMPUQZ", [HasAVX512]>; + multiclass avx512_vcmp_common<X86VectorVTInfo _> { defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, @@ -1998,21 +2443,108 @@ defm VCMPPD : avx512_vcmp<avx512vl_f64_info>, defm VCMPPS : avx512_vcmp<avx512vl_f32_info>, AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; -def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)), - (COPY_TO_REGCLASS (VCMPPSZrri - (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), VK8)>; -def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), - (COPY_TO_REGCLASS (VPCMPDZrri - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), 
VR256X:$src2, sub_ymm)), - imm:$cc), VK8)>; -def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), - (COPY_TO_REGCLASS (VPCMPUDZrri - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), VK8)>; +multiclass avx512_fcmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf, + string InstrStr, list<Predicate> Preds> { +let Predicates = Preds in { + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (X86cmpm (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc)), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1, + _.RC:$src2, + imm:$cc), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (X86cmpm (_.VT _.RC:$src1), + (_.VT (bitconvert (_.LdFrag addr:$src2))), + imm:$cc)), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1, + addr:$src2, + imm:$cc), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (X86cmpm (_.VT _.RC:$src1), + (X86VBroadcast (_.ScalarLdFrag addr:$src2)), + imm:$cc)), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbi) _.RC:$src1, + addr:$src2, + imm:$cc), + NewInf.KRC)>; +} +} + +multiclass avx512_fcmp_cc_packed_sae_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf, + string InstrStr, list<Predicate> Preds> + : avx512_fcmp_cc_packed_lowering<_, NewInf, InstrStr, Preds> { + +let Predicates = Preds in + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (X86cmpmRnd (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc, + (i32 FROUND_NO_EXC))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrib) _.RC:$src1, + _.RC:$src2, + imm:$cc), + NewInf.KRC)>; +} + + +// VCMPPS - f32 +defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v8i1_info, "VCMPPSZ128", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v16i1_info, "VCMPPSZ128", + [HasAVX512, 
HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v32i1_info, "VCMPPSZ128", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v64i1_info, "VCMPPSZ128", + [HasAVX512, HasVLX]>; + +defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v16i1_info, "VCMPPSZ256", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v32i1_info, "VCMPPSZ256", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v64i1_info, "VCMPPSZ256", + [HasAVX512, HasVLX]>; + +defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v32i1_info, "VCMPPSZ", + [HasAVX512]>; +defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v64i1_info, "VCMPPSZ", + [HasAVX512]>; + +// VCMPPD - f64 +defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v4i1_info, "VCMPPDZ128", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v8i1_info, "VCMPPDZ128", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v16i1_info, "VCMPPDZ128", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v32i1_info, "VCMPPDZ128", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v64i1_info, "VCMPPDZ128", + [HasAVX512, HasVLX]>; + +defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v8i1_info, "VCMPPDZ256", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v16i1_info, "VCMPPDZ256", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v32i1_info, "VCMPPDZ256", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v64i1_info, "VCMPPDZ256", + [HasAVX512, HasVLX]>; + +defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v16i1_info, "VCMPPDZ", + [HasAVX512]>; +defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v32i1_info, "VCMPPDZ", + [HasAVX512]>; +defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v64i1_info, "VCMPPDZ", + [HasAVX512]>; // 
---------------------------------------------------------------- // FPClass @@ -2498,6 +3030,69 @@ multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr, defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>; defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>; +multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr> { +def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), + (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrr) + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>; + +def : Pat<(insert_subvector (v16i1 immAllZerosV), + (v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), + (i64 0)), + (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrr) + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), + (i8 8)), (i8 8))>; + +def : Pat<(insert_subvector (v16i1 immAllZerosV), + (v8i1 (and VK8:$mask, + (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))), + (i64 0)), + (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrk) + (COPY_TO_REGCLASS VK8:$mask, VK16), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), + (i8 8)), (i8 8))>; +} + +multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr, + AVX512VLVectorVTInfo _> { +def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrri) + (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), + imm:$cc), VK8)>; + +def : Pat<(insert_subvector (v16i1 immAllZerosV), + (v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)), + (i64 0)), + (KSHIFTRWri 
(KSHIFTLWri (!cast<Instruction>(InstStr##Zrri) + (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), + imm:$cc), + (i8 8)), (i8 8))>; + +def : Pat<(insert_subvector (v16i1 immAllZerosV), + (v8i1 (and VK8:$mask, + (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc))), + (i64 0)), + (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrik) + (COPY_TO_REGCLASS VK8:$mask, VK16), + (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), + imm:$cc), + (i8 8)), (i8 8))>; +} + +let Predicates = [HasAVX512, NoVLX] in { + defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD">; + defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD">; + + defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", avx512vl_f32_info>; + defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", avx512vl_i32_info>; + defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD", avx512vl_i32_info>; +} + // Mask setting all 0s or 1s multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> { let Predicates = [HasAVX512] in diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp index f98c2a7e802d..e34a90e975b8 100644 --- a/lib/Target/X86/X86InstructionSelector.cpp +++ b/lib/Target/X86/X86InstructionSelector.cpp @@ -75,6 +75,8 @@ private: bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const; + bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI, @@ -270,6 +272,8 @@ bool X86InstructionSelector::select(MachineInstr &I) 
const { return true; if (selectUadde(I, MRI, MF)) return true; + if (selectMergeValues(I, MRI, MF)) + return true; if (selectExtract(I, MRI, MF)) return true; if (selectInsert(I, MRI, MF)) @@ -914,6 +918,55 @@ bool X86InstructionSelector::selectInsert(MachineInstr &I, return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } +bool X86InstructionSelector::selectMergeValues(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + if (I.getOpcode() != TargetOpcode::G_MERGE_VALUES) + return false; + + // Split to inserts. + unsigned DstReg = I.getOperand(0).getReg(); + unsigned SrcReg0 = I.getOperand(1).getReg(); + + const LLT DstTy = MRI.getType(DstReg); + const LLT SrcTy = MRI.getType(SrcReg0); + unsigned SrcSize = SrcTy.getSizeInBits(); + + const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); + + // For the first src use insertSubReg. + unsigned DefReg = MRI.createGenericVirtualRegister(DstTy); + MRI.setRegBank(DefReg, RegBank); + if (!emitInsertSubreg(DefReg, I.getOperand(1).getReg(), I, MRI, MF)) + return false; + + for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) { + + unsigned Tmp = MRI.createGenericVirtualRegister(DstTy); + MRI.setRegBank(Tmp, RegBank); + + MachineInstr &InsertInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(TargetOpcode::G_INSERT), Tmp) + .addReg(DefReg) + .addReg(I.getOperand(Idx).getReg()) + .addImm((Idx - 1) * SrcSize); + + DefReg = Tmp; + + if (!select(InsertInst)) + return false; + } + + MachineInstr &CopyInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(TargetOpcode::COPY), DstReg) + .addReg(DefReg); + + if (!select(CopyInst)) + return false; + + I.eraseFromParent(); + return true; +} InstructionSelector * llvm::createX86InstructionSelector(const X86TargetMachine &TM, X86Subtarget &Subtarget, diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp index a584eabcc1b2..a5fa3340c3f1 100644 --- a/lib/Target/X86/X86LegalizerInfo.cpp +++ 
b/lib/Target/X86/X86LegalizerInfo.cpp @@ -56,7 +56,7 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); - for (unsigned BinOp : {G_ADD, G_SUB, G_MUL}) + for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) for (auto Ty : {s8, s16, s32}) setAction({BinOp, Ty}, Legal); @@ -117,7 +117,7 @@ void X86LegalizerInfo::setLegalizerInfo64bit() { const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); - for (unsigned BinOp : {G_ADD, G_SUB, G_MUL}) + for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) for (auto Ty : {s8, s16, s32, s64}) setAction({BinOp, Ty}, Legal); @@ -228,10 +228,14 @@ void X86LegalizerInfo::setLegalizerInfoAVX() { for (auto Ty : {v8s32, v4s64}) setAction({MemOp, Ty}, Legal); - for (auto Ty : {v32s8, v16s16, v8s32, v4s64}) + for (auto Ty : {v32s8, v16s16, v8s32, v4s64}) { setAction({G_INSERT, Ty}, Legal); - for (auto Ty : {v16s8, v8s16, v4s32, v2s64}) + setAction({G_EXTRACT, 1, Ty}, Legal); + } + for (auto Ty : {v16s8, v8s16, v4s32, v2s64}) { setAction({G_INSERT, 1, Ty}, Legal); + setAction({G_EXTRACT, Ty}, Legal); + } } void X86LegalizerInfo::setLegalizerInfoAVX2() { @@ -280,10 +284,14 @@ void X86LegalizerInfo::setLegalizerInfoAVX512() { for (auto Ty : {v16s32, v8s64}) setAction({MemOp, Ty}, Legal); - for (auto Ty : {v64s8, v32s16, v16s32, v8s64}) + for (auto Ty : {v64s8, v32s16, v16s32, v8s64}) { setAction({G_INSERT, Ty}, Legal); - for (auto Ty : {v32s8, v16s16, v8s32, v4s64, v16s8, v8s16, v4s32, v2s64}) + setAction({G_EXTRACT, 1, Ty}, Legal); + } + for (auto Ty : {v32s8, v16s16, v8s32, v4s64, v16s8, v8s16, v4s32, v2s64}) { setAction({G_INSERT, 1, Ty}, Legal); + setAction({G_EXTRACT, Ty}, Legal); + } /************ VLX *******************/ if (!Subtarget.hasVLX()) diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index e36a47506ba0..24845beac22d 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp 
@@ -11,10 +11,23 @@ // //===----------------------------------------------------------------------===// +#include "X86.h" + +#ifdef LLVM_BUILD_GLOBAL_ISEL +#include "X86CallLowering.h" +#include "X86LegalizerInfo.h" +#include "X86RegisterBankInfo.h" +#endif #include "X86Subtarget.h" #include "MCTargetDesc/X86BaseInfo.h" #include "X86TargetMachine.h" #include "llvm/ADT/Triple.h" +#ifdef LLVM_BUILD_GLOBAL_ISEL +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" +#endif #include "llvm/IR/Attributes.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Function.h" @@ -336,6 +349,35 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU, return *this; } +#ifdef LLVM_BUILD_GLOBAL_ISEL +namespace { + +struct X86GISelActualAccessor : public GISelAccessor { + std::unique_ptr<CallLowering> CallLoweringInfo; + std::unique_ptr<LegalizerInfo> Legalizer; + std::unique_ptr<RegisterBankInfo> RegBankInfo; + std::unique_ptr<InstructionSelector> InstSelector; + + const CallLowering *getCallLowering() const override { + return CallLoweringInfo.get(); + } + + const InstructionSelector *getInstructionSelector() const override { + return InstSelector.get(); + } + + const LegalizerInfo *getLegalizerInfo() const override { + return Legalizer.get(); + } + + const RegisterBankInfo *getRegBankInfo() const override { + return RegBankInfo.get(); + } +}; + +} // end anonymous namespace +#endif + X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, const X86TargetMachine &TM, unsigned StackAlignOverride) @@ -360,6 +402,19 @@ X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, setPICStyle(PICStyles::StubPIC); else if (isTargetELF()) setPICStyle(PICStyles::GOT); +#ifndef LLVM_BUILD_GLOBAL_ISEL + GISelAccessor *GISel = new GISelAccessor(); +#else + X86GISelActualAccessor *GISel = 
new X86GISelActualAccessor(); + + GISel->CallLoweringInfo.reset(new X86CallLowering(*getTargetLowering())); + GISel->Legalizer.reset(new X86LegalizerInfo(*this, TM)); + + auto *RBI = new X86RegisterBankInfo(*getRegisterInfo()); + GISel->RegBankInfo.reset(RBI); + GISel->InstSelector.reset(createX86InstructionSelector(TM, *this, *RBI)); +#endif + setGISelAccessor(*GISel); } const CallLowering *X86Subtarget::getCallLowering() const { diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 550e95c39ab5..fa0afe29586b 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -58,7 +58,7 @@ protected: }; enum X86ProcFamilyEnum { - Others, IntelAtom, IntelSLM + Others, IntelAtom, IntelSLM, IntelGLM }; /// X86 processor family: Intel Atom, and others diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index a9f42cacf788..8d891c983fab 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -15,9 +15,6 @@ #include "X86.h" #include "X86CallLowering.h" #include "X86LegalizerInfo.h" -#ifdef LLVM_BUILD_GLOBAL_ISEL -#include "X86RegisterBankInfo.h" -#endif #include "X86MacroFusion.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" @@ -31,7 +28,6 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/ExecutionDepsFix.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" -#include "llvm/CodeGen/GlobalISel/GISelAccessor.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" @@ -212,35 +208,6 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT, X86TargetMachine::~X86TargetMachine() = default; -#ifdef LLVM_BUILD_GLOBAL_ISEL -namespace { - -struct X86GISelActualAccessor : public GISelAccessor { - std::unique_ptr<CallLowering> CallLoweringInfo; - std::unique_ptr<LegalizerInfo> Legalizer; - std::unique_ptr<RegisterBankInfo> 
RegBankInfo; - std::unique_ptr<InstructionSelector> InstSelector; - - const CallLowering *getCallLowering() const override { - return CallLoweringInfo.get(); - } - - const InstructionSelector *getInstructionSelector() const override { - return InstSelector.get(); - } - - const LegalizerInfo *getLegalizerInfo() const override { - return Legalizer.get(); - } - - const RegisterBankInfo *getRegBankInfo() const override { - return RegBankInfo.get(); - } -}; - -} // end anonymous namespace -#endif - const X86Subtarget * X86TargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); @@ -280,20 +247,6 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const { resetTargetOptions(F); I = llvm::make_unique<X86Subtarget>(TargetTriple, CPU, FS, *this, Options.StackAlignmentOverride); -#ifndef LLVM_BUILD_GLOBAL_ISEL - GISelAccessor *GISel = new GISelAccessor(); -#else - X86GISelActualAccessor *GISel = new X86GISelActualAccessor(); - - GISel->CallLoweringInfo.reset(new X86CallLowering(*I->getTargetLowering())); - GISel->Legalizer.reset(new X86LegalizerInfo(*I, *this)); - - auto *RBI = new X86RegisterBankInfo(*I->getRegisterInfo()); - GISel->RegBankInfo.reset(RBI); - GISel->InstSelector.reset(createX86InstructionSelector( - *this, *I, *RBI)); -#endif - I->setGISelAccessor(*GISel); } return I.get(); } diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 1bf267d34ec2..aaa6d58bd134 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -40,6 +40,7 @@ public: ~X86TargetMachine() override; const X86Subtarget *getSubtargetImpl(const Function &F) const override; + const X86Subtarget *getSubtargetImpl() const = delete; TargetIRAnalysis getTargetIRAnalysis() override; |