author    | Dimitry Andric <dim@FreeBSD.org> | 2015-12-30 13:13:10 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2015-12-30 13:13:10 +0000
commit    | 7d523365ff1a3cc95bc058b33102500f61e8166d (patch)
tree      | b466a4817f79516eb1df8eae92bccf62ecc84003 /contrib/llvm/lib/CodeGen/SelectionDAG
parent    | e3b65fde506060bec5cd110fcf03b440bd0eea1d (diff)
parent    | dd58ef019b700900793a1eb48b52123db01b654e (diff)
Update llvm to trunk r256633.
Notes:
svn path=/projects/clang380-import/; revision=292941
Diffstat (limited to 'contrib/llvm/lib/CodeGen/SelectionDAG')
24 files changed, 5051 insertions, 2921 deletions
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3b29306bb54a..0872d7a9a228 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -156,13 +156,16 @@ namespace {
     void deleteAndRecombine(SDNode *N);
     bool recursivelyDeleteUnusedNodes(SDNode *N);
 
+    /// Replaces all uses of the results of one DAG node with new values.
     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                       bool AddTo = true);
 
+    /// Replaces all uses of the results of one DAG node with new values.
     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
       return CombineTo(N, &Res, 1, AddTo);
     }
 
+    /// Replaces all uses of the results of one DAG node with new values.
     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                       bool AddTo = true) {
       SDValue To[] = { Res0, Res1 };
@@ -233,18 +236,17 @@ namespace {
     SDValue visitADDE(SDNode *N);
     SDValue visitSUBE(SDNode *N);
     SDValue visitMUL(SDNode *N);
+    SDValue useDivRem(SDNode *N);
     SDValue visitSDIV(SDNode *N);
     SDValue visitUDIV(SDNode *N);
-    SDValue visitSREM(SDNode *N);
-    SDValue visitUREM(SDNode *N);
+    SDValue visitREM(SDNode *N);
     SDValue visitMULHU(SDNode *N);
     SDValue visitMULHS(SDNode *N);
     SDValue visitSMUL_LOHI(SDNode *N);
     SDValue visitUMUL_LOHI(SDNode *N);
     SDValue visitSMULO(SDNode *N);
     SDValue visitUMULO(SDNode *N);
-    SDValue visitSDIVREM(SDNode *N);
-    SDValue visitUDIVREM(SDNode *N);
+    SDValue visitIMINMAX(SDNode *N);
     SDValue visitAND(SDNode *N);
     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
     SDValue visitOR(SDNode *N);
@@ -265,6 +267,7 @@ namespace {
     SDValue visitVSELECT(SDNode *N);
     SDValue visitSELECT_CC(SDNode *N);
     SDValue visitSETCC(SDNode *N);
+    SDValue visitSETCCE(SDNode *N);
     SDValue visitSIGN_EXTEND(SDNode *N);
     SDValue visitZERO_EXTEND(SDNode *N);
     SDValue visitANY_EXTEND(SDNode *N);
@@ -298,6 +301,10 @@ namespace {
     SDValue visitBRCOND(SDNode *N);
     SDValue visitBR_CC(SDNode *N);
     SDValue visitLOAD(SDNode *N);
+
+    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
+    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
+
     SDValue visitSTORE(SDNode *N);
     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
@@ -312,9 +319,11 @@ namespace {
     SDValue visitMGATHER(SDNode *N);
     SDValue visitMSCATTER(SDNode *N);
     SDValue visitFP_TO_FP16(SDNode *N);
+    SDValue visitFP16_TO_FP(SDNode *N);
 
     SDValue visitFADDForFMACombine(SDNode *N);
     SDValue visitFSUBForFMACombine(SDNode *N);
+    SDValue visitFMULForFMACombine(SDNode *N);
 
     SDValue XformToShuffleWithZero(SDNode *N);
     SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
@@ -338,14 +347,17 @@ namespace {
                              unsigned HiOp);
     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
     SDValue CombineExtLoad(SDNode *N);
+    SDValue combineRepeatedFPDivisors(SDNode *N);
     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
     SDValue BuildSDIV(SDNode *N);
     SDValue BuildSDIVPow2(SDNode *N);
     SDValue BuildUDIV(SDNode *N);
-    SDValue BuildReciprocalEstimate(SDValue Op);
-    SDValue BuildRsqrtEstimate(SDValue Op);
-    SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations);
-    SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations);
+    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
+    SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
+    SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
+                                 SDNodeFlags *Flags);
+    SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
+                                 SDNodeFlags *Flags);
     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                bool DemandHighBits = true);
     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@@ -374,6 +386,10 @@ namespace {
     /// chain (aliasing node.)
     SDValue FindBetterChain(SDNode *N, SDValue Chain);
 
+    /// Do FindBetterChain for a store and any possibly adjacent stores on
+    /// consecutive chains.
+    bool findBetterNeighborChains(StoreSDNode *St);
+
     /// Holds a pointer to an LSBaseSDNode as well as information on where it
     /// is located in a sequence of memory operations connected by a chain.
     struct MemOpLink {
@@ -388,19 +404,37 @@ namespace {
       unsigned SequenceNum;
     };
 
+    /// This is a helper function for visitMUL to check the profitability
+    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
+    /// MulNode is the original multiply, AddNode is (add x, c1),
+    /// and ConstNode is c2.
+    bool isMulAddWithConstProfitable(SDNode *MulNode,
+                                     SDValue &AddNode,
+                                     SDValue &ConstNode);
+
     /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
     /// constant build_vector of the stored constant values in Stores.
     SDValue getMergedConstantVectorStore(SelectionDAG &DAG, SDLoc SL,
                                          ArrayRef<MemOpLink> Stores,
+                                         SmallVectorImpl<SDValue> &Chains,
                                          EVT Ty) const;
 
+    /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
+    /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
+    /// the type of the loaded value to be extended. LoadedVT returns the type
+    /// of the original loaded value. NarrowLoad returns whether the load would
+    /// need to be narrowed in order to match.
+    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
+                          EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
+                          bool &NarrowLoad);
+
     /// This is a helper function for MergeConsecutiveStores. When the source
     /// elements of the consecutive stores are all constants or all extracted
     /// vector elements, try to merge them into one larger store.
     /// \return True if a merged store was created.
     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
-                                         EVT MemVT, unsigned NumElem,
+                                         EVT MemVT, unsigned NumStores,
                                          bool IsConstantSrc, bool UseVector);
 
     /// This is a helper function for MergeConsecutiveStores.
@@ -409,7 +443,7 @@ namespace {
     void getStoreMergeAndAliasCandidates(
         StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
         SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
-
+
     /// Merge consecutive store operations into a wide store.
     /// This optimization uses wide integers or vectors when possible.
     /// \return True if some memory operations were changed.
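The isMulAddWithConstProfitable hook declared above guards visitMUL's distributive rewrite (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2); the hook exists because the rewrite trades one multiply for two and is not always a win when (add x, c1) has other users. The identity itself is exact in two's-complement wraparound arithmetic. A minimal standalone check (plain C++, not the LLVM API this patch touches):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t c1 = 7, c2 = 13;
  for (uint32_t x = 0; x < 100000; ++x)
    // (x + c1) * c2 == x * c2 + c1 * c2 holds modulo 2^32,
    // which is exactly the arithmetic ISD::MUL/ISD::ADD perform.
    assert((x + c1) * c2 == x * c2 + c1 * c2);
  return 0;
}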
@@ -427,9 +461,7 @@ namespace { DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { - auto *F = DAG.getMachineFunction().getFunction(); - ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) || - F->hasFnAttribute(Attribute::MinSize); + ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); } /// Runs the dag combiner on all nodes in the work list @@ -606,6 +638,9 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, assert(Op.hasOneUse() && "Unknown reuse!"); assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); + + const SDNodeFlags *Flags = Op.getNode()->getFlags(); + switch (Op.getOpcode()) { default: llvm_unreachable("Unknown code"); case ISD::ConstantFP: { @@ -623,12 +658,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), - Op.getOperand(1)); + Op.getOperand(1), Flags); // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations, Depth+1), - Op.getOperand(0)); + Op.getOperand(0), Flags); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. assert(Options.UnsafeFPMath); @@ -640,7 +675,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, // fold (fneg (fsub A, B)) -> (fsub B, A) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - Op.getOperand(1), Op.getOperand(0)); + Op.getOperand(1), Op.getOperand(0), Flags); case ISD::FMUL: case ISD::FDIV: @@ -652,13 +687,13 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), - Op.getOperand(1)); + Op.getOperand(1), Flags); // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0), GetNegatedExpression(Op.getOperand(1), DAG, - LegalOperations, Depth+1)); + LegalOperations, Depth+1), Flags); case ISD::FP_EXTEND: case ISD::FSIN: @@ -1216,9 +1251,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) { LegalTypes = Level >= AfterLegalizeTypes; // Add all the dag nodes to the worklist. 
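GetNegatedExpression, which the hunks above teach to forward the node's fast-math Flags, relies on sign-manipulation identities such as (fneg (fadd A, B)) -> (fsub (fneg A), B) and (fneg (fsub A, B)) -> (fsub B, A). A standalone numeric check of the two identities (ordinary C++ doubles; the combiner additionally asserts UnsafeFPMath because of the signed-zero corner case noted in the code):

#include <cassert>

int main() {
  double A = 3.25, B = 1.5;
  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
  assert(-(A + B) == (-A) - B);
  // fold (fneg (fsub A, B)) -> (fsub B, A)
  assert(-(A - B) == B - A);
  // Signed-zero caveat: -(0.0 - 0.0) is -0.0, while 0.0 - 0.0 is +0.0,
  // which is why these folds are only done under UnsafeFPMath.
  return 0;
}

The hunk that follows modernizes the worklist-population loop announced by the last comment above into a range-based for.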
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = DAG.allnodes_end(); I != E; ++I) - AddToWorklist(I); + for (SDNode &Node : DAG.allnodes()) + AddToWorklist(&Node); // Create a dummy node (which is not added to allnodes), that adds a reference // to the root node, preventing it from being deleted, and tracking any @@ -1333,16 +1367,18 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::MUL: return visitMUL(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); - case ISD::SREM: return visitSREM(N); - case ISD::UREM: return visitUREM(N); + case ISD::SREM: + case ISD::UREM: return visitREM(N); case ISD::MULHU: return visitMULHU(N); case ISD::MULHS: return visitMULHS(N); case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); case ISD::SMULO: return visitSMULO(N); case ISD::UMULO: return visitUMULO(N); - case ISD::SDIVREM: return visitSDIVREM(N); - case ISD::UDIVREM: return visitUDIVREM(N); + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: return visitIMINMAX(N); case ISD::AND: return visitAND(N); case ISD::OR: return visitOR(N); case ISD::XOR: return visitXOR(N); @@ -1361,6 +1397,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::VSELECT: return visitVSELECT(N); case ISD::SELECT_CC: return visitSELECT_CC(N); case ISD::SETCC: return visitSETCC(N); + case ISD::SETCCE: return visitSETCCE(N); case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); case ISD::ANY_EXTEND: return visitANY_EXTEND(N); @@ -1408,6 +1445,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::MSCATTER: return visitMSCATTER(N); case ISD::MSTORE: return visitMSTORE(N); case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); + case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); } return SDValue(); } @@ -1470,13 +1508,8 @@ SDValue DAGCombiner::combine(SDNode *N) { // Constant operands are canonicalized to RHS. if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { SDValue Ops[] = {N1, N0}; - SDNode *CSENode; - if (const auto *BinNode = dyn_cast<BinaryWithFlagsSDNode>(N)) { - CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, - &BinNode->Flags); - } else { - CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops); - } + SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, + N->getFlags()); if (CSENode) return SDValue(CSENode, 0); } @@ -1595,26 +1628,6 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } -static bool isNullConstant(SDValue V) { - ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); - return Const != nullptr && Const->isNullValue(); -} - -static bool isNullFPConstant(SDValue V) { - ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V); - return Const != nullptr && Const->isZero() && !Const->isNegative(); -} - -static bool isAllOnesConstant(SDValue V) { - ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); - return Const != nullptr && Const->isAllOnesValue(); -} - -static bool isOneConstant(SDValue V) { - ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); - return Const != nullptr && Const->isOne(); -} - /// If \p N is a ContantSDNode with isOpaque() == false return it casted to a /// ContantSDNode pointer else nullptr. static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { @@ -1721,22 +1734,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return SDValue(N, 0); // fold (a+b) -> (a|b) iff a and b share no bits. 
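The rewritten fold, whose new form follows immediately below, replaces the manual computeKnownBits reasoning with DAG.haveNoCommonBitsSet(N0, N1): when two operands share no set bits, no bit position can carry, so addition and bitwise OR coincide. A standalone sketch of that invariant:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t a = 0; a < 256; ++a)
    for (uint32_t b = 0; b < 256; ++b)
      if ((a & b) == 0)           // disjoint bits => no carries anywhere
        assert(a + b == (a | b)); // so the ADD may be rewritten as an OR
  return 0;
}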
- if (VT.isInteger() && !VT.isVector()) { - APInt LHSZero, LHSOne; - APInt RHSZero, RHSOne; - DAG.computeKnownBits(N0, LHSZero, LHSOne); - - if (LHSZero.getBoolValue()) { - DAG.computeKnownBits(N1, RHSZero, RHSOne); - - // If all possibly-set bits on the LHS are clear on the RHS, return an OR. - // If all possibly-set bits on the RHS are clear on the LHS, return an OR. - if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){ - if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); - } - } - } + if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && + VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1)) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && @@ -1971,31 +1971,26 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + SDLoc DL(N); // If the flag result is dead, turn this into an SUB. if (!N->hasAnyUseOfValue(1)) - return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1), - DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), - MVT::Glue)); + return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // fold (subc x, x) -> 0 + no borrow - if (N0 == N1) { - SDLoc DL(N); + if (N0 == N1) return CombineTo(N, DAG.getConstant(0, DL, VT), - DAG.getNode(ISD::CARRY_FALSE, DL, - MVT::Glue)); - } + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // fold (subc x, 0) -> x + no borrow if (isNullConstant(N1)) - return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), - MVT::Glue)); + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow if (isAllOnesConstant(N0)) - return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0), - DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), - MVT::Glue)); + return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); return SDValue(); } @@ -2130,14 +2125,15 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) - if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && - (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || - isa<ConstantSDNode>(N0.getOperand(1)))) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, - DAG.getNode(ISD::MUL, SDLoc(N0), VT, - N0.getOperand(0), N1), - DAG.getNode(ISD::MUL, SDLoc(N1), VT, - N0.getOperand(1), N1)); + if (isConstantIntBuildVectorOrConstantInt(N1) && + N0.getOpcode() == ISD::ADD && + isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && + isMulAddWithConstProfitable(N, N0, N1)) + return DAG.getNode(ISD::ADD, SDLoc(N), VT, + DAG.getNode(ISD::MUL, SDLoc(N0), VT, + N0.getOperand(0), N1), + DAG.getNode(ISD::MUL, SDLoc(N1), VT, + N0.getOperand(1), N1)); // reassociate mul if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1)) @@ -2146,6 +2142,88 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { return SDValue(); } +/// Return true if divmod libcall is available. +static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, + const TargetLowering &TLI) { + RTLIB::Libcall LC; + switch (Node->getSimpleValueType(0).SimpleTy) { + default: return false; // No libcall for vector types. + case MVT::i8: LC= isSigned ? 
RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; + } + + return TLI.getLibcallName(LC) != nullptr; +} + +/// Issue divrem if both quotient and remainder are needed. +SDValue DAGCombiner::useDivRem(SDNode *Node) { + if (Node->use_empty()) + return SDValue(); // This is a dead node, leave it alone. + + EVT VT = Node->getValueType(0); + if (!TLI.isTypeLegal(VT)) + return SDValue(); + + unsigned Opcode = Node->getOpcode(); + bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM); + + unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; + // If DIVREM is going to get expanded into a libcall, + // but there is no libcall available, then don't combine. + if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) && + !isDivRemLibcallAvailable(Node, isSigned, TLI)) + return SDValue(); + + // If div is legal, it's better to do the normal expansion + unsigned OtherOpcode = 0; + if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) { + OtherOpcode = isSigned ? ISD::SREM : ISD::UREM; + if (TLI.isOperationLegalOrCustom(Opcode, VT)) + return SDValue(); + } else { + OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV; + if (TLI.isOperationLegalOrCustom(OtherOpcode, VT)) + return SDValue(); + } + + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDValue combined; + for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), + UE = Op0.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (User == Node || User->use_empty()) + continue; + // Convert the other matching node(s), too; + // otherwise, the DIVREM may get target-legalized into something + // target-specific that we won't be able to recognize. 
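useDivRem, whose user-scanning loop continues below, rewrites sibling SDIV/SREM (or UDIV/UREM) nodes over the same operands into a single SDIVREM/UDIVREM. The payoff is the usual one for hardware divide instructions and divrem libcalls, which produce both results at once. A standalone illustration of the equivalence being exploited, using std::div (plain C++, not part of this patch):

#include <cassert>
#include <cstdlib>

int main() {
  int x = 12345, y = 67;
  std::div_t qr = std::div(x, y); // quotient and remainder from one operation
  assert(qr.quot == x / y);
  assert(qr.rem  == x % y);
  assert(qr.quot * y + qr.rem == x); // the defining identity of divrem
  return 0;
}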
+ unsigned UserOpc = User->getOpcode(); + if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) && + User->getOperand(0) == Op0 && + User->getOperand(1) == Op1) { + if (!combined) { + if (UserOpc == OtherOpcode) { + SDVTList VTs = DAG.getVTList(VT, VT); + combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1); + } else if (UserOpc == DivRemOpc) { + combined = SDValue(User, 0); + } else { + assert(UserOpc == Opcode); + continue; + } + } + if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV) + CombineTo(User, combined); + else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM) + CombineTo(User, combined.getValue(1)); + } + } + return combined; +} + SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2156,26 +2234,26 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; + SDLoc DL(N); + // fold (sdiv c1, c2) -> c1/c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque()) - return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C); // fold (sdiv X, 1) -> X if (N1C && N1C->isOne()) return N0; // fold (sdiv X, -1) -> 0-X - if (N1C && N1C->isAllOnesValue()) { - SDLoc DL(N); + if (N1C && N1C->isAllOnesValue()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); - } + // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 if (!VT.isVector()) { if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(), - N0, N1); + return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); } // fold (sdiv X, pow2) -> simple ops after legalize @@ -2186,18 +2264,11 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() && (N1C->getAPIntValue().isPowerOf2() || (-N1C->getAPIntValue()).isPowerOf2())) { - // If dividing by powers of two is cheap, then don't perform the following - // fold. - if (TLI.isPow2SDivCheap()) - return SDValue(); - // Target-specific implementation of sdiv x, pow2. - SDValue Res = BuildSDIVPow2(N); - if (Res.getNode()) + if (SDValue Res = BuildSDIVPow2(N)) return Res; unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); - SDLoc DL(N); // Splat the sign bit into the register SDValue SGN = @@ -2228,15 +2299,23 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { } // If integer divide is expensive and we satisfy the requirements, emit an - // alternate sequence. - if (N1C && !TLI.isIntDivCheap()) { - SDValue Op = BuildSDIV(N); - if (Op.getNode()) return Op; - } + // alternate sequence. Targets may check function attributes for size/speed + // trade-offs. + AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue Op = BuildSDIV(N)) + return Op; + + // sdiv, srem -> sdivrem + // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. + // Otherwise, we break the simplification logic in visitREM(). 
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue DivRem = useDivRem(N)) + return DivRem; // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // X / undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2254,26 +2333,26 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; + SDLoc DL(N); + // fold (udiv c1, c2) -> c1/c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C) - if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT, + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, N0C, N1C)) return Folded; // fold (udiv x, (1 << c)) -> x >>u c - if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) { - SDLoc DL(N); + if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(N1C->getAPIntValue().logBase2(), DL, getShiftAmountTy(N0.getValueType()))); - } + // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (N1.getOpcode() == ISD::SHL) { if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { EVT ADDVT = N1.getOperand(1).getValueType(); - SDLoc DL(N); SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), DAG.getConstant(SHC->getAPIntValue() @@ -2284,15 +2363,23 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { } } } + // fold (udiv x, c) -> alternate - if (N1C && !TLI.isIntDivCheap()) { - SDValue Op = BuildUDIV(N); - if (Op.getNode()) return Op; - } + AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue Op = BuildUDIV(N)) + return Op; + + // sdiv, srem -> sdivrem + // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. + // Otherwise, we break the simplification logic in visitREM(). + if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue DivRem = useDivRem(N)) + return DivRem; // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // X / undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2300,102 +2387,83 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitSREM(SDNode *N) { +// handles ISD::SREM and ISD::UREM +SDValue DAGCombiner::visitREM(SDNode *N) { + unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); + bool isSigned = (Opcode == ISD::SREM); + SDLoc DL(N); - // fold (srem c1, c2) -> c1%c2 + // fold (rem c1, c2) -> c1%c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C) - if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT, - N0C, N1C)) + if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C)) return Folded; - // If we know the sign bits of both operands are zero, strength reduce to a - // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 - if (!VT.isVector()) { - if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1); - } - // If X/C can be simplified by the division-by-constant logic, lower - // X%C to the equivalent of X-X/C*C. 
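The guard just above keeps visitSDIV from forming DIVREM for a constant divisor unless isIntDivCheap(), because visitREM later rewrites X % C as X - (X / C) * C on top of the strength-reduced division, and a premature DIVREM would defeat that. The remainder identity being relied on, checked standalone (C++ integer division truncates toward zero, so it also covers negative operands):

#include <cassert>

int main() {
  const int c = 7;
  for (int x = -100; x <= 100; ++x)
    assert(x % c == x - (x / c) * c);
  return 0;
}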
- if (N1C && !N1C->isNullValue()) { - SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1); - AddToWorklist(Div.getNode()); - SDValue OptimizedDiv = combine(Div.getNode()); - if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, - OptimizedDiv, N1); - SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); - AddToWorklist(Mul.getNode()); - return Sub; + if (isSigned) { + // If we know the sign bits of both operands are zero, strength reduce to a + // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 + if (!VT.isVector()) { + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UREM, DL, VT, N0, N1); } - } - - // undef % X -> 0 - if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, SDLoc(N), VT); - // X % undef -> undef - if (N1.getOpcode() == ISD::UNDEF) - return N1; - - return SDValue(); -} - -SDValue DAGCombiner::visitUREM(SDNode *N) { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - EVT VT = N->getValueType(0); - - // fold (urem c1, c2) -> c1%c2 - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C) - if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT, - N0C, N1C)) - return Folded; - // fold (urem x, pow2) -> (and x, pow2-1) - if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && - N1C->getAPIntValue().isPowerOf2()) { - SDLoc DL(N); - return DAG.getNode(ISD::AND, DL, VT, N0, - DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); - } - // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) - if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { - if (SHC->getAPIntValue().isPowerOf2()) { - SDLoc DL(N); - SDValue Add = - DAG.getNode(ISD::ADD, DL, VT, N1, + } else { + // fold (urem x, pow2) -> (and x, pow2-1) + if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && + N1C->getAPIntValue().isPowerOf2()) { + return DAG.getNode(ISD::AND, DL, VT, N0, + DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); + } + // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { + if (SHC->getAPIntValue().isPowerOf2()) { + SDValue Add = + DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT)); - AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::AND, DL, VT, N0, Add); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); + } } } } + AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + // If X/C can be simplified by the division-by-constant logic, lower // X%C to the equivalent of X-X/C*C. - if (N1C && !N1C->isNullValue()) { - SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1); + // To avoid mangling nodes, this simplification requires that the combine() + // call for the speculative DIV must not cause a DIVREM conversion. We guard + // against this by skipping the simplification if isIntDivCheap(). When + // div is not cheap, combine will not return a DIVREM. Regardless, + // checking cheapness here makes sense since the simplification results in + // fatter code. + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) { + unsigned DivOpcode = isSigned ? 
ISD::SDIV : ISD::UDIV; + SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1); AddToWorklist(Div.getNode()); SDValue OptimizedDiv = combine(Div.getNode()); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, - OptimizedDiv, N1); - SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); + assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) && + (OptimizedDiv.getOpcode() != ISD::SDIVREM)); + SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1); + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); AddToWorklist(Mul.getNode()); return Sub; } } + // sdiv, srem -> sdivrem + if (SDValue DivRem = useDivRem(N)) + return DivRem.getValue(1); + // undef % X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // X % undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2532,8 +2600,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, } SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); - if (Res.getNode()) return Res; + if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS)) + return Res; EVT VT = N->getValueType(0); SDLoc DL(N); @@ -2563,8 +2631,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { } SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); - if (Res.getNode()) return Res; + if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU)) + return Res; EVT VT = N->getValueType(0); SDLoc DL(N); @@ -2613,16 +2681,26 @@ SDValue DAGCombiner::visitUMULO(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitSDIVREM(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); - if (Res.getNode()) return Res; +SDValue DAGCombiner::visitIMINMAX(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; - return SDValue(); -} + // fold (add c1, c2) -> c1+c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); + if (N0C && N1C) + return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); -SDValue DAGCombiner::visitUDIVREM(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); - if (Res.getNode()) return Res; + // canonicalize constant to RHS + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); return SDValue(); } @@ -2848,10 +2926,13 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, if (Result != ISD::SETCC_INVALID && (!LegalOperations || (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getSimpleValueType()))))) - return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), - LL, LR, Result); + TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) { + EVT CCVT = getSetCCResultType(LL.getValueType()); + if (N0.getValueType() == CCVT || + (!LegalOperations && N0.getValueType() == MVT::i1)) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } } } @@ -2887,6 +2968,46 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, return SDValue(); } +bool 
DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, + EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, + bool &NarrowLoad) { + uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits(); + + if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue())) + return false; + + ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); + LoadedVT = LoadN->getMemoryVT(); + + if (ExtVT == LoadedVT && + (!LegalOperations || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) { + // ZEXTLOAD will match without needing to change the size of the value being + // loaded. + NarrowLoad = false; + return true; + } + + // Do not change the width of a volatile load. + if (LoadN->isVolatile()) + return false; + + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound()) + return false; + + if (LegalOperations && + !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT)) + return false; + + if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT)) + return false; + + NarrowLoad = true; + return true; +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -3079,16 +3200,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) { : cast<LoadSDNode>(N0); if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { - uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); - if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ - EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - EVT LoadedVT = LN0->getMemoryVT(); - EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; - - if (ExtVT == LoadedVT && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, - ExtVT))) { - + auto NarrowLoad = false; + EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; + EVT ExtVT, LoadedVT; + if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT, + NarrowLoad)) { + if (!NarrowLoad) { SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), LN0->getBasePtr(), ExtVT, @@ -3096,14 +3213,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { AddToWorklist(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - - // Do not change the width of a volatile load. - // Do not generate loads of non-round integer types since these can - // be expensive (and would be wrong if the type is not byte sized). - if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, - ExtVT))) { + } else { EVT PtrType = LN0->getOperand(1).getValueType(); unsigned Alignment = LN0->getAlignment(); @@ -3142,10 +3252,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return Combined; // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } + if (N0.getOpcode() == N1.getOpcode()) + if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) + return Tmp; // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) // fold (and (sra)) -> (and (srl)) when possible. 
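isAndLoadExtLoad, defined above, accepts (and (load x), c) only when c is a low-bit mask, since only then does the AND behave like a zero-extending load of ActiveBits bits. A standalone sketch of the mask test (equivalent in effect to the APIntOps::isMask check, assuming 32-bit constants; isLowBitMask is a hypothetical helper, not LLVM code):

#include <cassert>
#include <cstdint>

// A nonzero value is a low-bit mask iff adding one clears every set bit.
static bool isLowBitMask(uint32_t c) {
  return c != 0 && (c & (c + 1)) == 0;
}

int main() {
  assert(isLowBitMask(0xFF));   // (and (load32 x) 0xFF) acts like an i8 zextload
  assert(isLowBitMask(0xFFFF)); // (and (load32 x) 0xFFFF) like an i16 zextload
  assert(!isLowBitMask(0xF0));  // low bits clear: not an extload pattern
  return 0;
}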
@@ -3507,10 +3616,13 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { if (Result != ISD::SETCC_INVALID && (!LegalOperations || (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getValueType()))))) - return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), - LL, LR, Result); + TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) { + EVT CCVT = getSetCCResultType(LL.getValueType()); + if (N0.getValueType() == CCVT || + (!LegalOperations && N0.getValueType() == MVT::i1)) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } } } @@ -3665,11 +3777,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return Combined; // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) - SDValue BSwap = MatchBSwapHWord(N, N0, N1); - if (BSwap.getNode()) + if (SDValue BSwap = MatchBSwapHWord(N, N0, N1)) return BSwap; - BSwap = MatchBSwapHWordLow(N, N0, N1); - if (BSwap.getNode()) + if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1)) return BSwap; // reassociate or @@ -3690,10 +3800,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } } // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } + if (N0.getOpcode() == N1.getOpcode()) + if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) + return Tmp; // See if this is some rotate idiom. if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) @@ -3710,7 +3819,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { /// Match "(X shl/srl V1) & V2" where V2 may not be present. static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { if (Op.getOpcode() == ISD::AND) { - if (isa<ConstantSDNode>(Op.getOperand(1))) { + if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { Mask = Op.getOperand(1); Op = Op.getOperand(0); } else { @@ -3727,105 +3836,106 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { } // Return true if we can prove that, whenever Neg and Pos are both in the -// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that +// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that // for two opposing shifts shift1 and shift2 and a value X with OpBits bits: // // (or (shift1 X, Neg), (shift2 X, Pos)) // // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate -// in direction shift1 by Neg. The range [0, OpSize) means that we only need +// in direction shift1 by Neg. The range [0, EltSize) means that we only need // to consider shift amounts with defined behavior. -static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) { - // If OpSize is a power of 2 then: +static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { + // If EltSize is a power of 2 then: // - // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1) - // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize). + // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1) + // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize). // - // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check + // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check // for the stronger condition: // - // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A] + // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A] // - // for all Neg and Pos. 
Since Neg & (OpSize - 1) == Neg' & (OpSize - 1) + // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1) // we can just replace Neg with Neg' for the rest of the function. // // In other cases we check for the even stronger condition: // - // Neg == OpSize - Pos [B] + // Neg == EltSize - Pos [B] // // for all Neg and Pos. Note that the (or ...) then invokes undefined - // behavior if Pos == 0 (and consequently Neg == OpSize). + // behavior if Pos == 0 (and consequently Neg == EltSize). // - // We could actually use [A] whenever OpSize is a power of 2, but the + // We could actually use [A] whenever EltSize is a power of 2, but the // only extra cases that it would match are those uninteresting ones // where Neg and Pos are never in range at the same time. E.g. for - // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) + // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) // as well as (sub 32, Pos), but: // // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) // // always invokes undefined behavior for 32-bit X. // - // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise. + // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise. unsigned MaskLoBits = 0; - if (Neg.getOpcode() == ISD::AND && - isPowerOf2_64(OpSize) && - Neg.getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) { - Neg = Neg.getOperand(0); - MaskLoBits = Log2_64(OpSize); + if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) { + if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) { + if (NegC->getAPIntValue() == EltSize - 1) { + Neg = Neg.getOperand(0); + MaskLoBits = Log2_64(EltSize); + } + } } // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. if (Neg.getOpcode() != ISD::SUB) - return 0; - ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0)); + return false; + ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0)); if (!NegC) - return 0; + return false; SDValue NegOp1 = Neg.getOperand(1); - // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with + // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with // Pos'. The truncation is redundant for the purpose of the equality. - if (MaskLoBits && - Pos.getOpcode() == ISD::AND && - Pos.getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1) - Pos = Pos.getOperand(0); + if (MaskLoBits && Pos.getOpcode() == ISD::AND) + if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) + if (PosC->getAPIntValue() == EltSize - 1) + Pos = Pos.getOperand(0); // The condition we need is now: // - // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask + // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask // // If NegOp1 == Pos then we need: // - // OpSize & Mask == NegC & Mask + // EltSize & Mask == NegC & Mask // // (because "x & Mask" is a truncation and distributes through subtraction). APInt Width; if (Pos == NegOp1) Width = NegC->getAPIntValue(); + // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. 
// Then the condition we want to prove becomes: // - // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask + // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask // // which, again because "x & Mask" is a truncation, becomes: // - // NegC & Mask == (OpSize - PosC) & Mask - // OpSize & Mask == (NegC + PosC) & Mask - else if (Pos.getOpcode() == ISD::ADD && - Pos.getOperand(0) == NegOp1 && - Pos.getOperand(1).getOpcode() == ISD::Constant) - Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() + - NegC->getAPIntValue()); - else + // NegC & Mask == (EltSize - PosC) & Mask + // EltSize & Mask == (NegC + PosC) & Mask + else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) { + if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) + Width = PosC->getAPIntValue() + NegC->getAPIntValue(); + else + return false; + } else return false; - // Now we just need to check that OpSize & Mask == Width & Mask. + // Now we just need to check that EltSize & Mask == Width & Mask. if (MaskLoBits) - // Opsize & Mask is 0 since Mask is Opsize - 1. + // EltSize & Mask is 0 since Mask is EltSize - 1. return Width.getLoBits(MaskLoBits) == 0; - return Width == OpSize; + return Width == EltSize; } // A subroutine of MatchRotate used once we have found an OR of two opposite @@ -3845,7 +3955,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, // (srl x, (*ext y))) -> // (rotr x, y) or (rotl x, (sub 32, y)) EVT VT = Shifted.getValueType(); - if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) { + if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) { bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, HasPos ? Pos : Neg).getNode(); @@ -3888,10 +3998,10 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { if (RHSShift.getOpcode() == ISD::SHL) { std::swap(LHS, RHS); std::swap(LHSShift, RHSShift); - std::swap(LHSMask , RHSMask ); + std::swap(LHSMask, RHSMask); } - unsigned OpSizeInBits = VT.getSizeInBits(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); SDValue LHSShiftArg = LHSShift.getOperand(0); SDValue LHSShiftAmt = LHSShift.getOperand(1); SDValue RHSShiftArg = RHSShift.getOperand(0); @@ -3899,11 +4009,10 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) - if (LHSShiftAmt.getOpcode() == ISD::Constant && - RHSShiftAmt.getOpcode() == ISD::Constant) { - uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue(); - uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue(); - if ((LShVal + RShVal) != OpSizeInBits) + if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) { + uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue(); + uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue(); + if ((LShVal + RShVal) != EltSizeInBits) return nullptr; SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, @@ -3911,18 +4020,23 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // If there is an AND of either shifted operand, apply it to the result. 
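The rotate-matching code picks up again below; the matchRotateSub analysis it depends on was just generalized from OpSize (scalar-only) to EltSize so the match also applies per element to splatted vector shift amounts. The shape being recognized is the standard safe rotate idiom, sketched standalone for a 32-bit element (condition [A] above, with Mask == 31):

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t x, unsigned n) {
  // Pos = n & 31 and Neg = (32 - n) & 31 satisfy
  // Neg == (Pos == 0 ? 0 : 32 - Pos), so this is a rotate for every n.
  return (x << (n & 31)) | (x >> ((32 - n) & 31));
}

int main() {
  assert(rotl32(0x80000001u, 1) == 0x00000003u);
  assert(rotl32(0x12345678u, 0) == 0x12345678u); // n == 0 stays well defined
  return 0;
}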
if (LHSMask.getNode() || RHSMask.getNode()) { - APInt Mask = APInt::getAllOnesValue(OpSizeInBits); + APInt AllBits = APInt::getAllOnesValue(EltSizeInBits); + SDValue Mask = DAG.getConstant(AllBits, DL, VT); if (LHSMask.getNode()) { - APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal); - Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits; + APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal); + Mask = DAG.getNode(ISD::AND, DL, VT, Mask, + DAG.getNode(ISD::OR, DL, VT, LHSMask, + DAG.getConstant(RHSBits, DL, VT))); } if (RHSMask.getNode()) { - APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal); - Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits; + APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal); + Mask = DAG.getNode(ISD::AND, DL, VT, Mask, + DAG.getNode(ISD::OR, DL, VT, RHSMask, + DAG.getConstant(LHSBits, DL, VT))); } - Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT)); + Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask); } return Rot.getNode(); @@ -4112,10 +4226,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } + if (N0.getOpcode() == N1.getOpcode()) + if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) + return Tmp; // Simplify the expression using non-local knowledge. if (!VT.isVector() && @@ -4434,12 +4547,19 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); } - if (N1C && !N1C->isOpaque()) { - SDValue NewSHL = visitShiftByConstant(N, N1C); - if (NewSHL.getNode()) - return NewSHL; + // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2) + if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) { + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { + if (SDValue Folded = + DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C)) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded); + } } + if (N1C && !N1C->isOpaque()) + if (SDValue NewSHL = visitShiftByConstant(N, N1C)) + return NewSHL; + return SDValue(); } @@ -4583,11 +4703,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); - if (N1C && !N1C->isOpaque()) { - SDValue NewSRA = visitShiftByConstant(N, N1C); - if (NewSRA.getNode()) + if (N1C && !N1C->isOpaque()) + if (SDValue NewSRA = visitShiftByConstant(N, N1C)) return NewSRA; - } return SDValue(); } @@ -4744,8 +4862,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { - SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); - if (NewOp1.getNode()) + if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1); } @@ -4754,15 +4871,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); - if (N1C && !N1C->isOpaque()) { - SDValue NewSRL = visitShiftByConstant(N, N1C); - if (NewSRL.getNode()) + if (N1C && !N1C->isOpaque()) + if (SDValue NewSRL = visitShiftByConstant(N, N1C)) return NewSRL; - } // Attempt to convert a srl of a load into a narrower zero-extending load. 
- SDValue NarrowLoad = ReduceLoadWidth(N); - if (NarrowLoad.getNode()) + if (SDValue NarrowLoad = ReduceLoadWidth(N)) return NarrowLoad; // Here is a common situation. We want to optimize: @@ -4973,70 +5087,47 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (SimplifySelectOps(N, N1, N2)) return SDValue(N, 0); // Don't revisit N. - // fold selects based on a setcc into other things, such as min/max/abs - if (N0.getOpcode() == ISD::SETCC) { - // select x, y (fcmp lt x, y) -> fminnum x, y - // select x, y (fcmp gt x, y) -> fmaxnum x, y - // - // This is OK if we don't care about what happens if either operand is a - // NaN. - // - - // FIXME: Instead of testing for UnsafeFPMath, this should be checking for - // no signed zeros as well as no nans. - const TargetOptions &Options = DAG.getTarget().Options; - if (Options.UnsafeFPMath && - VT.isFloatingPoint() && N0.hasOneUse() && - DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { - ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - - SDValue FMinMax = - combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), - N1, N2, CC, TLI, DAG); - if (FMinMax) - return FMinMax; - } - - if ((!LegalOperations && - TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || - TLI.isOperationLegal(ISD::SELECT_CC, VT)) - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), - N1, N2, N0.getOperand(2)); - return SimplifySelect(SDLoc(N), N0, N1, N2); - } - if (VT0 == MVT::i1) { - if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { - // select (and Cond0, Cond1), X, Y - // -> select Cond0, (select Cond1, X, Y), Y - if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { - SDValue Cond0 = N0->getOperand(0); - SDValue Cond1 = N0->getOperand(1); - SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), - N1.getValueType(), Cond1, N1, N2); + // The code in this block deals with the following 2 equivalences: + // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y)) + // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y) + // The target can specify its prefered form with the + // shouldNormalizeToSelectSequence() callback. However we always transform + // to the right anyway if we find the inner select exists in the DAG anyway + // and we always transform to the left side if we know that we can further + // optimize the combination of the conditions. 
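The two select equivalences listed in the comment above are pure boolean identities, so they can be checked exhaustively in isolation; a minimal standalone sketch:

#include <cassert>

static int sel(bool c, int x, int y) { return c ? x : y; }

int main() {
  const int x = 1, y = 2;
  for (int c0 = 0; c0 <= 1; ++c0)
    for (int c1 = 0; c1 <= 1; ++c1) {
      // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
      assert(sel(c0 | c1, x, y) == sel(c0, x, sel(c1, x, y)));
      // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
      assert(sel(c0 & c1, x, y) == sel(c0, sel(c1, x, y), y));
    }
  return 0;
}

Only one (x, y) pair is exercised, but since each select merely routes one of its two data operands, the boolean condition cases are what matter.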
+ bool normalizeToSequence + = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT); + // select (and Cond0, Cond1), X, Y + // -> select Cond0, (select Cond1, X, Y), Y + if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + if (normalizeToSequence || !InnerSelect.use_empty()) return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, InnerSelect, N2); - } - // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) - if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { - SDValue Cond0 = N0->getOperand(0); - SDValue Cond1 = N0->getOperand(1); - SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), - N1.getValueType(), Cond1, N1, N2); + } + // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) + if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + if (normalizeToSequence || !InnerSelect.use_empty()) return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, InnerSelect); - } } // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y - if (N1->getOpcode() == ISD::SELECT) { + if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) { SDValue N1_0 = N1->getOperand(0); SDValue N1_1 = N1->getOperand(1); SDValue N1_2 = N1->getOperand(2); if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { // Create the actual and node if we can generate good code for it. - if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + if (!normalizeToSequence) { SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), N0, N1_0); return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, @@ -5049,13 +5140,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { } } // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y - if (N2->getOpcode() == ISD::SELECT) { + if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) { SDValue N2_0 = N2->getOperand(0); SDValue N2_1 = N2->getOperand(1); SDValue N2_2 = N2->getOperand(2); if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { // Create the actual or node if we can generate good code for it. - if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + if (!normalizeToSequence) { SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), N0, N2_0); return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, @@ -5069,6 +5160,38 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { } } + // fold selects based on a setcc into other things, such as min/max/abs + if (N0.getOpcode() == ISD::SETCC) { + // select x, y (fcmp lt x, y) -> fminnum x, y + // select x, y (fcmp gt x, y) -> fmaxnum x, y + // + // This is OK if we don't care about what happens if either operand is a + // NaN. + // + + // FIXME: Instead of testing for UnsafeFPMath, this should be checking for + // no signed zeros as well as no nans. 
+ const TargetOptions &Options = DAG.getTarget().Options; + if (Options.UnsafeFPMath && + VT.isFloatingPoint() && N0.hasOneUse() && + DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + + if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), + N0.getOperand(1), N1, N2, CC, + TLI, DAG)) + return FMinMax; + } + + if ((!LegalOperations && + TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || + TLI.isOperationLegal(ISD::SELECT_CC, VT)) + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + N1, N2, N0.getOperand(2)); + return SimplifySelect(SDLoc(N), N0, N1, N2); + } + return SDValue(); } @@ -5523,8 +5646,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (N1.getOpcode() == ISD::CONCAT_VECTORS && N2.getOpcode() == ISD::CONCAT_VECTORS && ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { - SDValue CV = ConvertSelectToConcatVector(N, DAG); - if (CV.getNode()) + if (SDValue CV = ConvertSelectToConcatVector(N, DAG)) return CV; } @@ -5580,7 +5702,20 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { SDLoc(N)); } -/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or +SDValue DAGCombiner::visitSETCCE(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDValue Cond = N->getOperand(3); + + // If Carry is false, fold to a regular SETCC. + if (Carry.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond); + + return SDValue(); +} + +/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or /// a build_vector of constants. /// This function is called by the DAGCombiner when visiting sext/zext/aext /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). 
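combineMinNumMaxNum, called in the block above, turns compare-plus-select into fminnum/fmaxnum; the isKnownNeverNaN guards are what make that sound. A standalone sketch of the transform and of the NaN case it must exclude (std::fmin models fminnum's return-the-non-NaN semantics here):

#include <cassert>
#include <cmath>

int main() {
  double a = 1.5, b = 2.5;            // both known never-NaN
  double viaSelect = (a < b) ? a : b; // select a, b (fcmp lt a, b)
  assert(viaSelect == std::fmin(a, b));

  double n = std::nan("");
  double selNaN = (a < n) ? a : n;    // any comparison with NaN is false
  // selNaN is NaN, but fmin(a, NaN) returns a, so with a possibly-NaN
  // operand the select and fminnum disagree; hence the guards.
  assert(std::isnan(selNaN) && std::fmin(a, n) == a);
  return 0;
}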
@@ -5837,8 +5972,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::TRUNCATE) { // fold (sext (truncate (load x))) -> (sext (smaller load x)) // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { SDNode* oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); @@ -6024,7 +6158,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (!VT.isVector()) { EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType()); - if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) { + if (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) { SDLoc DL(N); ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); SDValue SetCC = DAG.getSetCC(DL, SetCCVT, @@ -6120,8 +6255,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { SDNode* oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); @@ -6133,32 +6267,45 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } // fold (zext (truncate x)) -> (and x, mask) - if (N0.getOpcode() == ISD::TRUNCATE && - (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { - + if (N0.getOpcode() == ISD::TRUNCATE) { // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { - SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { + SDNode *oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorklist(oye); } - return SDValue(N, 0); // Return N so it doesn't get rechecked! + return SDValue(N, 0); // Return N so it doesn't get rechecked! } - SDValue Op = N0.getOperand(0); - if (Op.getValueType().bitsLT(VT)) { - Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); - AddToWorklist(Op.getNode()); - } else if (Op.getValueType().bitsGT(VT)) { - Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); - AddToWorklist(Op.getNode()); + EVT SrcVT = N0.getOperand(0).getValueType(); + EVT MinVT = N0.getValueType(); + + // Try to mask before the extension to avoid having to generate a larger mask, + // possibly over several sub-vectors. 
+ if (SrcVT.bitsLT(VT)) { + if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) && + TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) { + SDValue Op = N0.getOperand(0); + Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); + AddToWorklist(Op.getNode()); + return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); + } + } + + if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) { + SDValue Op = N0.getOperand(0); + if (SrcVT.bitsLT(VT)) { + Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); + AddToWorklist(Op.getNode()); + } else if (SrcVT.bitsGT(VT)) { + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); + AddToWorklist(Op.getNode()); + } + return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); } - return DAG.getZeroExtendInReg(Op, SDLoc(N), - N0.getValueType().getScalarType()); } // Fold (zext (and (trunc x), cst)) -> (and x, cst), @@ -6219,6 +6366,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (and/or/xor (load x), cst)) -> // (and/or/xor (zextload x), (zext cst)) + // Unless (and (load x) cst) will match as a zextload already and has + // additional users. if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::XOR) && isa<LoadSDNode>(N0.getOperand(0)) && @@ -6229,9 +6378,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; - if (!N0.hasOneUse()) - DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND, - SetCCs, TLI); + if (!N0.hasOneUse()) { + if (N0.getOpcode() == ISD::AND) { + auto *AndC = cast<ConstantSDNode>(N0.getOperand(1)); + auto NarrowLoad = false; + EVT LoadResultTy = AndC->getValueType(0); + EVT ExtVT, LoadedVT; + if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT, + NarrowLoad)) + DoXform = false; + } + if (DoXform) + DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), + ISD::ZERO_EXTEND, SetCCs, TLI); + } if (DoXform) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), @@ -6378,8 +6538,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext (truncate (load x))) -> (aext (smaller load x)) // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { SDNode* oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); @@ -6546,8 +6705,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { // Watch out for shift count overflow though. 
if (Amt >= Mask.getBitWidth()) break; APInt NewMask = Mask << Amt; - SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask); - if (SimplifyLHS.getNode()) + if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask)) return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, V.getOperand(1)); } @@ -6736,8 +6894,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { unsigned VTBits = VT.getScalarType().getSizeInBits(); unsigned EVTBits = EVT.getScalarType().getSizeInBits(); + if (N0.isUndef()) + return DAG.getUNDEF(VT); + // fold (sext_in_reg c1) -> c1 - if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); // If the input is already sign extended, just drop the extension. @@ -6771,8 +6932,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // fold (sext_in_reg (load x)) -> (smaller sextload x) // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) - SDValue NarrowLoad = ReduceLoadWidth(N); - if (NarrowLoad.getNode()) + if (SDValue NarrowLoad = ReduceLoadWidth(N)) return NarrowLoad; // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) @@ -6831,29 +6991,6 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { BSwap, N1); } - // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs - // into a build_vector. - if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { - SmallVector<SDValue, 8> Elts; - unsigned NumElts = N0->getNumOperands(); - unsigned ShAmt = VTBits - EVTBits; - - for (unsigned i = 0; i != NumElts; ++i) { - SDValue Op = N0->getOperand(i); - if (Op->getOpcode() == ISD::UNDEF) { - Elts.push_back(Op); - continue; - } - - ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); - const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); - Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), - SDLoc(Op), Op.getValueType())); - } - - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts); - } - return SDValue(); } @@ -6999,9 +7136,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { - SDValue Reduced = ReduceLoadWidth(N); - if (Reduced.getNode()) + if (SDValue Reduced = ReduceLoadWidth(N)) return Reduced; + // Handle the case where the load remains an extending load even // after truncation. if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { @@ -7107,6 +7244,12 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { return SDValue(); } +static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) { + // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi + // and Lo parts; on big-endian machines it doesn't. + return DAG.getDataLayout().isBigEndian() ? 
1 : 0; +} + SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -7173,6 +7316,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) + // + // For ppc_fp128: + // fold (bitcast (fneg x)) -> + // flipbit = signbit + // (xor (bitcast x) (build_pair flipbit, flipbit)) + // fold (bitcast (fabs x)) -> + // flipbit = (and (extract_element (bitcast x), 0), signbit) + // (xor (bitcast x) (build_pair flipbit, flipbit)) // This often reduces constant pool loads. if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && @@ -7183,6 +7334,29 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { AddToWorklist(NewConv.getNode()); SDLoc DL(N); + if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { + assert(VT.getSizeInBits() == 128); + SDValue SignBit = DAG.getConstant( + APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); + SDValue FlipBit; + if (N0.getOpcode() == ISD::FNEG) { + FlipBit = SignBit; + AddToWorklist(FlipBit.getNode()); + } else { + assert(N0.getOpcode() == ISD::FABS); + SDValue Hi = + DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv, + DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), + SDLoc(NewConv))); + AddToWorklist(Hi.getNode()); + FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit); + AddToWorklist(FlipBit.getNode()); + } + SDValue FlipBits = + DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); + AddToWorklist(FlipBits.getNode()); + return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits); + } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) return DAG.getNode(ISD::XOR, DL, VT, @@ -7196,6 +7370,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // (or (and (bitconvert x), sign), (and cst, (not sign))) // Note that we don't handle (copysign x, cst) because this can always be // folded to an fneg or fabs. 
+ // + // For ppc_fp128: + // fold (bitcast (fcopysign cst, x)) -> + // flipbit = (and (extract_element + // (xor (bitcast cst), (bitcast x)), 0), + // signbit) + // (xor (bitcast cst) (build_pair flipbit, flipbit)) if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() && !VT.isVector()) { @@ -7224,6 +7405,30 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { AddToWorklist(X.getNode()); } + if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { + APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2); + SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(0)), VT, + N0.getOperand(0)); + AddToWorklist(Cst.getNode()); + SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(1)), VT, + N0.getOperand(1)); + AddToWorklist(X.getNode()); + SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X); + AddToWorklist(XorResult.getNode()); + SDValue XorResult64 = DAG.getNode( + ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult, + DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), + SDLoc(XorResult))); + AddToWorklist(XorResult64.getNode()); + SDValue FlipBit = + DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64, + DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64)); + AddToWorklist(FlipBit.getNode()); + SDValue FlipBits = + DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); + AddToWorklist(FlipBits.getNode()); + return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits); + } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, X, DAG.getConstant(SignBit, SDLoc(X), VT)); @@ -7240,11 +7445,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { } // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. - if (N0.getOpcode() == ISD::BUILD_PAIR) { - SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT); - if (CombineLD.getNode()) + if (N0.getOpcode() == ISD::BUILD_PAIR) + if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT)) return CombineLD; - } // Remove double bitcasts from shuffles - this is often a legacy of // XformToShuffleWithZero being used to combine bitmaskings (of @@ -7257,10 +7460,10 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0); // If operands are a bitcast, peek through if it casts the original VT. - // If operands are a UNDEF or constant, just bitcast back to original VT. + // If operands are a constant, just bitcast back to original VT. auto PeekThroughBitcast = [&](SDValue Op) { if (Op.getOpcode() == ISD::BITCAST && - Op.getOperand(0)->getValueType(0) == VT) + Op.getOperand(0).getValueType() == VT) return SDValue(Op.getOperand(0)); if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) @@ -7431,28 +7634,34 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { SDLoc SL(N); const TargetOptions &Options = DAG.getTarget().Options; - bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast || - Options.UnsafeFPMath); + bool AllowFusion = + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && - TLI.isOperationLegal(ISD::FMAD, VT)); + bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); // Floating-point multiply-add without intermediate rounding. 
- bool HasFMA = ((!LegalOperations || - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) && - TLI.isFMAFasterThanFMulAndFAdd(VT) && - UnsafeFPMath); + bool HasFMA = + AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // No valid opcode, do not combine. if (!HasFMAD && !HasFMA) return SDValue(); // Always prefer FMAD to FMA for precision. - unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); bool LookThroughFPExt = TLI.isFPExtFree(VT); + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + if (Aggressive && N0.getOpcode() == ISD::FMUL && + N1.getOpcode() == ISD::FMUL) { + if (N0.getNode()->use_size() > N1.getNode()->use_size()) + std::swap(N0, N1); + } + // fold (fadd (fmul x, y), z) -> (fma x, y, z) if (N0.getOpcode() == ISD::FMUL && (Aggressive || N0->hasOneUse())) { @@ -7469,7 +7678,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // Look through FP_EXTEND nodes to do more combining. - if (UnsafeFPMath && LookThroughFPExt) { + if (AllowFusion && LookThroughFPExt) { // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); @@ -7495,7 +7704,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // More folding opportunities when target permits. - if ((UnsafeFPMath || HasFMAD) && Aggressive) { + if ((AllowFusion || HasFMAD) && Aggressive) { // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) if (N0.getOpcode() == PreferredFusedOpcode && N0.getOperand(2).getOpcode() == ISD::FMUL) { @@ -7518,7 +7727,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { N0)); } - if (UnsafeFPMath && LookThroughFPExt) { + if (AllowFusion && LookThroughFPExt) { // fold (fadd (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y, (fma (fpext u), (fpext v), z)) auto FoldFAddFMAFPExtFMul = [&] ( @@ -7608,25 +7817,23 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { SDLoc SL(N); const TargetOptions &Options = DAG.getTarget().Options; - bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast || - Options.UnsafeFPMath); + bool AllowFusion = + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && - TLI.isOperationLegal(ISD::FMAD, VT)); + bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); // Floating-point multiply-add without intermediate rounding. - bool HasFMA = ((!LegalOperations || - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) && - TLI.isFMAFasterThanFMulAndFAdd(VT) && - UnsafeFPMath); + bool HasFMA = + AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // No valid opcode, do not combine. if (!HasFMAD && !HasFMA) return SDValue(); // Always prefer FMAD to FMA for precision. - unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); bool LookThroughFPExt = TLI.isFPExtFree(VT); @@ -7659,7 +7866,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } // Look through FP_EXTEND nodes to do more combining. 
- if (UnsafeFPMath && LookThroughFPExt) { + if (AllowFusion && LookThroughFPExt) { // fold (fsub (fpext (fmul x, y)), z) // -> (fma (fpext x), (fpext y), (fneg z)) if (N0.getOpcode() == ISD::FP_EXTEND) { @@ -7735,7 +7942,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } // More folding opportunities when target permits. - if ((UnsafeFPMath || HasFMAD) && Aggressive) { + if ((AllowFusion || HasFMAD) && Aggressive) { // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) if (N0.getOpcode() == PreferredFusedOpcode && @@ -7765,7 +7972,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N21, N0)); } - if (UnsafeFPMath && LookThroughFPExt) { + if (AllowFusion && LookThroughFPExt) { // fold (fsub (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) if (N0.getOpcode() == PreferredFusedOpcode) { @@ -7866,14 +8073,97 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { return SDValue(); } +/// Try to perform FMA combining on a given FMUL node. +SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc SL(N); + + assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation"); + + const TargetOptions &Options = DAG.getTarget().Options; + bool AllowFusion = + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); + + // Floating-point multiply-add with intermediate rounding. + bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = + AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); + + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return SDValue(); + + // Always prefer FMAD to FMA for precision. + unsigned PreferredFusedOpcode = HasFMAD ? 
ISD::FMAD : ISD::FMA; + bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + + // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y) + // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y)) + auto FuseFADD = [&](SDValue X, SDValue Y) { + if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) { + auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); + if (XC1 && XC1->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); + if (XC1 && XC1->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y)); + } + return SDValue(); + }; + + if (SDValue FMA = FuseFADD(N0, N1)) + return FMA; + if (SDValue FMA = FuseFADD(N1, N0)) + return FMA; + + // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y) + // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y)) + // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y)) + // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y) + auto FuseFSUB = [&](SDValue X, SDValue Y) { + if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) { + auto XC0 = isConstOrConstSplatFP(X.getOperand(0)); + if (XC0 && XC0->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, + Y); + if (XC0 && XC0->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y)); + + auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); + if (XC1 && XC1->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y)); + if (XC1 && XC1->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); + } + return SDValue(); + }; + + if (SDValue FMA = FuseFSUB(N0, N1)) + return FMA; + if (SDValue FMA = FuseFSUB(N1, N0)) + return FMA; + + return SDValue(); +} + SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0); + bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; // fold vector ops if (VT.isVector()) @@ -7882,23 +8172,23 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FADD, DL, VT, N0, N1); + return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags); // canonicalize constant to RHS if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FADD, DL, VT, N1, N0); + return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags); // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2) return DAG.getNode(ISD::FSUB, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), Flags); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2) return DAG.getNode(ISD::FSUB, DL, VT, N1, - GetNegatedExpression(N0, DAG, LegalOperations)); + GetNegatedExpression(N0, DAG, LegalOperations), 
Flags); // If 'unsafe math' is enabled, fold lots of things. if (Options.UnsafeFPMath) { @@ -7907,14 +8197,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { bool AllowNewConst = (Level < AfterLegalizeDAG); // fold (fadd A, 0) -> A - if (N1CFP && N1CFP->isZero()) - return N0; + if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) + if (N1C->isZero()) + return N0; // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && - isa<ConstantFPSDNode>(N0.getOperand(1))) + isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), - DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1)); + DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, + Flags), + Flags); // If allowed, fold (fadd (fneg x), x) -> 0.0 if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) @@ -7929,64 +8222,64 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // of rounding steps. if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { if (N0.getOpcode() == ISD::FMUL) { - ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); - ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); + bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); + bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), - DAG.getConstantFP(1.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), + DAG.getConstantFP(1.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags); } // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), - DAG.getConstantFP(2.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), + DAG.getConstantFP(2.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags); } } if (N1.getOpcode() == ISD::FMUL) { - ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); - ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); + bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); + bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), - DAG.getConstantFP(1.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), + DAG.getConstantFP(1.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags); } // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N0.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), - DAG.getConstantFP(2.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), + DAG.getConstantFP(2.0, DL, 
VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags); } } if (N0.getOpcode() == ISD::FADD && AllowNewConst) { - ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); + bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul x, 3.0) - if (!CFP && N0.getOperand(0) == N0.getOperand(1) && + if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && (N0.getOperand(0) == N1)) { return DAG.getNode(ISD::FMUL, DL, VT, - N1, DAG.getConstantFP(3.0, DL, VT)); + N1, DAG.getConstantFP(3.0, DL, VT), Flags); } } if (N1.getOpcode() == ISD::FADD && AllowNewConst) { - ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); + bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && N1.getOperand(0) == N0) { return DAG.getNode(ISD::FMUL, DL, VT, - N0, DAG.getConstantFP(3.0, DL, VT)); + N0, DAG.getConstantFP(3.0, DL, VT), Flags); } } @@ -7996,15 +8289,14 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { - return DAG.getNode(ISD::FMUL, DL, VT, - N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), + DAG.getConstantFP(4.0, DL, VT), Flags); } } } // enable-unsafe-fp-math // FADD -> FMA combines: - SDValue Fused = visitFADDForFMACombine(N); - if (Fused) { + if (SDValue Fused = visitFADDForFMACombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } @@ -8020,6 +8312,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { EVT VT = N->getValueType(0); SDLoc dl(N); const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; // fold vector ops if (VT.isVector()) @@ -8028,12 +8321,12 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, dl, VT, N0, N1); + return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags); // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return DAG.getNode(ISD::FADD, dl, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), Flags); // If 'unsafe math' is enabled, fold lots of things. 
if (Options.UnsafeFPMath) { @@ -8068,8 +8361,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // FSUB -> FMA combines: - SDValue Fused = visitFSUBForFMACombine(N); - if (Fused) { + if (SDValue Fused = visitFSUBForFMACombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } @@ -8085,6 +8377,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; // fold vector ops if (VT.isVector()) { @@ -8095,12 +8388,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold (fmul c1, c2) -> c1*c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FMUL, DL, VT, N0, N1); + return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags); // canonicalize constant to RHS if (isConstantFPBuildVectorOrConstantFP(N0) && !isConstantFPBuildVectorOrConstantFP(N1)) - return DAG.getNode(ISD::FMUL, DL, VT, N1, N0); + return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags); // fold (fmul A, 1.0) -> A if (N1CFP && N1CFP->isExactlyValue(1.0)) @@ -8129,8 +8422,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // the second operand of the outer multiply are constants. if ((N1CFP && isConstOrConstSplatFP(N01)) || (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { - SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1); - return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts); + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags); } } } @@ -8139,16 +8432,18 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs // during an early run of DAGCombiner can prevent folding with fmuls // inserted during lowering. - if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) { + if (N0.getOpcode() == ISD::FADD && + (N0.getOperand(0) == N0.getOperand(1)) && + N0.hasOneUse()) { const SDValue Two = DAG.getConstantFP(2.0, DL, VT); - SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1); - return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts); + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags); } } // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) - return DAG.getNode(ISD::FADD, DL, VT, N0, N0); + return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags); // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) @@ -8163,10 +8458,17 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (LHSNeg == 2 || RHSNeg == 2) return DAG.getNode(ISD::FMUL, DL, VT, GetNegatedExpression(N0, DAG, LegalOperations), - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), + Flags); } } + // FMUL -> FMA combines: + if (SDValue Fused = visitFMULForFMACombine(N)) { + AddToWorklist(Fused.getNode()); + return Fused; + } + return SDValue(); } @@ -8193,66 +8495,145 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N1CFP && N1CFP->isZero()) return N2; } + // TODO: The FMA node should have flags that propagate to these nodes. 
if (N0CFP && N0CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); if (N1CFP && N1CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); // Canonicalize (fma c, x, y) -> (fma x, c, y) - if (N0CFP && !N1CFP) + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); - // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) - if (Options.UnsafeFPMath && N1CFP && - N2.getOpcode() == ISD::FMUL && - N0 == N2.getOperand(0) && - N2.getOperand(1).getOpcode() == ISD::ConstantFP) { - return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1))); - } + // TODO: FMA nodes should have flags that propagate to the created nodes. + // For now, create a Flags object for use with all unsafe math transforms. + SDNodeFlags Flags; + Flags.setUnsafeAlgebra(true); + if (Options.UnsafeFPMath) { + // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) + if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && + isConstantFPBuildVectorOrConstantFP(N1) && + isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) { + return DAG.getNode(ISD::FMUL, dl, VT, N0, + DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1), + &Flags), &Flags); + } - // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) - if (Options.UnsafeFPMath && - N0.getOpcode() == ISD::FMUL && N1CFP && - N0.getOperand(1).getOpcode() == ISD::ConstantFP) { - return DAG.getNode(ISD::FMA, dl, VT, - N0.getOperand(0), - DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)), - N2); + // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) + if (N0.getOpcode() == ISD::FMUL && + isConstantFPBuildVectorOrConstantFP(N1) && + isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { + return DAG.getNode(ISD::FMA, dl, VT, + N0.getOperand(0), + DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1), + &Flags), + N2); + } } // (fma x, 1, y) -> (fadd x, y) // (fma x, -1, y) -> (fadd (fneg x), y) if (N1CFP) { if (N1CFP->isExactlyValue(1.0)) + // TODO: The FMA node should have flags that propagate to this node. return DAG.getNode(ISD::FADD, dl, VT, N0, N2); if (N1CFP->isExactlyValue(-1.0) && (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); AddToWorklist(RHSNeg.getNode()); + // TODO: The FMA node should have flags that propagate to this node. return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); } } - // (fma x, c, x) -> (fmul x, (c+1)) - if (Options.UnsafeFPMath && N1CFP && N0 == N2) - return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(1.0, dl, VT))); - - // (fma x, c, (fneg x)) -> (fmul x, (c-1)) - if (Options.UnsafeFPMath && N1CFP && - N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) + if (Options.UnsafeFPMath) { + // (fma x, c, x) -> (fmul x, (c+1)) + if (N1CFP && N0 == N2) { return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(-1.0, dl, VT))); + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(1.0, dl, VT), + &Flags), &Flags); + } + // (fma x, c, (fneg x)) -> (fmul x, (c-1)) + if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, dl, VT, N0, + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(-1.0, dl, VT), + &Flags), &Flags); + } + } return SDValue(); } +// Combine multiple FDIVs with the same divisor into multiple FMULs by the +// reciprocal. 
+// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) +// Notice that this is not always beneficial. One reason is different target +// may have different costs for FDIV and FMUL, so sometimes the cost of two +// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason +// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". +SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { + bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; + const SDNodeFlags *Flags = N->getFlags(); + if (!UnsafeMath && !Flags->hasAllowReciprocal()) + return SDValue(); + + // Skip if current node is a reciprocal. + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + if (N0CFP && N0CFP->isExactlyValue(1.0)) + return SDValue(); + + // Exit early if the target does not want this transform or if there can't + // possibly be enough uses of the divisor to make the transform worthwhile. + SDValue N1 = N->getOperand(1); + unsigned MinUses = TLI.combineRepeatedFPDivisors(); + if (!MinUses || N1->use_size() < MinUses) + return SDValue(); + + // Find all FDIV users of the same divisor. + // Use a set because duplicates may be present in the user list. + SetVector<SDNode *> Users; + for (auto *U : N1->uses()) { + if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) { + // This division is eligible for optimization only if global unsafe math + // is enabled or if this division allows reciprocal formation. + if (UnsafeMath || U->getFlags()->hasAllowReciprocal()) + Users.insert(U); + } + } + + // Now that we have the actual number of divisor uses, make sure it meets + // the minimum threshold specified by the target. + if (Users.size() < MinUses) + return SDValue(); + + EVT VT = N->getValueType(0); + SDLoc DL(N); + SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); + SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags); + + // Dividend / Divisor -> Dividend * Reciprocal + for (auto *U : Users) { + SDValue Dividend = U->getOperand(0); + if (Dividend != FPOne) { + SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, + Reciprocal, Flags); + CombineTo(U, NewNode); + } else if (U != Reciprocal.getNode()) { + // In the absence of fast-math-flags, this user node is always the + // same node as Reciprocal, but with FMF they may be different nodes. + CombineTo(U, Reciprocal); + } + } + return SDValue(N, 0); // N was replaced. +} + SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -8261,6 +8642,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; + SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; // fold vector ops if (VT.isVector()) @@ -8269,7 +8651,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // fold (fdiv c1, c2) -> c1/c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags); if (Options.UnsafeFPMath) { // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. 
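The comment block on the new combineRepeatedFPDivisors helper states the trade-off; numerically, the rewrite it performs looks like the following minimal sketch (plain C++, not DAG code). Because the reciprocal of a general divisor is itself rounded, a * recip need not round identically to a / D, which is why the combine is gated on UnsafeFPMath or the allow-reciprocal fast-math flag:

#include <cstdio>

int main() {
  double A = 3.0, B = 7.0, D = 10.0;
  // Before the combine: two FDIVs sharing one divisor.
  double Q0 = A / D, Q1 = B / D;
  // After: one FDIV to form the reciprocal, then one FMUL per dividend.
  double Recip = 1.0 / D;
  // The products can differ from the direct quotients in the last bit,
  // since Recip is already rounded -- hence the fast-math gating.
  printf("%.17g vs %.17g\n", Q0, A * Recip);
  printf("%.17g vs %.17g\n", Q1, B * Recip);
  return 0;
}

Whether this is a win also depends on the target's relative FDIV/FMUL costs and on how many dividends share the divisor, which is what the MinUses threshold above checks.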
@@ -8288,28 +8670,30 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || TLI.isFPImmLegal(Recip, VT))) return DAG.getNode(ISD::FMUL, DL, VT, N0, - DAG.getConstantFP(Recip, DL, VT)); + DAG.getConstantFP(Recip, DL, VT), Flags); } // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) { - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0), Flags)) { + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), + Flags)) { RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FP_ROUND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), + Flags)) { RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FMUL) { // Look through an FMUL. Even though this won't remove the FDIV directly, @@ -8326,18 +8710,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (SqrtOp.getNode()) { // We found a FSQRT, so try to make this fold: // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) - if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) { - RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp); + if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) { + RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags); AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } } // Fold into a reciprocal estimate and multiply instead of a real divide. - if (SDValue RV = BuildReciprocalEstimate(N1)) { + if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) { AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } @@ -8349,52 +8733,13 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (LHSNeg == 2 || RHSNeg == 2) return DAG.getNode(ISD::FDIV, SDLoc(N), VT, GetNegatedExpression(N0, DAG, LegalOperations), - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), + Flags); } } - // Combine multiple FDIVs with the same divisor into multiple FMULs by the - // reciprocal. - // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) - // Notice that this is not always beneficial. One reason is different target - // may have different costs for FDIV and FMUL, so sometimes the cost of two - // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason - // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". - if (Options.UnsafeFPMath) { - // Skip if current node is a reciprocal. - if (N0CFP && N0CFP->isExactlyValue(1.0)) - return SDValue(); - - // Find all FDIV users of the same divisor. 
- // Use a set because duplicates may be present in the user list. - SetVector<SDNode *> Users; - for (auto *U : N1->uses()) - if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) - Users.insert(U); - - if (TLI.combineRepeatedFPDivisors(Users.size())) { - SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); - // FIXME: This optimization requires some level of fast-math, so the - // created reciprocal node should at least have the 'allowReciprocal' - // fast-math-flag set. - SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1); - - // Dividend / Divisor -> Dividend * Reciprocal - for (auto *U : Users) { - SDValue Dividend = U->getOperand(0); - if (Dividend != FPOne) { - SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, - Reciprocal); - CombineTo(U, NewNode); - } else if (U != Reciprocal.getNode()) { - // In the absence of fast-math-flags, this user node is always the - // same node as Reciprocal, but with FMF they may be different nodes. - CombineTo(U, Reciprocal); - } - } - return SDValue(N, 0); // N was replaced. - } - } + if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N)) + return CombineRepeatedDivisors; return SDValue(); } @@ -8408,7 +8753,8 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP) - return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, + &cast<BinaryWithFlagsSDNode>(N)->Flags); return SDValue(); } @@ -8417,20 +8763,25 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap()) return SDValue(); + // TODO: FSQRT nodes should have flags that propagate to the created nodes. + // For now, create a Flags object for use with all unsafe math transforms. + SDNodeFlags Flags; + Flags.setUnsafeAlgebra(true); + // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) - SDValue RV = BuildRsqrtEstimate(N->getOperand(0)); + SDValue RV = BuildRsqrtEstimate(N->getOperand(0), &Flags); if (!RV) return SDValue(); - + EVT VT = RV.getValueType(); SDLoc DL(N); - RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); + RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV, &Flags); AddToWorklist(RV.getNode()); // Unfortunately, RV is now NaN if the input was exactly 0. // Select out this case and force the answer to 0. SDValue Zero = DAG.getConstantFP(0.0, DL, VT); - EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + EVT CCVT = getSetCCResultType(VT); SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ); AddToWorklist(ZeroCmp.getNode()); AddToWorklist(RV.getNode()); @@ -8439,6 +8790,22 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { ZeroCmp, Zero, RV); } +static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { + // copysign(x, fp_extend(y)) -> copysign(x, y) + // copysign(x, fp_round(y)) -> copysign(x, y) + // Do not optimize out type conversion of f128 type yet. + // For some target like x86_64, configuration is changed + // to keep one f128 value in one SSE register, but + // instruction selection cannot handle FCOPYSIGN on + // SSE registers yet. 
+ SDValue N1 = N->getOperand(1); + EVT N1VT = N1->getValueType(0); + EVT N1Op0VT = N1->getOperand(0)->getValueType(0); + return (N1.getOpcode() == ISD::FP_EXTEND || + N1.getOpcode() == ISD::FP_ROUND) && + (N1VT == N1Op0VT || N1Op0VT != MVT::f128); +} + SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -8482,7 +8849,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(x, fp_extend(y)) -> copysign(x, y) // copysign(x, fp_round(y)) -> copysign(x, y) - if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) + if (CanCombineFCOPYSIGN_EXTEND_ROUND(N)) return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); @@ -8837,11 +9204,12 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { APFloat CVal = CFP1->getValueAPF(); CVal.changeSign(); if (Level >= AfterLegalizeDAG && - (TLI.isFPImmLegal(CVal, N->getValueType(0)) || - TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0)))) - return DAG.getNode( - ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1))); + (TLI.isFPImmLegal(CVal, VT) || + TLI.isOperationLegal(ISD::ConstantFP, VT))) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N0.getOperand(1)), + &cast<BinaryWithFlagsSDNode>(N0)->Flags); } } @@ -8851,20 +9219,20 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue DAGCombiner::visitFMINNUM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + EVT VT = N->getValueType(0); + const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); + const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); if (N0CFP && N1CFP) { const APFloat &C0 = N0CFP->getValueAPF(); const APFloat &C1 = N1CFP->getValueAPF(); - return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0)); + return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT); } - if (N0CFP) { - EVT VT = N->getValueType(0); - // Canonicalize to constant on RHS. + // Canonicalize to constant on RHS. + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0); - } return SDValue(); } @@ -8872,20 +9240,20 @@ SDValue DAGCombiner::visitFMINNUM(SDNode *N) { SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + EVT VT = N->getValueType(0); + const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); + const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); if (N0CFP && N1CFP) { const APFloat &C0 = N0CFP->getValueAPF(); const APFloat &C1 = N1CFP->getValueAPF(); - return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0)); + return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT); } - if (N0CFP) { - EVT VT = N->getValueType(0); - // Canonicalize to constant on RHS. + // Canonicalize to constant on RHS. + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0); - } return SDValue(); } @@ -9034,8 +9402,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { SDValue Op1 = TheXor->getOperand(1); if (Op0.getOpcode() == Op1.getOpcode()) { // Avoid missing important xor optimizations. 
- SDValue Tmp = visitXOR(TheXor); - if (Tmp.getNode()) { + if (SDValue Tmp = visitXOR(TheXor)) { if (Tmp.getNode() != TheXor) { DEBUG(dbgs() << "\nReplacing.8 "; TheXor->dump(&DAG); @@ -9722,8 +10089,8 @@ struct LoadedSlice { void addSliceGain(const LoadedSlice &LS) { // Each slice saves a truncate. const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo(); - if (!TLI.isTruncateFree(LS.Inst->getValueType(0), - LS.Inst->getOperand(0).getValueType())) + if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(), + LS.Inst->getValueType(0))) ++Truncates; // If there is a shift amount, this slice gets rid of it. if (LS.Shift) @@ -10625,30 +10992,109 @@ struct BaseIndexOffset { }; } // namespace +// This is a helper function for visitMUL to check the profitability +// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). +// MulNode is the original multiply, AddNode is (add x, c1), +// and ConstNode is c2. +// +// If the (add x, c1) has multiple uses, we could increase +// the number of adds if we make this transformation. +// It would only be worth doing this if we can remove a +// multiply in the process. Check for that here. +// To illustrate: +// (A + c1) * c3 +// (A + c2) * c3 +// We're checking for cases where we have common "c3 * A" expressions. +bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, + SDValue &AddNode, + SDValue &ConstNode) { + APInt Val; + + // If the add only has one use, this would be OK to do. + if (AddNode.getNode()->hasOneUse()) + return true; + + // Walk all the users of the constant with which we're multiplying. + for (SDNode *Use : ConstNode->uses()) { + + if (Use == MulNode) // This use is the one we're on right now. Skip it. + continue; + + if (Use->getOpcode() == ISD::MUL) { // We have another multiply use. + SDNode *OtherOp; + SDNode *MulVar = AddNode.getOperand(0).getNode(); + + // OtherOp is what we're multiplying against the constant. + if (Use->getOperand(0) == ConstNode) + OtherOp = Use->getOperand(1).getNode(); + else + OtherOp = Use->getOperand(0).getNode(); + + // Check to see if multiply is with the same operand of our "add". + // + // ConstNode = CONST + // Use = ConstNode * A <-- visiting Use. OtherOp is A. + // ... + // AddNode = (A + c1) <-- MulVar is A. + // = AddNode * ConstNode <-- current visiting instruction. + // + // If we make this transformation, we will have a common + // multiply (ConstNode * A) that we can save. + if (OtherOp == MulVar) + return true; + + // Now check to see if a future expansion will give us a common + // multiply. + // + // ConstNode = CONST + // AddNode = (A + c1) + // ... = AddNode * ConstNode <-- current visiting instruction. + // ... + // OtherOp = (A + c2) + // Use = OtherOp * ConstNode <-- visiting Use. + // + // If we make this transformation, we will have a common + // multiply (CONST * A) after we also do the same transformation + // to the "t2" instruction. + if (OtherOp->getOpcode() == ISD::ADD && + isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) && + OtherOp->getOperand(0).getNode() == MulVar) + return true; + } + } + + // Didn't find a case where this would be profitable. 
+ return false; +} + SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG, SDLoc SL, ArrayRef<MemOpLink> Stores, + SmallVectorImpl<SDValue> &Chains, EVT Ty) const { SmallVector<SDValue, 8> BuildVector; - for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) - BuildVector.push_back(cast<StoreSDNode>(Stores[I].MemNode)->getValue()); + for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) { + StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode); + Chains.push_back(St->getChain()); + BuildVector.push_back(St->getValue()); + } return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector); } bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, - unsigned NumElem, bool IsConstantSrc, bool UseVector) { + unsigned NumStores, bool IsConstantSrc, bool UseVector) { // Make sure we have something to merge. - if (NumElem < 2) + if (NumStores < 2) return false; int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned LatestNodeUsed = 0; - for (unsigned i=0; i < NumElem; ++i) { + for (unsigned i=0; i < NumStores; ++i) { // Find a chain for the new wide-store operand. Notice that some // of the store nodes that we found may not be selected for inclusion // in the wide store. The chain we use needs to be the chain of the @@ -10657,45 +11103,57 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( LatestNodeUsed = i; } + SmallVector<SDValue, 8> Chains; + // The latest Node in the DAG. LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; SDLoc DL(StoreNodes[0].MemNode); SDValue StoredVal; if (UseVector) { - // Find a legal type for the vector store. - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); + bool IsVec = MemVT.isVector(); + unsigned Elts = NumStores; + if (IsVec) { + // When merging vector stores, get the total number of elements. + Elts *= MemVT.getVectorNumElements(); + } + // Get the type for the merged vector store. + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); + if (IsConstantSrc) { - StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty); + StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty); } else { SmallVector<SDValue, 8> Ops; - for (unsigned i = 0; i < NumElem ; ++i) { + for (unsigned i = 0; i < NumStores; ++i) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); SDValue Val = St->getValue(); - // All of the operands of a BUILD_VECTOR must have the same type. + // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type. if (Val.getValueType() != MemVT) return false; Ops.push_back(Val); + Chains.push_back(St->getChain()); } // Build the extracted vector elements back into a vector. - StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops); - } + StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, + DL, Ty, Ops); } } else { // We should always use a vector store when merging extracted vector // elements, so this path implies a store of constants. assert(IsConstantSrc && "Merged vector elements should use vector store"); - unsigned SizeInBits = NumElem * ElementSizeBytes * 8; + unsigned SizeInBits = NumStores * ElementSizeBytes * 8; APInt StoreInt(SizeInBits, 0); // Construct a single integer constant which is made of the smaller // constant inputs. bool IsLE = DAG.getDataLayout().isLittleEndian(); - for (unsigned i = 0; i < NumElem ; ++i) { - unsigned Idx = IsLE ? 
(NumElem - 1 - i) : i; + for (unsigned i = 0; i < NumStores; ++i) { + unsigned Idx = IsLE ? (NumStores - 1 - i) : i; StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); + Chains.push_back(St->getChain()); + SDValue Val = St->getValue(); StoreInt <<= ElementSizeBytes * 8; if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { @@ -10712,7 +11170,10 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); } - SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal, + assert(!Chains.empty()); + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, @@ -10721,7 +11182,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // Replace the last store with the new store CombineTo(LatestOp, NewStore); // Erase all other stores. - for (unsigned i = 0; i < NumElem ; ++i) { + for (unsigned i = 0; i < NumStores; ++i) { if (StoreNodes[i].MemNode == LatestOp) continue; StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); @@ -10743,17 +11204,6 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( return true; } -static bool allowableAlignment(const SelectionDAG &DAG, - const TargetLowering &TLI, EVT EVTTy, - unsigned AS, unsigned Align) { - if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align)) - return true; - - Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DAG.getDataLayout().getPrefTypeAlignment(Ty); - return (Align >= ABIAlignment); -} - void DAGCombiner::getStoreMergeAndAliasCandidates( StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes, SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) { @@ -10775,6 +11225,38 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( EVT MemVT = St->getMemoryVT(); unsigned Seq = 0; StoreSDNode *Index = St; + + + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); + + if (UseAA) { + // Look at other users of the same chain. Stores on the same chain do not + // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized + // to be on the same chain, so don't bother looking at adjacent chains. + + SDValue Chain = St->getChain(); + for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) { + if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) { + if (I.getOperandNo() != 0) + continue; + + if (OtherST->isVolatile() || OtherST->isIndexed()) + continue; + + if (OtherST->getMemoryVT() != MemVT) + continue; + + BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr()); + + if (Ptr.equalBaseIndex(BasePtr)) + StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++)); + } + } + + return; + } + while (Index) { // If the chain has more than one use, then we can't reorder the mem ops. if (Index != St && !SDValue(Index, 0)->hasOneUse()) @@ -10800,6 +11282,13 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( if (Index->getMemoryVT() != MemVT) break; + // We do not allow under-aligned stores in order to prevent + // overriding stores. NOTE: this is a bad hack. Alignment SHOULD + // be irrelevant here; what MATTERS is that we not move memory + // operations that potentially overlap past each-other. + if (Index->getAlignment() < MemVT.getStoreSize()) + break; + // We found a potential memory operand to merge. 
StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); @@ -10844,8 +11333,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) return false; - // Don't merge vectors into wider inputs. - if (MemVT.isVector() || !MemVT.isSimple()) + if (!MemVT.isSimple()) return false; // Perform an early exit check. Do not bother looking at stored values that @@ -10854,9 +11342,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { bool IsLoadSrc = isa<LoadSDNode>(StoredVal); bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) || isa<ConstantFPSDNode>(StoredVal); - bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); + bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT || + StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR); - if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc) + return false; + + // Don't merge vectors into wider vectors if the source data comes from loads. + // TODO: This restriction can be lifted by using logic similar to the + // ExtractVecSrc case. + if (MemVT.isVector() && IsLoadSrc) return false; // Only look at ends of store sequences. @@ -10868,22 +11363,28 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // We need to make sure that these nodes do not interfere with // any of the store nodes. SmallVector<LSBaseSDNode*, 8> AliasLoadNodes; - + // Save the StoreSDNodes that we find in the chain. SmallVector<MemOpLink, 8> StoreNodes; getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes); - + // Check if there is anything to merge. if (StoreNodes.size() < 2) return false; - // Sort the memory operands according to their distance from the base pointer. + // Sort the memory operands according to their distance from the + // base pointer. As a secondary criteria: make sure stores coming + // later in the code come first in the list. This is important for + // the non-UseAA case, because we're merging stores into the FINAL + // store along a chain which potentially contains aliasing stores. + // Thus, if there are multiple stores to the same address, the last + // one can be considered for merging but not the others. std::sort(StoreNodes.begin(), StoreNodes.end(), [](MemOpLink LHS, MemOpLink RHS) { return LHS.OffsetFromBase < RHS.OffsetFromBase || (LHS.OffsetFromBase == RHS.OffsetFromBase && - LHS.SequenceNum > RHS.SequenceNum); + LHS.SequenceNum < RHS.SequenceNum); }); // Scan the memory operations on the chain and find the first non-consecutive @@ -10900,15 +11401,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { break; } - bool Alias = false; // Check if this store interferes with any of the loads that we found. - for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld) - if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) { - Alias = true; - break; - } - // We found a load that alias with this store. Stop the sequence. - if (Alias) + // If we find a load that alias with this store. Stop the sequence. + if (std::any_of(AliasLoadNodes.begin(), AliasLoadNodes.end(), + [&](LSBaseSDNode* Ldn) { + return isAlias(Ldn, StoreNodes[i].MemNode); + })) break; // Mark this node as useful. 
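The reworked comparator above sorts candidates by distance from the base pointer and breaks ties by sequence number, so that when a chain contains several stores to the same address, only the intended one heads its group for merging. A compact illustration of that ordering (a hypothetical standalone struct, not the LLVM MemOpLink):

#include <algorithm>
#include <cstdio>
#include <vector>

struct MemOp { long OffsetFromBase; unsigned SequenceNum; };

int main() {
  // The two entries at offset 0 model repeated stores to one address.
  std::vector<MemOp> Stores = {{8, 0}, {0, 2}, {0, 1}, {4, 3}};
  std::sort(Stores.begin(), Stores.end(), [](const MemOp &L, const MemOp &R) {
    // Same lexicographic order as the combiner: offset first, then sequence.
    return L.OffsetFromBase < R.OffsetFromBase ||
           (L.OffsetFromBase == R.OffsetFromBase &&
            L.SequenceNum < R.SequenceNum);
  });
  for (const MemOp &S : Stores)
    printf("(offset %ld, seq %u) ", S.OffsetFromBase, S.SequenceNum);
  printf("\n"); // (offset 0, seq 1) (offset 0, seq 2) (offset 4, seq 3) ...
  return 0;
}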
@@ -10919,6 +11417,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); unsigned FirstStoreAlign = FirstInChain->getAlignment(); + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); // Store the constants into memory as one consecutive store. if (IsConstantSrc) { @@ -10940,43 +11440,40 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Find a legal type for the constant store. unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; - EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); + EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); + bool IsFast; if (TLI.isTypeLegal(StoreTy) && - allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, - FirstStoreAlign)) { + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) { LastLegalType = i+1; // Or check whether a truncstore is legal. - } else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + } else if (TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = - TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); + TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, - FirstStoreAlign)) { + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstStoreAS, FirstStoreAlign, &IsFast) && + IsFast) { LastLegalType = i + 1; } } - // Find a legal type for the vector store. - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); - if (TLI.isTypeLegal(Ty) && - allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) { - LastLegalVectorType = i + 1; + // We only use vectors if the constant is known to be zero or the target + // allows it and the function is not marked with the noimplicitfloat + // attribute. + if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1, + FirstStoreAS)) && + !NoVectors) { + // Find a legal type for the vector store. + EVT Ty = EVT::getVectorVT(Context, MemVT, i+1); + if (TLI.isTypeLegal(Ty) && + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) + LastLegalVectorType = i + 1; } } - - // We only use vectors if the constant is known to be zero or the target - // allows it and the function is not marked with the noimplicitfloat - // attribute. - if (NoVectors) { - LastLegalVectorType = 0; - } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT, - LastLegalVectorType, - FirstStoreAS)) { - LastLegalVectorType = 0; - } - // Check if we found a legal integer type to store. if (LastLegalType == 0 && LastLegalVectorType == 0) return false; @@ -10990,27 +11487,36 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // When extracting multiple vector elements, try to store them // in one vector store rather than a sequence of scalar stores. - if (IsExtractVecEltSrc) { - unsigned NumElem = 0; + if (IsExtractVecSrc) { + unsigned NumStoresToMerge = 0; + bool IsVec = MemVT.isVector(); for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - SDValue StoredVal = St->getValue(); + unsigned StoreValOpcode = St->getValue().getOpcode(); // This restriction could be loosened. // Bail out if any stored values are not elements extracted from a vector. 
// It should be possible to handle mixed sources, but load sources need // more careful handling (see the block of code below that handles // consecutive loads). - if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT && + StoreValOpcode != ISD::EXTRACT_SUBVECTOR) return false; // Find a legal type for the vector store. - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + unsigned Elts = i + 1; + if (IsVec) { + // When merging vector stores, get the total number of elements. + Elts *= MemVT.getVectorNumElements(); + } + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); + bool IsFast; if (TLI.isTypeLegal(Ty) && - allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) - NumElem = i + 1; + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) + NumStoresToMerge = i + 1; } - return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, + return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge, false, true); } @@ -11084,7 +11590,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { StartAddress = LoadNodes[0].OffsetFromBase; SDValue FirstChain = FirstLoad->getChain(); for (unsigned i = 1; i < LoadNodes.size(); ++i) { - // All loads much share the same chain. + // All loads must share the same chain. if (LoadNodes[i].MemNode->getChain() != FirstChain) break; @@ -11092,35 +11598,41 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (CurrAddress - StartAddress != (ElementSizeBytes * i)) break; LastConsecutiveLoad = i; - // Find a legal type for the vector store. - EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1); + bool IsFastSt, IsFastLd; if (TLI.isTypeLegal(StoreTy) && - allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && - allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) { + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && IsFastLd) { LastLegalVectorType = i + 1; } // Find a legal type for the integer store. unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; - StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); + StoreTy = EVT::getIntegerVT(Context, SizeInBits); if (TLI.isTypeLegal(StoreTy) && - allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && - allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && IsFastLd) LastLegalIntegerType = i + 1; // Or check whether a truncstore and extload is legal. 
- else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + else if (TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = - TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy); + TLI.getTypeToTransformTo(Context, StoreTy); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && - allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, - FirstStoreAlign) && - allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS, - FirstLoadAlign)) + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstStoreAS, FirstStoreAlign, &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstLoadAS, FirstLoadAlign, &IsFastLd) && + IsFastLd) LastLegalIntegerType = i+1; } } @@ -11138,6 +11650,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (NumElem < 2) return false; + // Collect the chains from all merged stores. + SmallVector<SDValue, 8> MergeStoreChains; + MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain()); + // The latest Node in the DAG. unsigned LatestNodeUsed = 0; for (unsigned i=1; i<NumElem; ++i) { @@ -11147,6 +11663,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // latest store node which is *used* and replaced by the wide store. if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) LatestNodeUsed = i; + + MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain()); } LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; @@ -11155,34 +11673,33 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // to memory. EVT JointMemOpVT; if (UseVectorTy) { - JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); + JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem); } else { unsigned SizeInBits = NumElem * ElementSizeBytes * 8; - JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); + JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); } SDLoc LoadDL(LoadNodes[0].MemNode); SDLoc StoreDL(StoreNodes[0].MemNode); + // The merged loads are required to have the same incoming chain, so + // using the first's chain is acceptable. SDValue NewLoad = DAG.getLoad( JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign); + SDValue NewStoreChain = + DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains); + SDValue NewStore = DAG.getStore( - LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(), + NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, FirstStoreAlign); - // Replace one of the loads with the new load. - LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), - SDValue(NewLoad.getNode(), 1)); - - // Remove the rest of the load chains. - for (unsigned i = 1; i < NumElem ; ++i) { - // Replace all chain users of the old load nodes with the chain of the new - // load node. + // Transfer chain users from old loads to the new load. 
+ for (unsigned i = 0; i < NumElem; ++i) { LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), + SDValue(NewLoad.getNode(), 1)); } // Replace the last store with the new store. @@ -11200,6 +11717,114 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { return true; } +SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) { + SDLoc SL(ST); + SDValue ReplStore; + + // Replace the chain to avoid dependency. + if (ST->isTruncatingStore()) { + ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(), + ST->getBasePtr(), ST->getMemoryVT(), + ST->getMemOperand()); + } else { + ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(), + ST->getMemOperand()); + } + + // Create token to keep both nodes around. + SDValue Token = DAG.getNode(ISD::TokenFactor, SL, + MVT::Other, ST->getChain(), ReplStore); + + // Make sure the new and old chains are cleaned up. + AddToWorklist(Token.getNode()); + + // Don't add users to work list. + return CombineTo(ST, Token, false); +} + +SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { + SDValue Value = ST->getValue(); + if (Value.getOpcode() == ISD::TargetConstantFP) + return SDValue(); + + SDLoc DL(ST); + + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + + const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value); + + // NOTE: If the original store is volatile, this transform must not increase + // the number of stores. For example, on x86-32 an f64 can be stored in one + // processor operation but an i64 (which is not legal) requires two. So the + // transform should not be done in this case. + + SDValue Tmp; + switch (CFP->getSimpleValueType(0).SimpleTy) { + default: + llvm_unreachable("Unknown FP type"); + case MVT::f16: // We don't do this for these yet. + case MVT::f80: + case MVT::f128: + case MVT::ppcf128: + return SDValue(); + case MVT::f32: + if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + ; + Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). + bitcastToAPInt().getZExtValue(), SDLoc(CFP), + MVT::i32); + return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand()); + } + + return SDValue(); + case MVT::f64: + if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && + !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { + ; + Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + getZExtValue(), SDLoc(CFP), MVT::i64); + return DAG.getStore(Chain, DL, Tmp, + Ptr, ST->getMemOperand()); + } + + if (!ST->isVolatile() && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + // Many FP stores are not made apparent until after legalize, e.g. for + // argument passing. Since this is so common, custom legalize the + // 64-bit integer store into two 32-bit stores. 
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); + SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + AAMDNodes AAInfo = ST->getAAInfo(); + + SDValue St0 = DAG.getStore(Chain, DL, Lo, + Ptr, ST->getPointerInfo(), + isVolatile, isNonTemporal, + ST->getAlignment(), AAInfo); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(4, DL, Ptr.getValueType())); + Alignment = MinAlign(Alignment, 4U); + SDValue St1 = DAG.getStore(Chain, DL, Hi, + Ptr, ST->getPointerInfo().getWithOffset(4), + isVolatile, isNonTemporal, + Alignment, AAInfo); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + St0, St1); + } + + return SDValue(); + } +} + SDValue DAGCombiner::visitSTORE(SDNode *N) { StoreSDNode *ST = cast<StoreSDNode>(N); SDValue Chain = ST->getChain(); @@ -11227,81 +11852,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed()) return Chain; - // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) { - // NOTE: If the original store is volatile, this transform must not increase - // the number of stores. For example, on x86-32 an f64 can be stored in one - // processor operation but an i64 (which is not legal) requires two. So the - // transform should not be done in this case. - if (Value.getOpcode() != ISD::TargetConstantFP) { - SDValue Tmp; - switch (CFP->getSimpleValueType(0).SimpleTy) { - default: llvm_unreachable("Unknown FP type"); - case MVT::f16: // We don't do this for these yet. - case MVT::f80: - case MVT::f128: - case MVT::ppcf128: - break; - case MVT::f32: - if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { - ; - Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). - bitcastToAPInt().getZExtValue(), SDLoc(CFP), - MVT::i32); - return DAG.getStore(Chain, SDLoc(N), Tmp, - Ptr, ST->getMemOperand()); - } - break; - case MVT::f64: - if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && - !ST->isVolatile()) || - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { - ; - Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). - getZExtValue(), SDLoc(CFP), MVT::i64); - return DAG.getStore(Chain, SDLoc(N), Tmp, - Ptr, ST->getMemOperand()); - } - - if (!ST->isVolatile() && - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { - // Many FP stores are not made apparent until after legalize, e.g. for - // argument passing. Since this is so common, custom legalize the - // 64-bit integer store into two 32-bit stores. 
- uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); - SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); - if (DAG.getDataLayout().isBigEndian()) - std::swap(Lo, Hi); - - unsigned Alignment = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); - AAMDNodes AAInfo = ST->getAAInfo(); - - SDLoc DL(N); - - SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo, - Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, - ST->getAlignment(), AAInfo); - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(4, DL, Ptr.getValueType())); - Alignment = MinAlign(Alignment, 4U); - SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi, - Ptr, ST->getPointerInfo().getWithOffset(4), - isVolatile, isNonTemporal, - Alignment, AAInfo); - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - St0, St1); - } - - break; - } - } - } - // Try to infer better alignment information than the store already has. if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { @@ -11319,8 +11869,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Try transforming a pair floating point load / store ops to integer // load / store ops. - SDValue NewST = TransformFPLoadStorePair(N); - if (NewST.getNode()) + if (SDValue NewST = TransformFPLoadStorePair(N)) return NewST; bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA @@ -11331,31 +11880,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { UseAA = false; #endif if (UseAA && ST->isUnindexed()) { - // Walk up chain skipping non-aliasing memory nodes. - SDValue BetterChain = FindBetterChain(N, Chain); - - // If there is a better chain. - if (Chain != BetterChain) { - SDValue ReplStore; - - // Replace the chain to avoid dependency. - if (ST->isTruncatingStore()) { - ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr, - ST->getMemoryVT(), ST->getMemOperand()); - } else { - ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr, - ST->getMemOperand()); - } + // FIXME: We should do this even without AA enabled. AA will just allow + // FindBetterChain to work in more situations. The problem with this is that + // any combine that expects memory operations to be on consecutive chains + // first needs to be updated to look for users of the same chain. - // Create token to keep both nodes around. - SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), - MVT::Other, Chain, ReplStore); - - // Make sure the new and old chains are cleaned up. - AddToWorklist(Token.getNode()); - - // Don't add users to work list. - return CombineTo(N, Token, false); + // Walk up chain skipping non-aliasing memory nodes, on this store and any + // adjacent stores. + if (findBetterNeighborChains(ST)) { + // replaceStoreChain uses CombineTo, which handled all of the worklist + // manipulation. Return the original node to not do anything else. + return SDValue(ST, 0); } } @@ -11440,6 +11975,16 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return SDValue(N, 0); } + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + // + // Make sure to do this only after attempting to merge stores in order to + // avoid changing the types of some subset of stores due to visit order, + // preventing their merging. 
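A quick standalone illustration of the bit pattern behind the f64 path of replaceStoreOfFPConstant above (plain C++; the variable names mirror the hunk but nothing here is commit code): the double is bitcast to an i64, and when only i32 stores are legal the value is split into Lo/Hi words, with the halves swapped on big-endian layouts:

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <utility>

int main() {
  double FP = 1.0;
  uint64_t Val;
  std::memcpy(&Val, &FP, sizeof(Val)); // bitcastToAPInt() equivalent
  uint32_t Lo = static_cast<uint32_t>(Val & 0xFFFFFFFF);
  uint32_t Hi = static_cast<uint32_t>(Val >> 32);
  const bool IsBigEndian = false; // DataLayout::isBigEndian() on target
  if (IsBigEndian)
    std::swap(Lo, Hi);
  // 'store double 1.0, p' becomes 'store i32 Lo, p' plus
  // 'store i32 Hi, p+4'.
  printf("Lo=0x%08x Hi=0x%08x\n", Lo, Hi);
  return 0;
}

For 1.0 the halves are 0x00000000 and 0x3FF00000, so the two i32 stores reproduce the original f64 store byte for byte; the volatility check above exists precisely because this doubles the store count.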
+ if (isa<ConstantFPSDNode>(Value)) { + if (SDValue NewSt = replaceStoreOfFPConstant(ST)) + return NewSt; + } + return ReduceLoadOpStoreWidth(N); } @@ -11613,7 +12158,24 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } SDValue EltNo = N->getOperand(1); - bool ConstEltNo = isa<ConstantSDNode>(EltNo); + ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); + + // extract_vector_elt (build_vector x, y), 1 -> y + if (ConstEltNo && + InVec.getOpcode() == ISD::BUILD_VECTOR && + TLI.isTypeLegal(VT) && + (InVec.hasOneUse() || + TLI.aggressivelyPreferBuildVectorSources(VT))) { + SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue()); + EVT InEltVT = Elt.getValueType(); + + // Sometimes build_vector's scalar input types do not match result type. + if (NVT == InEltVT) + return Elt; + + // TODO: It may be useful to truncate if free if the build_vector implicitly + // converts. + } // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. // We only perform this optimization before the op legalization phase because @@ -11621,13 +12183,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // patterns. For example on AVX, extracting elements from a wide vector // without using extract_subvector. However, if we can find an underlying // scalar value, then we can always use that. - if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE - && ConstEltNo) { - int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) { int NumElem = VT.getVectorNumElements(); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); // Find the new index to extract from. - int OrigElt = SVOp->getMaskElt(Elt); + int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue()); // Extracting an undef index is undef. if (OrigElt == -1) @@ -12183,12 +12743,90 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops)); } -SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { - // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of - // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector - // inputs come from at most two distinct vectors, turn this into a shuffle - // node. +// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR +// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at +// most two distinct vectors the same size as the result, attempt to turn this +// into a legal shuffle. +static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + EVT OpVT = N->getOperand(0).getValueType(); + int NumElts = VT.getVectorNumElements(); + int NumOpElts = OpVT.getVectorNumElements(); + + SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT); + SmallVector<int, 8> Mask; + + for (SDValue Op : N->ops()) { + // Peek through any bitcast. + while (Op.getOpcode() == ISD::BITCAST) + Op = Op.getOperand(0); + + // UNDEF nodes convert to UNDEF shuffle mask values. + if (Op.getOpcode() == ISD::UNDEF) { + Mask.append((unsigned)NumOpElts, -1); + continue; + } + + if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) + return SDValue(); + + // What vector are we extracting the subvector from and at what index? + SDValue ExtVec = Op.getOperand(0); + + // We want the EVT of the original extraction to correctly scale the + // extraction index. + EVT ExtVT = ExtVec.getValueType(); + + // Peek through any bitcast. 
+ while (ExtVec.getOpcode() == ISD::BITCAST) + ExtVec = ExtVec.getOperand(0); + + // UNDEF nodes convert to UNDEF shuffle mask values. + if (ExtVec.getOpcode() == ISD::UNDEF) { + Mask.append((unsigned)NumOpElts, -1); + continue; + } + + if (!isa<ConstantSDNode>(Op.getOperand(1))) + return SDValue(); + int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + + // Ensure that we are extracting a subvector from a vector the same + // size as the result. + if (ExtVT.getSizeInBits() != VT.getSizeInBits()) + return SDValue(); + + // Scale the subvector index to account for any bitcast. + int NumExtElts = ExtVT.getVectorNumElements(); + if (0 == (NumExtElts % NumElts)) + ExtIdx /= (NumExtElts / NumElts); + else if (0 == (NumElts % NumExtElts)) + ExtIdx *= (NumElts / NumExtElts); + else + return SDValue(); + // At most we can reference 2 inputs in the final shuffle. + if (SV0.getOpcode() == ISD::UNDEF || SV0 == ExtVec) { + SV0 = ExtVec; + for (int i = 0; i != NumOpElts; ++i) + Mask.push_back(i + ExtIdx); + } else if (SV1.getOpcode() == ISD::UNDEF || SV1 == ExtVec) { + SV1 = ExtVec; + for (int i = 0; i != NumOpElts; ++i) + Mask.push_back(i + ExtIdx + NumElts); + } else { + return SDValue(); + } + } + + if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT)) + return SDValue(); + + return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), + DAG.getBitcast(VT, SV1), Mask); +} + +SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // If we only have one input vector, we don't need to do any concatenation. if (N->getNumOperands() == 1) return N->getOperand(0); @@ -12289,6 +12927,11 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (SDValue V = combineConcatVectorOfScalars(N, DAG)) return V; + // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) + if (SDValue V = combineConcatVectorOfExtracts(N, DAG)) + return V; + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that @@ -12503,7 +13146,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { std::all_of(SVN->getMask().begin() + NumElemsPerConcat, SVN->getMask().end(), [](int i) { return i == -1; })) { N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), - ArrayRef<int>(SVN->getMask().begin(), NumElemsPerConcat)); + makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat)); N1 = DAG.getUNDEF(ConcatVT); return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1); } @@ -12981,6 +13624,21 @@ SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { + SDValue N0 = N->getOperand(0); + + // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) + if (N0->getOpcode() == ISD::AND) { + ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1)); + if (AndConst && AndConst->getAPIntValue() == 0xffff) { + return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), + N0.getOperand(0)); + } + } + + return SDValue(); +} + /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle /// with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. 
==> @@ -13002,34 +13660,76 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { if (RHS.getOpcode() == ISD::BITCAST) RHS = RHS.getOperand(0); - if (RHS.getOpcode() == ISD::BUILD_VECTOR) { + if (RHS.getOpcode() != ISD::BUILD_VECTOR) + return SDValue(); + + EVT RVT = RHS.getValueType(); + unsigned NumElts = RHS.getNumOperands(); + + // Attempt to create a valid clear mask, splitting the mask into + // sub elements and checking to see if each is + // all zeros or all ones - suitable for shuffle masking. + auto BuildClearMask = [&](int Split) { + int NumSubElts = NumElts * Split; + int NumSubBits = RVT.getScalarSizeInBits() / Split; + SmallVector<int, 8> Indices; - unsigned NumElts = RHS.getNumOperands(); + for (int i = 0; i != NumSubElts; ++i) { + int EltIdx = i / Split; + int SubIdx = i % Split; + SDValue Elt = RHS.getOperand(EltIdx); + if (Elt.getOpcode() == ISD::UNDEF) { + Indices.push_back(-1); + continue; + } - for (unsigned i = 0; i != NumElts; ++i) { - SDValue Elt = RHS.getOperand(i); - if (isAllOnesConstant(Elt)) + APInt Bits; + if (isa<ConstantSDNode>(Elt)) + Bits = cast<ConstantSDNode>(Elt)->getAPIntValue(); + else if (isa<ConstantFPSDNode>(Elt)) + Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt(); + else + return SDValue(); + + // Extract the sub element from the constant bit mask. + if (DAG.getDataLayout().isBigEndian()) { + Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits); + } else { + Bits = Bits.lshr(SubIdx * NumSubBits); + } + + if (Split > 1) + Bits = Bits.trunc(NumSubBits); + + if (Bits.isAllOnesValue()) Indices.push_back(i); - else if (isNullConstant(Elt)) - Indices.push_back(NumElts+i); + else if (Bits == 0) + Indices.push_back(i + NumSubElts); else return SDValue(); } // Let's see if the target supports this vector_shuffle. - EVT RVT = RHS.getValueType(); - if (!TLI.isVectorClearMaskLegal(Indices, RVT)) + EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits); + EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts); + if (!TLI.isVectorClearMaskLegal(Indices, ClearVT)) return SDValue(); - // Return the new VECTOR_SHUFFLE node. - EVT EltVT = RVT.getVectorElementType(); - SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), - DAG.getConstant(0, dl, EltVT)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps); - LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); - SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); - return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); - } + SDValue Zero = DAG.getConstant(0, dl, ClearVT); + return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl, + DAG.getBitcast(ClearVT, LHS), + Zero, &Indices[0])); + }; + + // Determine maximum split level (byte level masking). + int MaxSplit = 1; + if (RVT.getScalarSizeInBits() % 8 == 0) + MaxSplit = RVT.getScalarSizeInBits() / 8; + + for (int Split = 1; Split <= MaxSplit; ++Split) + if (RVT.getScalarSizeInBits() % Split == 0) + if (SDValue S = BuildClearMask(Split)) + return S; return SDValue(); } @@ -13041,60 +13741,17 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); + SDValue Ops[] = {LHS, RHS}; + // See if we can constant fold the vector operation. + if (SDValue Fold = DAG.FoldConstantVectorArithmetic( + N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) + return Fold; + + // Try to convert a constant mask AND into a shuffle clear mask. 
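The doc comment's example, AND V, <0xffffffff, 0, 0xffffffff, 0>, maps to shuffle indices {0, 5, 2, 7}: all-ones lanes index the original vector, all-zero lanes index the appended zero vector. A self-contained sketch of that index construction, with a hypothetical buildClearMask helper standing in for the lambda above (Split == 1 case only):

#include <cstdint>
#include <cstdio>
#include <vector>

// Returns shuffle indices for an AND-with-constant-mask, or an empty
// vector if any lane is neither all-ones nor all-zeros.
std::vector<int> buildClearMask(const std::vector<uint32_t> &MaskElts) {
  int NumElts = static_cast<int>(MaskElts.size());
  std::vector<int> Indices;
  for (int i = 0; i != NumElts; ++i) {
    if (MaskElts[i] == 0xFFFFFFFFu)
      Indices.push_back(i);            // lane kept from the LHS vector
    else if (MaskElts[i] == 0)
      Indices.push_back(i + NumElts);  // lane cleared via zero vector
    else
      return {};                       // not a clear mask
  }
  return Indices;
}

int main() {
  for (int Idx : buildClearMask({0xFFFFFFFFu, 0, 0xFFFFFFFFu, 0}))
    printf("%d ", Idx); // prints: 0 5 2 7
  printf("\n");
  return 0;
}

The real lambda additionally retries at finer granularities (down to byte level), so a constant such as 0x00ff00ff that fails the whole-element test can still become a legal byte-wise clear mask.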
if (SDValue Shuffle = XformToShuffleWithZero(N)) return Shuffle; - // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold - // this operation. - if (LHS.getOpcode() == ISD::BUILD_VECTOR && - RHS.getOpcode() == ISD::BUILD_VECTOR) { - // Check if both vectors are constants. If not bail out. - if (!(cast<BuildVectorSDNode>(LHS)->isConstant() && - cast<BuildVectorSDNode>(RHS)->isConstant())) - return SDValue(); - - SmallVector<SDValue, 8> Ops; - for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { - SDValue LHSOp = LHS.getOperand(i); - SDValue RHSOp = RHS.getOperand(i); - - // Can't fold divide by zero. - if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || - N->getOpcode() == ISD::FDIV) { - if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP && - cast<ConstantFPSDNode>(RHSOp.getNode())->isZero())) - break; - } - - EVT VT = LHSOp.getValueType(); - EVT RVT = RHSOp.getValueType(); - if (RVT != VT) { - // Integer BUILD_VECTOR operands may have types larger than the element - // size (e.g., when the element type is not legal). Prior to type - // legalization, the types may not match between the two BUILD_VECTORS. - // Truncate one of the operands to make them match. - if (RVT.getSizeInBits() > VT.getSizeInBits()) { - RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, RHSOp); - } else { - LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), RVT, LHSOp); - VT = RVT; - } - } - SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT, - LHSOp, RHSOp); - if (FoldOp.getOpcode() != ISD::UNDEF && - FoldOp.getOpcode() != ISD::Constant && - FoldOp.getOpcode() != ISD::ConstantFP) - break; - Ops.push_back(FoldOp); - AddToWorklist(FoldOp.getNode()); - } - - if (Ops.size() == LHS.getNumOperands()) - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops); - } - // Type legalization might introduce new shuffles in the DAG. // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask))) // -> (shuffle (VBinOp (A, B)), Undef, Mask). @@ -13109,7 +13766,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { EVT VT = N->getValueType(0); SDValue UndefVector = LHS.getOperand(1); SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT, - LHS.getOperand(0), RHS.getOperand(0)); + LHS.getOperand(0), RHS.getOperand(0), + N->getFlags()); AddUsersToWorklist(N); return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, &SVN0->getMask()[0]); @@ -13390,9 +14048,10 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset); AddToWorklist(CPIdx.getNode()); - return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), false, - false, false, Alignment); + return DAG.getLoad( + TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); } } @@ -13481,8 +14140,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // Get a SetCC of the condition // NOTE: Don't create a SETCC if it's not legal on this target. if (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, - LegalTypes ? 
getSetCCResultType(N0.getValueType()) : MVT::i1)) { + TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) { SDValue Temp, SCC; // cast from setcc result type to select result type if (LegalTypes) { @@ -13514,51 +14172,6 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, } } - // Check to see if this is the equivalent of setcc - // FIXME: Turn all of these into setcc if setcc if setcc is legal - // otherwise, go ahead with the folds. - if (0 && isNullConstant(N3) && isOneConstant(N2)) { - EVT XType = N0.getValueType(); - if (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) { - SDValue Res = DAG.getSetCC(DL, getSetCCResultType(XType), N0, N1, CC); - if (Res.getValueType() != VT) - Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res); - return Res; - } - - // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X)))) - if (isNullConstant(N1) && CC == ISD::SETEQ && - (!LegalOperations || - TLI.isOperationLegal(ISD::CTLZ, XType))) { - SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0); - return DAG.getNode(ISD::SRL, DL, XType, Ctlz, - DAG.getConstant(Log2_32(XType.getSizeInBits()), - SDLoc(Ctlz), - getShiftAmountTy(Ctlz.getValueType()))); - } - // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) - if (isNullConstant(N1) && CC == ISD::SETGT) { - SDLoc DL(N0); - SDValue NegN0 = DAG.getNode(ISD::SUB, DL, - XType, DAG.getConstant(0, DL, XType), N0); - SDValue NotN0 = DAG.getNOT(DL, N0, XType); - return DAG.getNode(ISD::SRL, DL, XType, - DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), - DAG.getConstant(XType.getSizeInBits() - 1, DL, - getShiftAmountTy(XType))); - } - // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) - if (isAllOnesConstant(N1) && CC == ISD::SETGT) { - SDLoc DL(N0); - SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0, - DAG.getConstant(XType.getSizeInBits() - 1, DL, - getShiftAmountTy(N0.getValueType()))); - return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, DL, - XType)); - } - } - // Check to see if this is an integer abs. // select_cc setg[te] X, 0, X, -X -> // select_cc setgt X, -1, X, -X -> @@ -13666,7 +14279,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return S; } -SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { +SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -13690,16 +14303,16 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { // Newton iterations: Est = Est + Est (1 - Arg * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est); + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); + NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst); + Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags); AddToWorklist(Est.getNode()); } } @@ -13716,31 +14329,32 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) /// As a result, we precompute A/2 prior to the iteration loop. 
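Both estimate refinements here are ordinary Newton-Raphson iterations, and their quadratic convergence is easy to verify numerically. A small C++ check of the reciprocal recurrence used in BuildReciprocalEstimate, Est = Est + Est * (1 - Arg * Est), and the one-constant rsqrt recurrence just described (the starting guesses below are invented; in the real code a target estimate instruction supplies them):

#include <cstdio>

int main() {
  // Reciprocal: refine Est ~= 1/Arg.
  double Arg = 3.0, Est = 0.3; // crude initial estimate
  for (int i = 0; i < 3; ++i)
    Est = Est + Est * (1.0 - Arg * Est);
  printf("1/3        ~= %.12f\n", Est);

  // Rsqrt, one-constant form: X <- X * (1.5 - (A/2) * X * X),
  // with A/2 precomputed so 1.5 is the only extra FP constant.
  double A = 2.0, X = 0.7, HalfA = 0.5 * A;
  for (int i = 0; i < 3; ++i)
    X = X * (1.5 - HalfA * X * X);
  printf("1/sqrt(2)  ~= %.12f\n", X); // -> 0.707106781187
  return 0;
}

Each pass roughly doubles the number of correct digits, which is why a handful of iterations after a hardware estimate is enough; the Flags parameter being threaded through these builders lets the fast-math flags of the original division survive onto the expanded FMUL/FADD/FSUB nodes.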
SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, - unsigned Iterations) { + unsigned Iterations, + SDNodeFlags *Flags) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. - SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg); + SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); AddToWorklist(HalfArg.getNode()); - HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg); + HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); AddToWorklist(HalfArg.getNode()); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst); + NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(Est.getNode()); } return Est; @@ -13752,7 +14366,8 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, /// => /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, - unsigned Iterations) { + unsigned Iterations, + SDNodeFlags *Flags) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); @@ -13760,25 +14375,25 @@ SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf); + SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); AddToWorklist(HalfEst.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); AddToWorklist(Est.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); AddToWorklist(Est.getNode()); - Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree); + Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree, Flags); AddToWorklist(Est.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst, Flags); AddToWorklist(Est.getNode()); } return Est; } -SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) { +SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -13790,8 +14405,8 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) { AddToWorklist(Est.getNode()); if (Iterations) { Est = UseOneConstNR ? 
- BuildRsqrtNROneConst(Op, Est, Iterations) : - BuildRsqrtNRTwoConst(Op, Est, Iterations); + BuildRsqrtNROneConst(Op, Est, Iterations, Flags) : + BuildRsqrtNRTwoConst(Op, Est, Iterations, Flags); } return Est; } @@ -13955,14 +14570,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SDValue Chain = Chains.pop_back_val(); // For TokenFactor nodes, look at each operand and only continue up the - // chain until we find two aliases. If we've seen two aliases, assume we'll - // find more and revert to original chain since the xform is unlikely to be - // profitable. + // chain until we reach the depth limit. // // FIXME: The depth check could be made to return the last non-aliasing // chain we found before we hit a tokenfactor rather than the original // chain. - if (Depth > 6 || Aliases.size() == 2) { + if (Depth > TLI.getGatherAllAliasesMaxDepth()) { Aliases.clear(); Aliases.push_back(OriginalChain); return; @@ -14094,6 +14707,83 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } +bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { + // This holds the base pointer, index, and the offset in bytes from the base + // pointer. + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); + + // We must have a base and an offset. + if (!BasePtr.Base.getNode()) + return false; + + // Do not handle stores to undef base pointers. + if (BasePtr.Base.getOpcode() == ISD::UNDEF) + return false; + + SmallVector<StoreSDNode *, 8> ChainedStores; + ChainedStores.push_back(St); + + // Walk up the chain and look for nodes with offsets from the same + // base pointer. Stop when reaching an instruction with a different kind + // or instruction which has a different base pointer. + StoreSDNode *Index = St; + while (Index) { + // If the chain has more than one use, then we can't reorder the mem ops. + if (Index != St && !SDValue(Index, 0)->hasOneUse()) + break; + + if (Index->isVolatile() || Index->isIndexed()) + break; + + // Find the base pointer and offset for this memory node. + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); + + // Check that the base pointer is the same as the original one. + if (!Ptr.equalBaseIndex(BasePtr)) + break; + + // Find the next memory operand in the chain. If the next operand in the + // chain is a store then move up and continue the scan with the next + // memory operand. If the next operand is a load save it and use alias + // information to check if it interferes with anything. + SDNode *NextInChain = Index->getChain().getNode(); + while (true) { + if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { + // We found a store node. Use it for the next iteration. + ChainedStores.push_back(STn); + Index = STn; + break; + } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { + NextInChain = Ldn->getChain().getNode(); + continue; + } else { + Index = nullptr; + break; + } + } + } + + bool MadeChange = false; + SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains; + + for (StoreSDNode *ChainedStore : ChainedStores) { + SDValue Chain = ChainedStore->getChain(); + SDValue BetterChain = FindBetterChain(ChainedStore, Chain); + + if (Chain != BetterChain) { + MadeChange = true; + BetterChains.push_back(std::make_pair(ChainedStore, BetterChain)); + } + } + + // Do all replacements after finding the replacements to make to avoid making + // the chains more complicated by introducing new TokenFactors. 
+ for (auto Replacement : BetterChains) + replaceStoreChain(Replacement.first, Replacement.second); + + return MadeChange; +} + /// This is the entry point for the file. void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, CodeGenOpt::Level OptLevel) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 2b9ba2c1b534..cfbb20947acc 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -118,9 +118,9 @@ bool FastISel::lowerArguments() { for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(), E = FuncInfo.Fn->arg_end(); I != E; ++I) { - DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I); + DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(&*I); assert(VI != LocalValueMap.end() && "Missed an argument?"); - FuncInfo.ValueMap[I] = VI->second; + FuncInfo.ValueMap[&*I] = VI->second; } return true; } @@ -611,7 +611,7 @@ bool FastISel::selectStackmap(const CallInst *I) { // have to worry about calling conventions and target-specific lowering code. // Instead we perform the call lowering right here. // - // CALLSEQ_START(0) + // CALLSEQ_START(0...) // STACKMAP(id, nbytes, ...) // CALLSEQ_END(0, 0) // @@ -647,8 +647,11 @@ bool FastISel::selectStackmap(const CallInst *I) { // Issue CALLSEQ_START unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) - .addImm(0); + auto Builder = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)); + const MCInstrDesc &MCID = Builder.getInstr()->getDesc(); + for (unsigned I = 0, E = MCID.getNumOperands(); I < E; ++I) + Builder.addImm(0); // Issue STACKMAP. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -1100,13 +1103,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // The donothing intrinsic does, well, nothing. case Intrinsic::donothing: return true; - case Intrinsic::eh_actions: { - unsigned ResultReg = getRegForValue(UndefValue::get(II->getType())); - if (!ResultReg) - return false; - updateValueMap(II, ResultReg); - return true; - } case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); assert(DI->getVariable() && "Missing variable"); @@ -1326,12 +1322,38 @@ bool FastISel::selectBitCast(const User *I) { return true; } +// Remove local value instructions starting from the instruction after +// SavedLastLocalValue to the current function insert point. +void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) +{ + MachineInstr *CurLastLocalValue = getLastLocalValue(); + if (CurLastLocalValue != SavedLastLocalValue) { + // Find the first local value instruction to be deleted. + // This is the instruction after SavedLastLocalValue if it is non-NULL. + // Otherwise it's the first instruction in the block. + MachineBasicBlock::iterator FirstDeadInst(SavedLastLocalValue); + if (SavedLastLocalValue) + ++FirstDeadInst; + else + FirstDeadInst = FuncInfo.MBB->getFirstNonPHI(); + setLastLocalValue(SavedLastLocalValue); + removeDeadCode(FirstDeadInst, FuncInfo.InsertPt); + } +} + bool FastISel::selectInstruction(const Instruction *I) { + MachineInstr *SavedLastLocalValue = getLastLocalValue(); // Just before the terminator instruction, insert instructions to // feed PHI nodes in successor blocks. 
if (isa<TerminatorInst>(I)) - if (!handlePHINodesInSuccessorBlocks(I->getParent())) + if (!handlePHINodesInSuccessorBlocks(I->getParent())) { + // PHI node handling may have generated local value instructions, + // even though it failed to handle all PHI nodes. + // We remove these instructions because SelectionDAGISel will generate + // them again. + removeDeadLocalValueCode(SavedLastLocalValue); return false; + } DbgLoc = I->getDebugLoc(); @@ -1348,7 +1370,7 @@ bool FastISel::selectInstruction(const Instruction *I) { LibInfo->hasOptimizedCodeGen(Func)) return false; - // Don't handle Intrinsic::trap if a trap funciton is specified. + // Don't handle Intrinsic::trap if a trap function is specified. if (F && F->getIntrinsicID() == Intrinsic::trap && Call->hasFnAttr("trap-func-name")) return false; @@ -1380,8 +1402,12 @@ bool FastISel::selectInstruction(const Instruction *I) { DbgLoc = DebugLoc(); // Undo phi node updates, because they will be added again by SelectionDAG. - if (isa<TerminatorInst>(I)) + if (isa<TerminatorInst>(I)) { + // PHI node handling may have generated local value instructions. + // We remove them because SelectionDAGISel will generate them again. + removeDeadLocalValueCode(SavedLastLocalValue); FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); + } return false; } @@ -1398,11 +1424,30 @@ void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr, SmallVector<MachineOperand, 0>(), DbgLoc); } - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(FuncInfo.MBB->getBasicBlock(), - MSucc->getBasicBlock()); - FuncInfo.MBB->addSuccessor(MSucc, BranchWeight); + if (FuncInfo.BPI) { + auto BranchProbability = FuncInfo.BPI->getEdgeProbability( + FuncInfo.MBB->getBasicBlock(), MSucc->getBasicBlock()); + FuncInfo.MBB->addSuccessor(MSucc, BranchProbability); + } else + FuncInfo.MBB->addSuccessorWithoutProb(MSucc); +} + +void FastISel::finishCondBranch(const BasicBlock *BranchBB, + MachineBasicBlock *TrueMBB, + MachineBasicBlock *FalseMBB) { + // Add TrueMBB as successor unless it is equal to the FalseMBB: This can + // happen in degenerate IR and MachineIR forbids to have a block twice in the + // successor/predecessor lists. + if (TrueMBB != FalseMBB) { + if (FuncInfo.BPI) { + auto BranchProbability = + FuncInfo.BPI->getEdgeProbability(BranchBB, TrueMBB->getBasicBlock()); + FuncInfo.MBB->addSuccessor(TrueMBB, BranchProbability); + } else + FuncInfo.MBB->addSuccessorWithoutProb(TrueMBB); + } + + fastEmitBranch(FalseMBB, DbgLoc); } /// Emit an FNeg operation. 
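The TrueMBB != FalseMBB guard in finishCondBranch above exists because IR such as 'br i1 %c, label %bb, label %bb' is legal, while MachineIR forbids listing a block twice among a predecessor's successors. A toy C++ sketch of the shape of that dedup (hypothetical Block type; fastEmitBranch is reduced to pushing the fallthrough edge):

#include <cstdio>
#include <vector>

struct Block { std::vector<Block *> Succs; };

// Add the taken successor only when it differs from the fallthrough
// one, then wire the fallthrough edge unconditionally.
void finishCondBranch(Block &Pred, Block *TrueBB, Block *FalseBB) {
  if (TrueBB != FalseBB)
    Pred.Succs.push_back(TrueBB);
  Pred.Succs.push_back(FalseBB); // stands in for fastEmitBranch
}

int main() {
  Block Pred, Target;
  // Degenerate conditional branch where both edges hit the same block:
  finishCondBranch(Pred, &Target, &Target);
  printf("successors: %zu\n", Pred.Succs.size()); // 1, not 2
  return 0;
}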
@@ -1864,21 +1909,18 @@ unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_rf(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, unsigned Op0, - bool Op0IsKill, const ConstantFP *FPImm) { +unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + const ConstantFP *FPImm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); - Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill)) .addFPImm(FPImm); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, getKillRegState(Op0IsKill)) .addFPImm(FPImm); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); @@ -1912,35 +1954,6 @@ unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_rrii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, unsigned Op1, - bool Op1IsKill, uint64_t Imm1, - uint64_t Imm2) { - const MCInstrDesc &II = TII.get(MachineInstOpcode); - - unsigned ResultReg = createResultReg(RC); - Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); - Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); - - if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill)) - .addReg(Op1, getKillRegState(Op1IsKill)) - .addImm(Imm1) - .addImm(Imm2); - else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, getKillRegState(Op0IsKill)) - .addReg(Op1, getKillRegState(Op1IsKill)) - .addImm(Imm1) - .addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); - } - return ResultReg; -} - unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); @@ -1957,25 +1970,6 @@ unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_ii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, uint64_t Imm1, - uint64_t Imm2) { - unsigned ResultReg = createResultReg(RC); - const MCInstrDesc &II = TII.get(MachineInstOpcode); - - if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addImm(Imm1) - .addImm(Imm2); - else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1) - .addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); - } - return ResultReg; -} - unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index cc306cbf5ae4..b62bd2bd63ee 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -87,6 +87,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); MachineModuleInfo &MMI = 
MF->getMMI(); + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; @@ -103,28 +104,29 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) { - // Static allocas can be folded into the initial stack frame adjustment. - if (AI->isStaticAlloca()) { + Type *Ty = AI->getAllocatedType(); + unsigned Align = + std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), + AI->getAlignment()); + unsigned StackAlign = TFI->getStackAlignment(); + + // Static allocas can be folded into the initial stack frame + // adjustment. For targets that don't realign the stack, don't + // do this if there is an extra alignment requirement. + if (AI->isStaticAlloca() && + (TFI->isStackRealignable() || (Align <= StackAlign))) { const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize()); - Type *Ty = AI->getAllocatedType(); uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty); - unsigned Align = - std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), - AI->getAlignment()); TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. StaticAllocaMap[AI] = MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); - } else { - unsigned Align = - std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment( - AI->getAllocatedType()), - AI->getAlignment()); - unsigned StackAlign = - MF->getSubtarget().getFrameLowering()->getStackAlignment(); + // FIXME: Overaligned static allocas should be grouped into + // a single dynamic allocation instead of using a separate + // stack allocation for each one. if (Align <= StackAlign) Align = 0; // Inform the Frame Information that we have variable-sized objects. @@ -134,7 +136,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Look for inline asm that clobbers the SP register. if (isa<CallInst>(I) || isa<InvokeInst>(I)) { - ImmutableCallSite CS(I); + ImmutableCallSite CS(&*I); if (isa<InlineAsm>(CS.getCalledValue())) { unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); @@ -163,7 +165,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, MF->getFrameInfo()->setHasVAStart(true); } - // If we have a musttail call in a variadic funciton, we need to ensure we + // If we have a musttail call in a variadic function, we need to ensure we // forward implicit register parameters. if (const auto *CI = dyn_cast<CallInst>(I)) { if (CI->isMustTailCall() && Fn->isVarArg()) @@ -172,10 +174,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Mark values used outside their block as exported, by allocating // a virtual register for them. - if (isUsedOutsideOfDefiningBlock(I)) - if (!isa<AllocaInst>(I) || - !StaticAllocaMap.count(cast<AllocaInst>(I))) - InitializeRegForValue(I); + if (isUsedOutsideOfDefiningBlock(&*I)) + if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(I))) + InitializeRegForValue(&*I); // Collect llvm.dbg.declare information. 
This is done now instead of // during the initial isel pass through the IR so that it is done @@ -205,15 +206,36 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } // Decide the preferred extend type for a value. - PreferredExtendType[I] = getPreferredExtendForValue(I); + PreferredExtendType[&*I] = getPreferredExtendForValue(&*I); } // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This // also creates the initial PHI MachineInstrs, though none of the input // operands are populated. for (BB = Fn->begin(); BB != EB; ++BB) { - MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB); - MBBMap[BB] = MBB; + // Don't create MachineBasicBlocks for imaginary EH pad blocks. These blocks + // are really data, and no instructions can live here. + if (BB->isEHPad()) { + const Instruction *I = BB->getFirstNonPHI(); + // If this is a non-landingpad EH pad, mark this function as using + // funclets. + // FIXME: SEH catchpads do not create funclets, so we could avoid setting + // this in such cases in order to improve frame layout. + if (!isa<LandingPadInst>(I)) { + MMI.setHasEHFunclets(true); + MF->getFrameInfo()->setHasOpaqueSPAdjustment(true); + } + if (isa<CatchSwitchInst>(I)) { + assert(&*BB->begin() == I && + "WinEHPrepare failed to remove PHIs from imaginary BBs"); + continue; + } + if (isa<FuncletPadInst>(I)) + assert(&*BB->begin() == I && "WinEHPrepare failed to demote PHIs"); + } + + MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&*BB); + MBBMap[&*BB] = MBB; MF->push_back(MBB); // Transfer the address-taken flag. This is necessary because there could @@ -252,94 +274,64 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Mark landing pad blocks. SmallVector<const LandingPadInst *, 4> LPads; for (BB = Fn->begin(); BB != EB; ++BB) { - if (const auto *Invoke = dyn_cast<InvokeInst>(BB->getTerminator())) - MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); - if (BB->isLandingPad()) - LPads.push_back(BB->getLandingPadInst()); + const Instruction *FNP = BB->getFirstNonPHI(); + if (BB->isEHPad() && MBBMap.count(&*BB)) + MBBMap[&*BB]->setIsEHPad(); + if (const auto *LPI = dyn_cast<LandingPadInst>(FNP)) + LPads.push_back(LPI); } - // If this is an MSVC EH personality, we need to do a bit more work. - EHPersonality Personality = EHPersonality::Unknown; - if (Fn->hasPersonalityFn()) - Personality = classifyEHPersonality(Fn->getPersonalityFn()); - if (!isMSVCEHPersonality(Personality)) + // If this personality uses funclets, we need to do a bit more work. + if (!Fn->hasPersonalityFn()) + return; + EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn()); + if (!isFuncletEHPersonality(Personality)) return; - if (Personality == EHPersonality::MSVC_Win64SEH || - Personality == EHPersonality::MSVC_X86SEH) { - addSEHHandlersForLPads(LPads); - } - - WinEHFuncInfo &EHInfo = MMI.getWinEHFuncInfo(&fn); - if (Personality == EHPersonality::MSVC_CXX) { - const Function *WinEHParentFn = MMI.getWinEHParent(&fn); - calculateWinCXXEHStateNumbers(WinEHParentFn, EHInfo); - } - - // Copy the state numbers to LandingPadInfo for the current function, which - // could be a handler or the parent. This should happen for 32-bit SEH and - // C++ EH. 
- if (Personality == EHPersonality::MSVC_CXX || - Personality == EHPersonality::MSVC_X86SEH) { - for (const LandingPadInst *LP : LPads) { - MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()]; - MMI.addWinEHState(LPadMBB, EHInfo.LandingPadStateMap[LP]); - } - } -} - -void FunctionLoweringInfo::addSEHHandlersForLPads( - ArrayRef<const LandingPadInst *> LPads) { - MachineModuleInfo &MMI = MF->getMMI(); - - // Iterate over all landing pads with llvm.eh.actions calls. - for (const LandingPadInst *LP : LPads) { - const IntrinsicInst *ActionsCall = - dyn_cast<IntrinsicInst>(LP->getNextNode()); - if (!ActionsCall || - ActionsCall->getIntrinsicID() != Intrinsic::eh_actions) - continue; - - // Parse the llvm.eh.actions call we found. - MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()]; - SmallVector<std::unique_ptr<ActionHandler>, 4> Actions; - parseEHActions(ActionsCall, Actions); - - // Iterate EH actions from most to least precedence, which means - // iterating in reverse. - for (auto I = Actions.rbegin(), E = Actions.rend(); I != E; ++I) { - ActionHandler *Action = I->get(); - if (auto *CH = dyn_cast<CatchHandler>(Action)) { - const auto *Filter = - dyn_cast<Function>(CH->getSelector()->stripPointerCasts()); - assert((Filter || CH->getSelector()->isNullValue()) && - "expected function or catch-all"); - const auto *RecoverBA = - cast<BlockAddress>(CH->getHandlerBlockOrFunc()); - MMI.addSEHCatchHandler(LPadMBB, Filter, RecoverBA); + // Calculate state numbers if we haven't already. + WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo(); + if (Personality == EHPersonality::MSVC_CXX) + calculateWinCXXEHStateNumbers(&fn, EHInfo); + else if (isAsynchronousEHPersonality(Personality)) + calculateSEHStateNumbers(&fn, EHInfo); + else if (Personality == EHPersonality::CoreCLR) + calculateClrEHStateNumbers(&fn, EHInfo); + + calculateCatchReturnSuccessorColors(&fn, EHInfo); + + // Map all BB references in the WinEH data to MBBs. + for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { + for (WinEHHandlerType &H : TBME.HandlerArray) { + if (H.CatchObj.Alloca) { + assert(StaticAllocaMap.count(H.CatchObj.Alloca)); + H.CatchObj.FrameIndex = StaticAllocaMap[H.CatchObj.Alloca]; } else { - assert(isa<CleanupHandler>(Action)); - const auto *Fini = cast<Function>(Action->getHandlerBlockOrFunc()); - MMI.addSEHCleanupHandler(LPadMBB, Fini); + H.CatchObj.FrameIndex = INT_MAX; } + if (H.Handler) + H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()]; } } + for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap) + if (UME.Cleanup) + UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()]; + for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) { + const BasicBlock *BB = UME.Handler.get<const BasicBlock *>(); + UME.Handler = MBBMap[BB]; + } + for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) { + const BasicBlock *BB = CME.Handler.get<const BasicBlock *>(); + CME.Handler = MBBMap[BB]; + } } /// clear - Clear out all the function-specific state. This returns this /// FunctionLoweringInfo to an empty state, ready to be used for a /// different function. 
void FunctionLoweringInfo::clear() { - assert(CatchInfoFound.size() == CatchInfoLost.size() && - "Not all catch info was assigned to a landing pad!"); - MBBMap.clear(); ValueMap.clear(); StaticAllocaMap.clear(); -#ifndef NDEBUG - CatchInfoLost.clear(); - CatchInfoFound.clear(); -#endif LiveOutRegInfo.clear(); VisitedBBs.clear(); ArgDbgValues.clear(); @@ -520,6 +512,17 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) { return 0; } +unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg( + const Value *CPI, const TargetRegisterClass *RC) { + MachineRegisterInfo &MRI = MF->getRegInfo(); + auto I = CatchPadExceptionPointers.insert({CPI, 0}); + unsigned &VReg = I.first->second; + if (I.second) + VReg = MRI.createVirtualRegister(RC); + assert(VReg && "null vreg in exception pointer table!"); + return VReg; +} + /// ComputeUsesVAFloatArgument - Determine if any floating-point values are /// being passed to this variadic function, and set the MachineModuleInfo's /// usesVAFloatArgument flag if so. This flag is used to emit an undefined @@ -547,10 +550,9 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I, /// landingpad instruction and add them to the specified machine module info. void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, MachineBasicBlock *MBB) { - MMI.addPersonality( - MBB, - cast<Function>( - I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts())); + if (const auto *PF = dyn_cast<Function>( + I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts())) + MMI.addPersonality(PF); if (I.isCleanup()) MMI.addCleanup(MBB); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 5ec10308dc28..a1e2d410ab00 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -139,7 +139,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, UseRC = RC; else if (RC) { const TargetRegisterClass *ComRC = - TRI->getCommonSubClass(UseRC, RC); + TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy); // If multiple uses expect disjoint register classes, we emit // copies in AddRegisterOperand. if (ComRC) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index fbc8f1e89f6e..f46767f6c4a1 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -39,6 +39,10 @@ using namespace llvm; #define DEBUG_TYPE "legalizedag" +namespace { + +struct FloatSignAsInt; + //===----------------------------------------------------------------------===// /// This takes an arbitrary SelectionDAG as input and /// hacks on it until the target machine can handle it. This involves @@ -51,7 +55,6 @@ using namespace llvm; /// 'setcc' instruction efficiently, but does support 'brcc' instruction, this /// will attempt merge setcc and brc instructions into brcc's. 
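The getCatchPadExceptionPointerVReg helper above uses a create-on-first-use idiom: insert a zero placeholder into the map, and allocate a register only when the insertion reports that the key was new. The same pattern, sketched with a plain std::map and a counter standing in for MachineRegisterInfo::createVirtualRegister:

    #include <cassert>
    #include <map>

    // Create-on-first-use sketch, mirroring the insert-then-fill pattern
    // of getCatchPadExceptionPointerVReg above. NextVReg is a stand-in
    // for MachineRegisterInfo::createVirtualRegister.
    static unsigned NextVReg = 1;

    unsigned getOrCreateVReg(std::map<const void *, unsigned> &Table,
                             const void *Key) {
      auto I = Table.insert({Key, 0});
      unsigned &VReg = I.first->second;
      if (I.second)           // insertion happened: first time for this key
        VReg = NextVReg++;
      assert(VReg && "null vreg in table!");
      return VReg;
    }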
/// -namespace { class SelectionDAGLegalize { const TargetMachine &TM; const TargetLowering &TLI; @@ -130,7 +133,11 @@ private: SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, SmallVectorImpl<SDValue> &Results); - SDValue ExpandFCOPYSIGN(SDNode *Node); + void getSignAsIntValue(FloatSignAsInt &State, SDLoc DL, SDValue Value) const; + SDValue modifySignAsInt(const FloatSignAsInt &State, SDLoc DL, + SDValue NewIntValue) const; + SDValue ExpandFCOPYSIGN(SDNode *Node) const; + SDValue ExpandFABS(SDNode *Node) const; SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT, SDLoc dl); SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, @@ -138,6 +145,7 @@ private: SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, SDLoc dl); + SDValue ExpandBITREVERSE(SDValue Op, SDLoc dl); SDValue ExpandBSWAP(SDValue Op, SDLoc dl); SDValue ExpandBitCount(unsigned Opc, SDValue Op, SDLoc dl); @@ -146,10 +154,11 @@ private: SDValue ExpandVectorBuildThroughStack(SDNode* Node); SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); + SDValue ExpandConstant(ConstantSDNode *CP); - std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); - - void ExpandNode(SDNode *Node); + // if ExpandNode returns false, LegalizeOp falls back to ConvertNodeToLibcall + bool ExpandNode(SDNode *Node); + void ConvertNodeToLibcall(SDNode *Node); void PromoteNode(SDNode *Node); public: @@ -273,17 +282,30 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); if (Extend) { - SDValue Result = - DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, - DAG.getEntryNode(), - CPIdx, MachinePointerInfo::getConstantPool(), - VT, false, false, false, Alignment); + SDValue Result = DAG.getExtLoad( + ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), VT, + false, false, false, Alignment); return Result; } SDValue Result = - DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), false, false, false, - Alignment); + DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); + return Result; +} + +/// Expands the Constant node to a load from the constant pool. +SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) { + SDLoc dl(CP); + EVT VT = CP->getValueType(0); + SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(), + TLI.getPointerTy(DAG.getDataLayout())); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + SDValue Result = + DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); return Result; } @@ -594,13 +616,13 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); // Store the vector. - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), - false, false, 0); + SDValue Ch = DAG.getStore( + DAG.getEntryNode(), dl, Tmp1, StackPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false, + false, 0); // Truncate or zero extend offset to target pointer type. - unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? 
ISD::TRUNCATE : ISD::ZERO_EXTEND; - Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3); + Tmp3 = DAG.getZExtOrTrunc(Tmp3, dl, PtrVT); // Add the offset to the index. unsigned EltSize = EltVT.getSizeInBits()/8; Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3, @@ -610,9 +632,9 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT, false, false, 0); // Load the updated vector. - return DAG.getLoad(VT, dl, Ch, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), false, false, - false, 0); + return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), SPFI), + false, false, false, 0); } @@ -728,14 +750,12 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { case TargetLowering::Legal: { // If this is an unaligned store and the target doesn't support it, // expand it. + EVT MemVT = ST->getMemoryVT(); unsigned AS = ST->getAddressSpace(); unsigned Align = ST->getAlignment(); - if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { - Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); - if (Align < ABIAlignment) - ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); - } + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); break; } case TargetLowering::Custom: { @@ -839,20 +859,16 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); ReplaceNode(SDValue(Node, 0), Result); } else { - switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(), - StVT.getSimpleVT())) { + switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { + EVT MemVT = ST->getMemoryVT(); unsigned AS = ST->getAddressSpace(); unsigned Align = ST->getAlignment(); // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { - Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DL.getABITypeAlignment(Ty); - if (Align < ABIAlignment) - ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); - } + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); break; } case TargetLowering::Custom: { @@ -895,17 +911,14 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { + EVT MemVT = LD->getMemoryVT(); unsigned AS = LD->getAddressSpace(); unsigned Align = LD->getAlignment(); + const DataLayout &DL = DAG.getDataLayout(); // If this is an unaligned load and the target doesn't support it, // expand it. 
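The PerformInsertVectorEltInMemory path above is the classic spill-to-stack trick: store the whole vector to a temporary, overwrite one element at base + index * element-size, then reload the vector. In scalar C++ the same memory dance looks roughly like this (a sketch, not the DAG code):

    #include <cstring>
    #include <cstdio>

    // Spill-to-stack INSERT_VECTOR_ELT sketch: store, patch one element
    // at the computed byte offset, reload.
    int main() {
      float Vec[4] = {1, 2, 3, 4};   // stand-in for the stack temporary
      unsigned Idx = 2;
      float Val = 9;
      std::memcpy(reinterpret_cast<char *>(Vec) + Idx * sizeof(float),
                  &Val, sizeof(float));
      std::printf("%g %g %g %g\n", Vec[0], Vec[1], Vec[2], Vec[3]); // 1 2 9 4
    }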
- if (!TLI.allowsMisalignedMemoryAccesses(LD->getMemoryVT(), AS, Align)) { - Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); - if (Align < ABIAlignment){ - ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain); - } - } + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain); break; } case TargetLowering::Custom: { @@ -1092,23 +1105,20 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Chain = Res.getValue(1); } } else { - // If this is an unaligned load and the target doesn't support - // it, expand it. + // If this is an unaligned load and the target doesn't support it, + // expand it. EVT MemVT = LD->getMemoryVT(); unsigned AS = LD->getAddressSpace(); unsigned Align = LD->getAlignment(); - if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) { - Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); - if (Align < ABIAlignment){ - ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain); - } - } + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain); } break; } case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) { + EVT DestVT = Node->getValueType(0); + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) { // If the source type is not legal, see if there is a legal extload to // an intermediate type that we can then extend further. EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT()); @@ -1127,6 +1137,23 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Chain = Load.getValue(1); break; } + + // Handle the special case of fp16 extloads. EXTLOAD doesn't have the + // normal undefined upper bits behavior to allow using an in-reg extend + // with the illegal FP type, so load as an integer and do the + // from-integer conversion. 
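What "expand it" means for an unaligned access above: replace the single wide load with a sequence of narrow, always-legal accesses. A portable C++ rendering of the same idea, where memcpy lets the compiler pick whatever safe sequence the target supports (the DAG expansion emits narrow loads plus shifts and ORs):

    #include <cstdint>
    #include <cstring>
    #include <cstdio>

    uint32_t loadUnaligned(const unsigned char *P) {
      uint32_t V;
      std::memcpy(&V, P, sizeof(V));  // safe regardless of P's alignment
      return V;
    }

    int main() {
      unsigned char Buf[8] = {0, 0x44, 0x33, 0x22, 0x11, 0, 0, 0};
      std::printf("%08x\n", loadUnaligned(Buf + 1)); // 11223344 (little-endian)
    }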
+ if (SrcVT.getScalarType() == MVT::f16) { + EVT ISrcVT = SrcVT.changeTypeToInteger(); + EVT IDestVT = DestVT.changeTypeToInteger(); + EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); + + SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT, + Chain, Ptr, ISrcVT, + LD->getMemOperand()); + Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result); + Chain = Result.getValue(1); + break; + } } assert(!SrcVT.isVector() && @@ -1180,15 +1207,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { #ifndef NDEBUG for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == - TargetLowering::TypeLegal && + assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == + TargetLowering::TypeLegal || + TLI.isTypeLegal(Node->getValueType(i))) && "Unexpected illegal type!"); for (const SDValue &Op : Node->op_values()) - assert((TLI.getTypeAction(*DAG.getContext(), - Op.getValueType()) == TargetLowering::TypeLegal || - Op.getOpcode() == ISD::TargetConstant) && - "Unexpected illegal type!"); + assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == + TargetLowering::TypeLegal || + TLI.isTypeLegal(Op.getValueType()) || + Op.getOpcode() == ISD::TargetConstant) && + "Unexpected illegal type!"); #endif // Figure out the correct action; the way to query this varies by opcode @@ -1201,6 +1230,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::STACKSAVE: Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; + case ISD::GET_DYNAMIC_AREA_OFFSET: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + break; case ISD::VAARG: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -1229,7 +1262,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::SETCC: case ISD::BR_CC: { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : - Node->getOpcode() == ISD::SETCC ? 2 : 1; + Node->getOpcode() == ISD::SETCC ? 2 : + Node->getOpcode() == ISD::SETCCE ? 3 : 1; unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); ISD::CondCode CCCode = @@ -1265,6 +1299,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::FRAME_TO_ARGS_OFFSET: case ISD::EH_SJLJ_SETJMP: case ISD::EH_SJLJ_LONGJMP: + case ISD::EH_SJLJ_SETUP_DISPATCH: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -1281,6 +1316,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; + case ISD::READCYCLECOUNTER: + // READCYCLECOUNTER returns an i64, even if type legalization might have + // expanded that to several smaller types. 
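The fp16 extload rewrite above loads the half-precision payload as a plain integer and makes the widening an explicit FP16_TO_FP conversion. For reference, here is what that conversion computes, decoded by hand from the IEEE half bit layout (a sketch; the DAG of course emits the node, not this code):

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // Decode IEEE binary16 bits (sign:1 exp:5 frac:10) to float.
    float halfBitsToFloat(uint16_t H) {
      unsigned Sign = H >> 15;
      unsigned Exp = (H >> 10) & 0x1F;
      unsigned Frac = H & 0x3FF;
      float Mag;
      if (Exp == 0)                       // zero / subnormal
        Mag = std::ldexp(static_cast<float>(Frac), -24);
      else if (Exp == 31)                 // inf / NaN
        Mag = Frac ? NAN : INFINITY;
      else                                // normal: (1024+frac) * 2^(exp-25)
        Mag = std::ldexp(static_cast<float>(Frac | 0x400), int(Exp) - 25);
      return Sign ? -Mag : Mag;
    }

    int main() {
      std::printf("%g\n", halfBitsToFloat(0x3C00)); // 1
      std::printf("%g\n", halfBitsToFloat(0xC000)); // -2
    }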
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64); + break; case ISD::READ_REGISTER: case ISD::WRITE_REGISTER: // Named register is legal in the DAG, but blocked by register name @@ -1379,7 +1419,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } // FALL THROUGH case TargetLowering::Expand: - ExpandNode(Node); + if (ExpandNode(Node)) + return; + // FALL THROUGH + case TargetLowering::LibCall: + ConvertNodeToLibcall(Node); return; case TargetLowering::Promote: PromoteNode(Node); @@ -1419,6 +1463,11 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { // series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in // the vector. If all are expanded here, we don't want one store per vector // element. + + // Caches for hasPredecessorHelper + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + SDValue StackPtr, Ch; for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), UE = Vec.getNode()->use_end(); UI != UE; ++UI) { @@ -1433,6 +1482,12 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode())) continue; + // If the index is dependent on the store we will introduce a cycle when + // creating the load (the load uses the index, and by replacing the chain + // we will make the index dependent on the load). + if (Idx.getNode()->hasPredecessorHelper(ST, Visited, Worklist)) + continue; + StackPtr = ST->getBasePtr(); Ch = SDValue(ST, 0); break; @@ -1490,7 +1545,8 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // First store the whole vector. SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, @@ -1528,7 +1584,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { SDLoc dl(Node); SDValue FIPtr = DAG.CreateStackTemporary(VT); int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // Emit a store of each element to the stack slot. SmallVector<SDValue, 8> Stores; @@ -1568,69 +1625,143 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { false, false, false, 0); } -SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { - SDLoc dl(Node); - SDValue Tmp1 = Node->getOperand(0); - SDValue Tmp2 = Node->getOperand(1); - - // Get the sign bit of the RHS. First obtain a value that has the same - // sign as the sign bit, i.e. negative if and only if the sign bit is 1. - SDValue SignBit; - EVT FloatVT = Tmp2.getValueType(); - EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits()); +namespace { +/// Keeps track of state when getting the sign of a floating-point value as an +/// integer. +struct FloatSignAsInt { + EVT FloatVT; + SDValue Chain; + SDValue FloatPtr; + SDValue IntPtr; + MachinePointerInfo IntPointerInfo; + MachinePointerInfo FloatPointerInfo; + SDValue IntValue; + APInt SignMask; +}; +} + +/// Bitcast a floating-point value to an integer value. 
Only bitcast the part +/// containing the sign bit if the target has no integer value capable of +/// holding all bits of the floating-point value. +void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, + SDLoc DL, SDValue Value) const { + EVT FloatVT = Value.getValueType(); + unsigned NumBits = FloatVT.getSizeInBits(); + State.FloatVT = FloatVT; + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits); + // Convert to an integer of the same size. if (TLI.isTypeLegal(IVT)) { - // Convert to an integer with the same sign bit. - SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2); + State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value); + State.SignMask = APInt::getSignBit(NumBits); + return; + } + + auto &DataLayout = DAG.getDataLayout(); + // Store the float to memory, then load the sign part out as an integer. + MVT LoadTy = TLI.getRegisterType(*DAG.getContext(), MVT::i8); + // First create a temporary that is aligned for both the load and store. + SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); + int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + // Then store the float to it. + State.FloatPtr = StackPtr; + MachineFunction &MF = DAG.getMachineFunction(); + State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI); + State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr, + State.FloatPointerInfo, false, false, 0); + + SDValue IntPtr; + if (DataLayout.isBigEndian()) { + assert(FloatVT.isByteSized() && "Unsupported floating point type!"); + // Load out a legal integer with the same sign bit as the float. + IntPtr = StackPtr; + State.IntPointerInfo = State.FloatPointerInfo; } else { - auto &DL = DAG.getDataLayout(); - // Store the float to memory, then load the sign part out as an integer. - MVT LoadTy = TLI.getPointerTy(DL); - // First create a temporary that is aligned for both the load and store. - SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); - // Then store the float to it. - SDValue Ch = - DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(), - false, false, 0); - if (DL.isBigEndian()) { - assert(FloatVT.isByteSized() && "Unsupported floating point type!"); - // Load out a legal integer with the same sign bit as the float. - SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(), - false, false, false, 0); - } else { // Little endian - SDValue LoadPtr = StackPtr; - // The float may be wider than the integer we are going to load. Advance - // the pointer so that the loaded integer will contain the sign bit. - unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits(); - unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8; - LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr, - DAG.getConstant(ByteOffset, dl, - LoadPtr.getValueType())); - // Load a legal integer containing the sign bit. - SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), - false, false, false, 0); - // Move the sign bit to the top bit of the loaded integer. - unsigned BitShift = LoadTy.getSizeInBits() - - (FloatVT.getSizeInBits() - 8 * ByteOffset); - assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?"); - if (BitShift) - SignBit = DAG.getNode( - ISD::SHL, dl, LoadTy, SignBit, - DAG.getConstant(BitShift, dl, - TLI.getShiftAmountTy(SignBit.getValueType(), DL))); - } + // Advance the pointer so that the loaded byte will contain the sign bit. 
+ unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1; + IntPtr = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr, + DAG.getConstant(ByteOffset, DL, StackPtr.getValueType())); + State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI, + ByteOffset); } - // Now get the sign bit proper, by seeing whether the value is negative. - SignBit = DAG.getSetCC(dl, getSetCCResultType(SignBit.getValueType()), - SignBit, - DAG.getConstant(0, dl, SignBit.getValueType()), - ISD::SETLT); - // Get the absolute value of the result. - SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1); - // Select between the nabs and abs value based on the sign bit of - // the input. - return DAG.getSelect(dl, AbsVal.getValueType(), SignBit, - DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), - AbsVal); + + State.IntPtr = IntPtr; + State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, + IntPtr, State.IntPointerInfo, MVT::i8, + false, false, false, 0); + State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7); +} + +/// Replace the integer value produced by getSignAsIntValue() with a new value +/// and cast the result back to a floating-point type. +SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State, + SDLoc DL, SDValue NewIntValue) const { + if (!State.Chain) + return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue); + + // Override the part containing the sign bit in the value stored on the stack. + SDValue Chain = DAG.getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr, + State.IntPointerInfo, MVT::i8, false, false, + 0); + return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr, + State.FloatPointerInfo, false, false, false, 0); +} + +SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { + SDLoc DL(Node); + SDValue Mag = Node->getOperand(0); + SDValue Sign = Node->getOperand(1); + + // Get sign bit into an integer value. + FloatSignAsInt SignAsInt; + getSignAsIntValue(SignAsInt, DL, Sign); + + EVT IntVT = SignAsInt.IntValue.getValueType(); + SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT); + SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, SignAsInt.IntValue, + SignMask); + + // If FABS is legal transform FCOPYSIGN(x, y) => sign(x) ? -FABS(x) : FABS(X) + EVT FloatVT = Mag.getValueType(); + if (TLI.isOperationLegalOrCustom(ISD::FABS, FloatVT) && + TLI.isOperationLegalOrCustom(ISD::FNEG, FloatVT)) { + SDValue AbsValue = DAG.getNode(ISD::FABS, DL, FloatVT, Mag); + SDValue NegValue = DAG.getNode(ISD::FNEG, DL, FloatVT, AbsValue); + SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(IntVT), SignBit, + DAG.getConstant(0, DL, IntVT), ISD::SETNE); + return DAG.getSelect(DL, FloatVT, Cond, NegValue, AbsValue); + } + + // Transform values to integer, copy the sign bit and transform back. + FloatSignAsInt MagAsInt; + getSignAsIntValue(MagAsInt, DL, Mag); + assert(SignAsInt.SignMask == MagAsInt.SignMask); + SDValue ClearSignMask = DAG.getConstant(~SignAsInt.SignMask, DL, IntVT); + SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, MagAsInt.IntValue, + ClearSignMask); + SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit); + + return modifySignAsInt(MagAsInt, DL, CopiedSign); +} + +SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const { + SDLoc DL(Node); + SDValue Value = Node->getOperand(0); + + // Transform FABS(x) => FCOPYSIGN(x, 0.0) if FCOPYSIGN is legal. 
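Once getSignAsIntValue has produced integer bits and a SignMask, the FCOPYSIGN expansion above is pure bit arithmetic: clear the magnitude's sign bit, then OR in the sign operand's. A scalar sketch on raw f32 bits, assuming 32-bit IEEE float:

    #include <cstdint>
    #include <cstring>
    #include <cstdio>

    // result = (mag & ~SignMask) | (sign & SignMask), on the f32 bits.
    float copysignBits(float Mag, float Sign) {
      uint32_t M, S;
      std::memcpy(&M, &Mag, 4);
      std::memcpy(&S, &Sign, 4);
      const uint32_t SignMask = 0x80000000u;
      uint32_t R = (M & ~SignMask) | (S & SignMask);
      float Res;
      std::memcpy(&Res, &R, 4);
      return Res;
    }

    int main() { std::printf("%g\n", copysignBits(3.0f, -1.0f)); } // -3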
+ EVT FloatVT = Value.getValueType(); + if (TLI.isOperationLegalOrCustom(ISD::FCOPYSIGN, FloatVT)) { + SDValue Zero = DAG.getConstantFP(0.0, DL, FloatVT); + return DAG.getNode(ISD::FCOPYSIGN, DL, FloatVT, Value, Zero); + } + + // Transform value to integer, clear the sign bit and transform back. + FloatSignAsInt ValueAsInt; + getSignAsIntValue(ValueAsInt, DL, Value); + EVT IntVT = ValueAsInt.IntValue.getValueType(); + SDValue ClearSignMask = DAG.getConstant(~ValueAsInt.SignMask, DL, IntVT); + SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, ValueAsInt.IntValue, + ClearSignMask); + return modifySignAsInt(ValueAsInt, DL, ClearedSign); } void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, @@ -1798,7 +1929,8 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr); int SPFI = StackPtrFI->getIndex(); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); unsigned SrcSize = SrcOp.getValueType().getSizeInBits(); unsigned SlotSize = SlotVT.getSizeInBits(); @@ -1838,14 +1970,14 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr); int SPFI = StackPtrFI->getIndex(); - SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0), - StackPtr, - MachinePointerInfo::getFixedStack(SPFI), - Node->getValueType(0).getVectorElementType(), - false, false, 0); - return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), - false, false, false, 0); + SDValue Ch = DAG.getTruncStore( + DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), + Node->getValueType(0).getVectorElementType(), false, false, 0); + return DAG.getLoad( + Node->getValueType(0), dl, Ch, StackPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false, + false, false, 0); } static bool @@ -2011,9 +2143,10 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); - return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment); + return DAG.getLoad( + VT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, Alignment); } SmallSet<SDValue, 16> DefinedValues; @@ -2205,47 +2338,6 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, return ExpandLibCall(LC, Node, isSigned); } -/// Return true if divmod libcall is available. -static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, - const TargetLowering &TLI) { - RTLIB::Libcall LC; - switch (Node->getSimpleValueType(0).SimpleTy) { - default: llvm_unreachable("Unexpected request for libcall!"); - case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; - case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; - case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; - case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; - case MVT::i128: LC= isSigned ? 
RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; - } - - return TLI.getLibcallName(LC) != nullptr; -} - -/// Only issue divrem libcall if both quotient and remainder are needed. -static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) { - // The other use might have been replaced with a divrem already. - unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; - unsigned OtherOpcode = 0; - if (isSigned) - OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV; - else - OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV; - - SDValue Op0 = Node->getOperand(0); - SDValue Op1 = Node->getOperand(1); - for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), - UE = Op0.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; - if (User == Node) - continue; - if ((User->getOpcode() == OtherOpcode || User->getOpcode() == DivRemOpc) && - User->getOperand(0) == Op0 && - User->getOperand(1) == Op1) - return true; - } - return false; -} - /// Issue libcalls to __{u}divmod to compute div / rem pairs. void SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, @@ -2428,6 +2520,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, SDLoc dl) { + // TODO: Should any fast-math-flags be set for the created nodes? + if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { // simple 32-bit [signed|unsigned] integer to float/double expansion @@ -2611,14 +2705,15 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, Alignment = std::min(Alignment, 4u); SDValue FudgeInReg; if (DestVT == MVT::f32) - FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment); + FudgeInReg = DAG.getLoad( + MVT::f32, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, Alignment); else { - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, - DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - MVT::f32, false, false, false, Alignment); + SDValue Load = DAG.getExtLoad( + ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, + false, false, false, Alignment); HandleSDNode Handle(Load); LegalizeOp(Load.getNode()); FudgeInReg = Handle.getValue(); @@ -2713,6 +2808,31 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); } +/// Open code the operations for BITREVERSE. +SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, SDLoc dl) { + EVT VT = Op.getValueType(); + EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + unsigned Sz = VT.getScalarSizeInBits(); + + SDValue Tmp, Tmp2; + Tmp = DAG.getConstant(0, dl, VT); + for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) { + if (I < J) + Tmp2 = + DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT)); + else + Tmp2 = + DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT)); + + APInt Shift(Sz, 1); + Shift = Shift.shl(J); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2); + } + + return Tmp; +} + /// Open code the operations for BSWAP of the specified operation. 
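The ExpandBITREVERSE loop above moves bit I of the input to bit J = Sz-1-I by shifting left or right as needed, masking with 1 << J, and OR-ing into an accumulator. A direct scalar transliteration for 32 bits:

    #include <cstdint>
    #include <cstdio>

    uint32_t bitreverse32(uint32_t Op) {
      const unsigned Sz = 32;
      uint32_t Tmp = 0;
      for (unsigned I = 0, J = Sz - 1; I < Sz; ++I, --J) {
        // Shift bit I of Op into position J, then keep only that bit.
        uint32_t Tmp2 = (I < J) ? (Op << (J - I)) : (Op >> (I - J));
        Tmp |= Tmp2 & (uint32_t(1) << J);
      }
      return Tmp;
    }

    int main() {
      std::printf("%08x\n", (unsigned)bitreverse32(0x00000001)); // 80000000
    }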
SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { EVT VT = Op.getValueType(); @@ -2865,16 +2985,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, } } -std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { - unsigned Opc = Node->getOpcode(); - MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); - - return ExpandChainLibCall(LC, Node, false); -} - -void SelectionDAGLegalize::ExpandNode(SDNode *Node) { +bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SmallVector<SDValue, 8> Results; SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3, Tmp4; @@ -2888,6 +2999,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); Results.push_back(Tmp1); break; + case ISD::BITREVERSE: + Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl)); + break; case ISD::BSWAP: Results.push_back(ExpandBSWAP(Node->getOperand(0), dl)); break; @@ -2908,30 +3022,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // preserve the chain and be done. Results.push_back(Node->getOperand(0)); break; + case ISD::READCYCLECOUNTER: + // If the target didn't expand this, just return 'zero' and preserve the + // chain. + Results.append(Node->getNumValues() - 1, + DAG.getConstant(0, dl, Node->getValueType(0))); + Results.push_back(Node->getOperand(0)); + break; case ISD::EH_SJLJ_SETJMP: // If the target didn't expand this, just return 'zero' and preserve the // chain. Results.push_back(DAG.getConstant(0, dl, MVT::i32)); Results.push_back(Node->getOperand(0)); break; - case ISD::ATOMIC_FENCE: { - // If the target didn't lower this, lower it to '__sync_synchronize()' call - // FIXME: handle "fence singlethread" more efficiently. - TargetLowering::ArgListTy Args; - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl) - .setChain(Node->getOperand(0)) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("__sync_synchronize", - TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args), 0); - - std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); - - Results.push_back(CallResult.second); - break; - } case ISD::ATOMIC_LOAD: { // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP. SDValue Zero = DAG.getConstant(0, dl, Node->getValueType(0)); @@ -2959,26 +3062,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Swap.getValue(1)); break; } - // By default, atomic intrinsics are marked Legal and lowered. Targets - // which don't support them directly, however, may want libcalls, in which - // case they mark them Expand, and we get here. - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: - case ISD::ATOMIC_CMP_SWAP: { - std::pair<SDValue, SDValue> Tmp = ExpandAtomic(Node); - Results.push_back(Tmp.first); - Results.push_back(Tmp.second); - break; - } case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { // Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and // splits out the success value as a comparison. 
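The ExpandBSWAP body is elided by this hunk; for orientation, the kind of shift-and-mask sequence it emits for an i32 looks like the following sketch:

    #include <cstdint>
    #include <cstdio>

    // Byte-swap a 32-bit value: each byte is shifted to its mirrored
    // position, masked, and the four pieces are OR'd together.
    uint32_t bswap32(uint32_t Op) {
      uint32_t T1 = Op << 24;
      uint32_t T2 = (Op << 8) & 0x00FF0000u;
      uint32_t T3 = (Op >> 8) & 0x0000FF00u;
      uint32_t T4 = Op >> 24;
      return T1 | T2 | T3 | T4;
    }

    int main() {
      std::printf("%08x\n", (unsigned)bswap32(0x11223344)); // 44332211
    }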
Expanding the resulting @@ -3017,21 +3100,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } break; } - case ISD::TRAP: { - // If this operation is not supported, lower it to 'abort()' call - TargetLowering::ArgListTy Args; - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl) - .setChain(Node->getOperand(0)) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("abort", - TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args), 0); - std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); - - Results.push_back(CallResult.second); - break; - } case ISD::FP_ROUND: case ISD::BITCAST: Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), @@ -3097,6 +3165,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(0), Tmp1, ISD::SETLT); True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0)); + // TODO: Should any fast-math-flags be set for the FSUB? False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, DAG.getNode(ISD::FSUB, dl, VT, Node->getOperand(0), Tmp1)); @@ -3106,57 +3175,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } - case ISD::VAARG: { - const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); - EVT VT = Node->getValueType(0); - Tmp1 = Node->getOperand(0); - Tmp2 = Node->getOperand(1); - unsigned Align = Node->getConstantOperandVal(3); - - SDValue VAListLoad = - DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl, Tmp1, Tmp2, - MachinePointerInfo(V), false, false, false, 0); - SDValue VAList = VAListLoad; - - if (Align > TLI.getMinStackArgumentAlignment()) { - assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); - - VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - DAG.getConstant(Align - 1, dl, - VAList.getValueType())); - - VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList, - DAG.getConstant(-(int64_t)Align, dl, - VAList.getValueType())); - } - - // Increment the pointer, VAList, to the next vaarg - Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - DAG.getConstant(DAG.getDataLayout().getTypeAllocSize( - VT.getTypeForEVT(*DAG.getContext())), - dl, VAList.getValueType())); - // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, - MachinePointerInfo(V), false, false, 0); - // Load the actual argument out of the pointer VAList - Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(), - false, false, false, 0)); + case ISD::VAARG: + Results.push_back(DAG.expandVAArg(Node)); Results.push_back(Results[0].getValue(1)); break; - } - case ISD::VACOPY: { - // This defaults to loading a pointer from the input and storing it to the - // output, returning the chain. - const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); - const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); - Tmp1 = DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl, - Node->getOperand(0), Node->getOperand(2), - MachinePointerInfo(VS), false, false, false, 0); - Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), - MachinePointerInfo(VD), false, false, 0); - Results.push_back(Tmp1); + case ISD::VACOPY: + Results.push_back(DAG.expandVACopy(Node)); break; - } case ISD::EXTRACT_VECTOR_ELT: if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) // This must be an access of the only element. Return it. 
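The inline VAARG expansion removed above (now provided by DAG.expandVAArg) rounds the va_list pointer up to the argument alignment with an add-then-mask; for a power-of-two Align, ANDing with -(int64_t)Align is the same as ANDing with ~(Align - 1):

    #include <cstdint>
    #include <cstdio>

    // Align-up step from the removed VAARG sequence:
    // ptr = (ptr + Align - 1) & ~(Align - 1), Align a power of two.
    int main() {
      uintptr_t VAList = 0x1004;
      uintptr_t Align = 16;
      VAList = (VAList + Align - 1) & ~(Align - 1);
      std::printf("%#zx\n", (size_t)VAList); // 0x1010
    }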
@@ -3302,28 +3327,24 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Node->getOperand(0)); } break; + case ISD::GET_DYNAMIC_AREA_OFFSET: + Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0))); + Results.push_back(Results[0].getValue(0)); + break; case ISD::FCOPYSIGN: Results.push_back(ExpandFCOPYSIGN(Node)); break; case ISD::FNEG: // Expand Y = FNEG(X) -> Y = SUB -0.0, X Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0)); + // TODO: If FNEG has fast-math-flags, propagate them to the FSUB. Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, Node->getOperand(0)); Results.push_back(Tmp1); break; - case ISD::FABS: { - // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X). - EVT VT = Node->getValueType(0); - Tmp1 = Node->getOperand(0); - Tmp2 = DAG.getConstantFP(0.0, dl, VT); - Tmp2 = DAG.getSetCC(dl, getSetCCResultType(Tmp1.getValueType()), - Tmp1, Tmp2, ISD::SETUGT); - Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1); - Tmp1 = DAG.getSelect(dl, VT, Tmp2, Tmp1, Tmp3); - Results.push_back(Tmp1); + case ISD::FABS: + Results.push_back(ExpandFABS(Node)); break; - } case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -3344,25 +3365,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } - case ISD::FMINNUM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, - RTLIB::FMIN_F80, RTLIB::FMIN_F128, - RTLIB::FMIN_PPCF128)); - break; - case ISD::FMAXNUM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, - RTLIB::FMAX_F80, RTLIB::FMAX_F128, - RTLIB::FMAX_PPCF128)); - break; - case ISD::FSQRT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, - RTLIB::SQRT_F80, RTLIB::SQRT_F128, - RTLIB::SQRT_PPCF128)); - break; case ISD::FSIN: case ISD::FCOS: { EVT VT = Node->getValueType(0); - bool isSIN = Node->getOpcode() == ISD::FSIN; // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin / // fcos which share the same operand and both are used. if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) || @@ -3370,137 +3375,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { && useSinCos(Node)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0)); - if (!isSIN) + if (Node->getOpcode() == ISD::FCOS) Tmp1 = Tmp1.getValue(1); Results.push_back(Tmp1); - } else if (isSIN) { - Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, - RTLIB::SIN_F80, RTLIB::SIN_F128, - RTLIB::SIN_PPCF128)); - } else { - Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, - RTLIB::COS_F80, RTLIB::COS_F128, - RTLIB::COS_PPCF128)); } break; } - case ISD::FSINCOS: - // Expand into sincos libcall. 
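The FNEG expansion above subtracts from -0.0 rather than 0.0 for a signed-zero reason: 0.0 - 0.0 rounds to +0.0, but fneg(+0.0) must produce -0.0; with -0.0 as the left operand every sign case comes out right under IEEE rules. A two-line demonstration:

    #include <cstdio>

    int main() {
      double X = 0.0;
      std::printf("%g vs %g\n", 0.0 - X, -0.0 - X); // 0 vs -0
    }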
- ExpandSinCosLibCall(Node, Results); - break; - case ISD::FLOG: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_F128, - RTLIB::LOG_PPCF128)); - break; - case ISD::FLOG2: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128)); - break; - case ISD::FLOG10: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, - RTLIB::LOG10_F80, RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128)); - break; - case ISD::FEXP: - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_F128, - RTLIB::EXP_PPCF128)); - break; - case ISD::FEXP2: - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128)); - break; - case ISD::FTRUNC: - Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, - RTLIB::TRUNC_PPCF128)); - break; - case ISD::FFLOOR: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, - RTLIB::FLOOR_PPCF128)); - break; - case ISD::FCEIL: - Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, - RTLIB::CEIL_F80, RTLIB::CEIL_F128, - RTLIB::CEIL_PPCF128)); - break; - case ISD::FRINT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, - RTLIB::RINT_F80, RTLIB::RINT_F128, - RTLIB::RINT_PPCF128)); - break; - case ISD::FNEARBYINT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, - RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_F128, - RTLIB::NEARBYINT_PPCF128)); - break; - case ISD::FROUND: - Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, - RTLIB::ROUND_F64, - RTLIB::ROUND_F80, - RTLIB::ROUND_F128, - RTLIB::ROUND_PPCF128)); - break; - case ISD::FPOWI: - Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, - RTLIB::POWI_F80, RTLIB::POWI_F128, - RTLIB::POWI_PPCF128)); - break; - case ISD::FPOW: - Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_F128, - RTLIB::POW_PPCF128)); - break; - case ISD::FDIV: - Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, - RTLIB::DIV_F80, RTLIB::DIV_F128, - RTLIB::DIV_PPCF128)); - break; - case ISD::FREM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, - RTLIB::REM_F80, RTLIB::REM_F128, - RTLIB::REM_PPCF128)); - break; - case ISD::FMA: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, - RTLIB::FMA_F80, RTLIB::FMA_F128, - RTLIB::FMA_PPCF128)); - break; case ISD::FMAD: llvm_unreachable("Illegal fmad should never be formed"); - case ISD::FADD: - Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, - RTLIB::ADD_F80, RTLIB::ADD_F128, - RTLIB::ADD_PPCF128)); - break; - case ISD::FMUL: - Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, - RTLIB::MUL_F80, RTLIB::MUL_F128, - RTLIB::MUL_PPCF128)); - break; - case ISD::FP16_TO_FP: { - if (Node->getValueType(0) == MVT::f32) { - Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); - break; + case ISD::FP16_TO_FP: + if (Node->getValueType(0) != MVT::f32) { + // We can extend to types bigger than f32 in two steps without changing + // the result. 
Since "f16 -> f32" is much more commonly available, give + // CodeGen the option of emitting that before resorting to a libcall. + SDValue Res = + DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0)); + Results.push_back( + DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res)); } - - // We can extend to types bigger than f32 in two steps without changing the - // result. Since "f16 -> f32" is much more commonly available, give CodeGen - // the option of emitting that before resorting to a libcall. - SDValue Res = - DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0)); - Results.push_back( - DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res)); break; - } - case ISD::FP_TO_FP16: { + case ISD::FP_TO_FP16: if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { SDValue Op = Node->getOperand(0); MVT SVT = Op.getSimpleValueType(); @@ -3512,16 +3407,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { DAG.getIntPtrConstant(0, dl)); Results.push_back( DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, FloatVal)); - break; } } - - RTLIB::Libcall LC = - RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16"); - Results.push_back(ExpandLibCall(LC, Node, false)); break; - } case ISD::ConstantFP: { ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node); // Check to see if this FP immediate is already legal. @@ -3530,17 +3418,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(ExpandConstantFP(CFP, true)); break; } + case ISD::Constant: { + ConstantSDNode *CP = cast<ConstantSDNode>(Node); + Results.push_back(ExpandConstant(CP)); + break; + } case ISD::FSUB: { EVT VT = Node->getValueType(0); if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) { + const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(Node)->Flags; Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); - Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); + Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags); Results.push_back(Tmp1); - } else { - Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, - RTLIB::SUB_F80, RTLIB::SUB_F128, - RTLIB::SUB_PPCF128)); } break; } @@ -3564,29 +3454,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned DivRemOpc = isSigned ? 
ISD::SDIVREM : ISD::UDIVREM; Tmp2 = Node->getOperand(0); Tmp3 = Node->getOperand(1); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || - (isDivRemLibcallAvailable(Node, isSigned, TLI) && - // If div is legal, it's better to do the normal expansion - !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) && - useDivRem(Node, isSigned, false))) { + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); + Results.push_back(Tmp1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { // X % Y -> X-X/Y*Y Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3); Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); - } else if (isSigned) - Tmp1 = ExpandIntLibCall(Node, true, - RTLIB::SREM_I8, - RTLIB::SREM_I16, RTLIB::SREM_I32, - RTLIB::SREM_I64, RTLIB::SREM_I128); - else - Tmp1 = ExpandIntLibCall(Node, false, - RTLIB::UREM_I8, - RTLIB::UREM_I16, RTLIB::UREM_I32, - RTLIB::UREM_I64, RTLIB::UREM_I128); - Results.push_back(Tmp1); + Results.push_back(Tmp1); + } break; } case ISD::UDIV: @@ -3594,23 +3472,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { bool isSigned = Node->getOpcode() == ISD::SDIV; unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; EVT VT = Node->getValueType(0); - SDVTList VTs = DAG.getVTList(VT, VT); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || - (isDivRemLibcallAvailable(Node, isSigned, TLI) && - useDivRem(Node, isSigned, true))) + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { + SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), Node->getOperand(1)); - else if (isSigned) - Tmp1 = ExpandIntLibCall(Node, true, - RTLIB::SDIV_I8, - RTLIB::SDIV_I16, RTLIB::SDIV_I32, - RTLIB::SDIV_I64, RTLIB::SDIV_I128); - else - Tmp1 = ExpandIntLibCall(Node, false, - RTLIB::UDIV_I8, - RTLIB::UDIV_I16, RTLIB::UDIV_I32, - RTLIB::UDIV_I64, RTLIB::UDIV_I128); - Results.push_back(Tmp1); + Results.push_back(Tmp1); + } break; } case ISD::MULHU: @@ -3626,11 +3493,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1.getValue(1)); break; } - case ISD::SDIVREM: - case ISD::UDIVREM: - // Expand into divrem libcall - ExpandDivRemLibCall(Node, Results); - break; case ISD::MUL: { EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); @@ -3673,14 +3535,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.getShiftAmountTy(HalfType, DAG.getDataLayout())); Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); - break; } - - Tmp1 = ExpandIntLibCall(Node, false, - RTLIB::MUL_I8, - RTLIB::MUL_I16, RTLIB::MUL_I32, - RTLIB::MUL_I64, RTLIB::MUL_I128); - Results.push_back(Tmp1); break; } case ISD::SADDO: @@ -3867,9 +3722,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); - SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, - MachinePointerInfo::getJumpTable(), MemVT, - false, false, false, 0); + SDValue LD = DAG.getExtLoad( + ISD::SEXTLOAD, dl, PTy, Chain, Addr, + MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT, + false, false, false, 0); Addr = LD; if (TM.getRelocationModel() == Reloc::PIC_) { // For PIC, the sequence is: @@ -4092,16 +3948,276 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } // Replace the original node with the legalized result. 
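When only the division is legal, the hunk above rewrites the remainder as X % Y == X - (X/Y)*Y; C++'s truncating integer division matches SREM's semantics, so the identity holds for negative operands too:

    #include <cstdio>

    int main() {
      int X = -7, Y = 3;
      int Rem = X - (X / Y) * Y;       // -7 - (-2)*3 = -1
      std::printf("%d %d\n", X % Y, Rem); // -1 -1
    }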
+ if (Results.empty()) + return false; + + ReplaceNode(Node, Results.data()); + return true; +} + +void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { + SmallVector<SDValue, 8> Results; + SDLoc dl(Node); + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + unsigned Opc = Node->getOpcode(); + switch (Opc) { + case ISD::ATOMIC_FENCE: { + // If the target didn't lower this, lower it to '__sync_synchronize()' call + // FIXME: handle "fence singlethread" more efficiently. + TargetLowering::ArgListTy Args; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__sync_synchronize", + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + + Results.push_back(CallResult.second); + break; + } + // By default, atomic intrinsics are marked Legal and lowered. Targets + // which don't support them directly, however, may want libcalls, in which + // case they mark them Expand, and we get here. + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_CMP_SWAP: { + MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); + RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); + + std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + break; + } + case ISD::TRAP: { + // If this operation is not supported, lower it to 'abort()' call + TargetLowering::ArgListTy Args; + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("abort", + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + + Results.push_back(CallResult.second); + break; + } + case ISD::FMINNUM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128)); + break; + case ISD::FMAXNUM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128)); + break; + case ISD::FSQRT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128)); + break; + case ISD::FSIN: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128)); + break; + case ISD::FCOS: + Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128)); + break; + case ISD::FSINCOS: + // Expand into sincos libcall. 
+ ExpandSinCosLibCall(Node, Results); + break; + case ISD::FLOG: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128)); + break; + case ISD::FLOG2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128)); + break; + case ISD::FLOG10: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128)); + break; + case ISD::FEXP: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128)); + break; + case ISD::FEXP2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128)); + break; + case ISD::FTRUNC: + Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128)); + break; + case ISD::FFLOOR: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128)); + break; + case ISD::FCEIL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128)); + break; + case ISD::FRINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128)); + break; + case ISD::FNEARBYINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128)); + break; + case ISD::FROUND: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128)); + break; + case ISD::FPOWI: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128)); + break; + case ISD::FPOW: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128)); + break; + case ISD::FDIV: + Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, + RTLIB::DIV_F80, RTLIB::DIV_F128, + RTLIB::DIV_PPCF128)); + break; + case ISD::FREM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128)); + break; + case ISD::FMA: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, + RTLIB::FMA_F80, RTLIB::FMA_F128, + RTLIB::FMA_PPCF128)); + break; + case ISD::FADD: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128)); + break; + case ISD::FMUL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, + RTLIB::MUL_F80, RTLIB::MUL_F128, + RTLIB::MUL_PPCF128)); + break; + case ISD::FP16_TO_FP: + if (Node->getValueType(0) == MVT::f32) { + Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); + } + break; + case ISD::FP_TO_FP16: { + RTLIB::Libcall LC = + RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16"); + Results.push_back(ExpandLibCall(LC, Node, false)); + break; + } + case ISD::FSUB: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, + 
RTLIB::SUB_F80, RTLIB::SUB_F128, + RTLIB::SUB_PPCF128)); + break; + case ISD::SREM: + Results.push_back(ExpandIntLibCall(Node, true, + RTLIB::SREM_I8, + RTLIB::SREM_I16, RTLIB::SREM_I32, + RTLIB::SREM_I64, RTLIB::SREM_I128)); + break; + case ISD::UREM: + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::UREM_I8, + RTLIB::UREM_I16, RTLIB::UREM_I32, + RTLIB::UREM_I64, RTLIB::UREM_I128)); + break; + case ISD::SDIV: + Results.push_back(ExpandIntLibCall(Node, true, + RTLIB::SDIV_I8, + RTLIB::SDIV_I16, RTLIB::SDIV_I32, + RTLIB::SDIV_I64, RTLIB::SDIV_I128)); + break; + case ISD::UDIV: + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::UDIV_I8, + RTLIB::UDIV_I16, RTLIB::UDIV_I32, + RTLIB::UDIV_I64, RTLIB::UDIV_I128)); + break; + case ISD::SDIVREM: + case ISD::UDIVREM: + // Expand into divrem libcall + ExpandDivRemLibCall(Node, Results); + break; + case ISD::MUL: + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::MUL_I8, + RTLIB::MUL_I16, RTLIB::MUL_I32, + RTLIB::MUL_I64, RTLIB::MUL_I128)); + break; + } + + // Replace the original node with the legalized result. if (!Results.empty()) ReplaceNode(Node, Results.data()); } +// Determine the vector type to use in place of an original scalar element when +// promoting equally sized vectors. +static MVT getPromotedVectorElementType(const TargetLowering &TLI, + MVT EltVT, MVT NewEltVT) { + unsigned OldEltsPerNewElt = EltVT.getSizeInBits() / NewEltVT.getSizeInBits(); + MVT MidVT = MVT::getVectorVT(NewEltVT, OldEltsPerNewElt); + assert(TLI.isTypeLegal(MidVT) && "unexpected"); + return MidVT; +} + void SelectionDAGLegalize::PromoteNode(SDNode *Node) { SmallVector<SDValue, 8> Results; MVT OVT = Node->getSimpleValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || Node->getOpcode() == ISD::SINT_TO_FP || - Node->getOpcode() == ISD::SETCC) { + Node->getOpcode() == ISD::SETCC || + Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT || + Node->getOpcode() == ISD::INSERT_VECTOR_ELT) { OVT = Node->getOperand(0).getSimpleValueType(); } if (Node->getOpcode() == ISD::BR_CC) @@ -4284,11 +4400,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FREM: case ISD::FMINNUM: case ISD::FMAXNUM: - case ISD::FCOPYSIGN: case ISD::FPOW: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); - Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, + Node->getFlags()); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, DAG.getIntPtrConstant(0, dl))); break; @@ -4303,12 +4419,20 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { DAG.getIntPtrConstant(0, dl))); break; } + case ISD::FCOPYSIGN: case ISD::FPOWI: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = Node->getOperand(1); Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + + // fcopysign doesn't change anything but the sign bit, so + // (fp_round (fcopysign (fpext a), b)) + // is as precise as + // (fp_round (fpext a)) + // which is a no-op. Mark it as a TRUNCating FP_ROUND. 
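The comment closing this hunk can be checked concretely: FCOPYSIGN only rewrites the sign bit of the extended value, so rounding back to the original type is exact and the FP_ROUND may be marked truncating. A standalone verification, with f32/f64 standing in for OVT/NVT (hypothetical test, not LLVM code):

    #include <cassert>
    #include <cmath>

    int main() {
      float a = 1.1f, b = -2.0f;
      double Ext = static_cast<double>(a); // (fpext a)
      double Cs = std::copysign(Ext, b);   // (fcopysign (fpext a), b)
      float Rnd = static_cast<float>(Cs);  // (fp_round ...) is exact here
      assert(Rnd == std::copysign(a, b));  // no precision was lost
      return 0;
    }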
+ const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, - Tmp3, DAG.getIntPtrConstant(0, dl))); + Tmp3, DAG.getIntPtrConstant(isTrunc, dl))); break; } case ISD::FFLOOR: @@ -4333,6 +4457,157 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp2, DAG.getIntPtrConstant(0, dl))); break; } + case ISD::BUILD_VECTOR: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size + // + // e.g. v2i64 = build_vector i64:x, i64:y => v4i32 + // => + // v4i32 = concat_vectors (v2i32 (bitcast i64:x)), (v2i32 (bitcast i64:y)) + + assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && + "Invalid promote type for build_vector"); + assert(NewEltVT.bitsLT(EltVT) && "not handled"); + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + + SmallVector<SDValue, 8> NewOps; + for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) { + SDValue Op = Node->getOperand(I); + NewOps.push_back(DAG.getNode(ISD::BITCAST, SDLoc(Op), MidVT, Op)); + } + + SDLoc SL(Node); + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewOps); + SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat); + Results.push_back(CvtVec); + break; + } + case ISD::EXTRACT_VECTOR_ELT: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size. + // + // e.g. v2i64 = extract_vector_elt x:v2i64, y:i32 + // => + // v4i32:castx = bitcast x:v2i64 + // + // i64 = bitcast + // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))), + // (i32 (extract_vector_elt castx, (2 * y + 1))) + // + + assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && + "Invalid promote type for extract_vector_elt"); + assert(NewEltVT.bitsLT(EltVT) && "not handled"); + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + unsigned NewEltsPerOldElt = MidVT.getVectorNumElements(); + + SDValue Idx = Node->getOperand(1); + EVT IdxVT = Idx.getValueType(); + SDLoc SL(Node); + SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SL, IdxVT); + SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor); + + SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0)); + + SmallVector<SDValue, 8> NewOps; + for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { + SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT); + SDValue TmpIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset); + + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT, + CastVec, TmpIdx); + NewOps.push_back(Elt); + } + + SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, SL, MidVT, NewOps); + + Results.push_back(DAG.getNode(ISD::BITCAST, SL, EltVT, NewVec)); + break; + } + case ISD::INSERT_VECTOR_ELT: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size + // + // e.g. 
v2i64 = insert_vector_elt x:v2i64, y:i64, z:i32 + // => + // v4i32:castx = bitcast x:v2i64 + // v2i32:casty = bitcast y:i64 + // + // v2i64 = bitcast + // (v4i32 insert_vector_elt + // (v4i32 insert_vector_elt v4i32:castx, + // (extract_vector_elt casty, 0), 2 * z), + // (extract_vector_elt casty, 1), (2 * z + 1)) + + assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && + "Invalid promote type for insert_vector_elt"); + assert(NewEltVT.bitsLT(EltVT) && "not handled"); + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + unsigned NewEltsPerOldElt = MidVT.getVectorNumElements(); + + SDValue Val = Node->getOperand(1); + SDValue Idx = Node->getOperand(2); + EVT IdxVT = Idx.getValueType(); + SDLoc SL(Node); + + SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SDLoc(), IdxVT); + SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor); + + SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0)); + SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val); + + SDValue NewVec = CastVec; + for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { + SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT); + SDValue InEltIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset); + + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT, + CastVal, IdxOffset); + + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, NVT, + NewVec, Elt, InEltIdx); + } + + Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewVec)); + break; + } + case ISD::SCALAR_TO_VECTOR: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size. + // + // e.g. v2i64 = scalar_to_vector x:i64 + // => + // concat_vectors (v2i32 bitcast x:i64), (v2i32 undef) + // + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + SDValue Val = Node->getOperand(0); + SDLoc SL(Node); + + SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val); + SDValue Undef = DAG.getUNDEF(MidVT); + + SmallVector<SDValue, 8> NewElts; + NewElts.push_back(CastVal); + for (unsigned I = 1, NElts = OVT.getVectorNumElements(); I != NElts; ++I) + NewElts.push_back(Undef); + + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewElts); + SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat); + Results.push_back(CvtVec); + break; + } + } // Replace the original node with the legalized result. @@ -4356,7 +4631,7 @@ void SelectionDAG::Legalize() { for (auto NI = allnodes_end(); NI != allnodes_begin();) { --NI; - SDNode *N = NI; + SDNode *N = &*NI; if (N->use_empty() && N != getRoot().getNode()) { ++NI; DeleteNode(N); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 3c50a4155731..6c0193a76732 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -43,10 +43,10 @@ static RTLIB::Libcall GetFPLibCall(EVT VT, } //===----------------------------------------------------------------------===// -// Result Float to Integer Conversion. +// Convert Float Results to Integer for Non-HW-supported Operations.
//===----------------------------------------------------------------------===// -void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { +bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue R = SDValue(); @@ -59,20 +59,26 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to soften the result of this operator!"); + case ISD::Register: + case ISD::CopyFromReg: + case ISD::CopyToReg: + assert(isLegalInHWReg(N->getValueType(ResNo)) && + "Unsupported SoftenFloatRes opcode!"); + // Only when isLegalInHWReg, we can skip check of the operands. + R = SDValue(N, ResNo); + break; case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; - case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; + case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break; case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; - case ISD::ConstantFP: - R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N)); - break; + case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break; case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break; - case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break; case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; case ISD::FADD: R = SoftenFloatRes_FADD(N); break; case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; - case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break; case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; @@ -84,7 +90,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; - case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break; case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; @@ -97,9 +103,9 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; - case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; - case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; - case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; @@ -107,11 +113,19 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { } // If R is null, the sub-method took care of registering the result. 
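The handling just below turns SoftenFloatResult from void into bool: true means the node was rewritten and its uses replaced, false means the value is legal in a hardware register and the caller must still scan the operands. A toy model of that contract (generic types, not the LegalizeTypes driver):

    #include <cassert>

    struct Node { bool Softened = false; };

    // Returns true only when the node changed; on false the caller keeps
    // analyzing (mirrors the new SoftenFloatResult return convention).
    bool softenResult(Node &N, bool LegalInHWReg) {
      if (LegalInHWReg)
        return false;    // value stays put; operands still need scanning
      N.Softened = true; // rewrite happened; uses were replaced
      return true;
    }

    int main() {
      Node A, B;
      assert(softenResult(A, false) && A.Softened);
      assert(!softenResult(B, true) && !B.Softened);
      return 0;
    }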
- if (R.getNode()) + if (R.getNode()) { SetSoftenedFloat(SDValue(N, ResNo), R); + ReplaceSoftenFloatResult(N, ResNo, R); + } + // Return true only if the node is changed, + // assuming that the operands are also converted when necessary. + // Otherwise, return false to tell caller to scan operands. + return R.getNode() && R.getNode() != N; } -SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); return BitConvertToInteger(N->getOperand(0)); } @@ -130,10 +144,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { BitConvertToInteger(N->getOperand(1))); } -SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { - return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), SDLoc(N), +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, we can load better from the constant pool. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); + ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N); + return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN), TLI.getTypeToTransformTo(*DAG.getContext(), - N->getValueType(0))); + CN->getValueType(0))); } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -143,7 +161,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { NewOp, N->getOperand(1)); } -SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FABS can be implemented as native bitwise operations. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned Size = NVT.getSizeInBits(); @@ -165,7 +186,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { RTLIB::FMIN_F80, RTLIB::FMIN_F128, RTLIB::FMIN_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { @@ -178,7 +199,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { RTLIB::FMAX_F80, RTLIB::FMAX_F128, RTLIB::FMAX_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { @@ -191,7 +212,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { RTLIB::ADD_F80, RTLIB::ADD_F128, RTLIB::ADD_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { @@ -203,10 +224,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { RTLIB::CEIL_F80, RTLIB::CEIL_F128, RTLIB::CEIL_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FCOPYSIGN can be implemented as native bitwise operations. 
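For IEEE-754 f32 kept in an integer register, the bitwise lowering the comment above refers to is a mask-and-merge of the sign bit. A standalone illustration (hypothetical helper, not the DAG lowering itself):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    float copysignViaBits(float Mag, float Sgn) {
      uint32_t M, S;
      std::memcpy(&M, &Mag, sizeof(M));                   // BITCAST f32 -> i32
      std::memcpy(&S, &Sgn, sizeof(S));
      uint32_t R = (M & 0x7FFFFFFFu) | (S & 0x80000000u); // merge sign bit
      std::memcpy(&Mag, &R, sizeof(Mag));                 // BITCAST i32 -> f32
      return Mag;
    }

    int main() { assert(copysignViaBits(3.5f, -1.0f) == -3.5f); return 0; }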
+ if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(0)); SDValue RHS = BitConvertToInteger(N->getOperand(1)); SDLoc dl(N); @@ -263,7 +287,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { @@ -276,7 +300,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { @@ -288,7 +312,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { RTLIB::EXP_F80, RTLIB::EXP_F128, RTLIB::EXP_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { @@ -300,7 +324,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { @@ -312,7 +336,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, RTLIB::FLOOR_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { @@ -324,7 +348,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { RTLIB::LOG_F80, RTLIB::LOG_F128, RTLIB::LOG_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { @@ -336,7 +360,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { RTLIB::LOG2_F80, RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { @@ -348,7 +372,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { RTLIB::LOG10_F80, RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { @@ -362,7 +386,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - NVT, Ops, 3, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { @@ -375,7 +399,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { @@ -387,10 +411,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { RTLIB::NEARBYINT_F80, RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FNEG can be implemented as native bitwise operations. 
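When the value is not kept in a register, the body below expands FNEG(X) into SUB(-0.0, X); subtracting from -0.0 rather than +0.0 is what keeps signed zeros correct. A quick standalone check (plain C++, IEEE-754 semantics assumed):

    #include <cassert>
    #include <cmath>

    int main() {
      assert(std::signbit(-0.0 - 0.0));     // FNEG(+0.0) == -0.0
      assert(!std::signbit(-0.0 - (-0.0))); // FNEG(-0.0) == +0.0
      assert(-0.0 - 3.5 == -3.5);           // ordinary values negate too
      return 0;
    }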
+ if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); // Expand Y = FNEG(X) -> Y = SUB -0.0, X @@ -402,7 +429,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, dl).first; + NVT, Ops, false, dl).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -418,11 +445,20 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SoftenFloatResult(Op.getNode(), 0); } + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) { + Op = GetPromotedFloat(Op); + // If the promotion did the FP_EXTEND to the destination type for us, + // there's nothing left to do here. + if (Op.getValueType() == N->getValueType(0)) { + return BitConvertToInteger(Op); + } + } + RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat) Op = GetSoftenedFloat(Op); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -430,7 +466,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32); SDValue Op = N->getOperand(0); - SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, &Op, 1, + SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op, false, SDLoc(N)).first; if (N->getValueType(0) == MVT::f32) return Res32; @@ -438,7 +474,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Res32, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, NVT, Res32, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -452,7 +488,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { @@ -465,7 +501,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { @@ -479,7 +515,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { RTLIB::POWI_F80, RTLIB::POWI_F128, RTLIB::POWI_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { @@ -492,7 +528,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { RTLIB::REM_F80, RTLIB::REM_F128, RTLIB::REM_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue 
DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { @@ -504,7 +540,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { RTLIB::RINT_F80, RTLIB::RINT_F128, RTLIB::RINT_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { @@ -516,7 +552,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { RTLIB::ROUND_F80, RTLIB::ROUND_F128, RTLIB::ROUND_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { @@ -528,7 +564,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { RTLIB::SIN_F80, RTLIB::SIN_F128, RTLIB::SIN_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { @@ -540,7 +576,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { @@ -553,7 +589,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { @@ -568,10 +604,11 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, RTLIB::TRUNC_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { + bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo)); LoadSDNode *L = cast<LoadSDNode>(N); EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -586,7 +623,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. - ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + if (N != NewL.getValue(1).getNode()) + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); return NewL; } @@ -600,17 +638,24 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { // Legalized the chain result - switch anything that used the old chain to // use the new one. 
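The recurring `&Op, 1` to `Op` and `Ops, 2` to `Ops` edits in these hunks come from makeLibCall taking an ArrayRef-style operand list instead of a pointer-plus-count pair, so a single value and an array both convert implicitly and the explicit count disappears. A self-contained model of that API shape (hypothetical Span/sum, not the TargetLowering signature):

    #include <cassert>
    #include <cstddef>
    #include <initializer_list>

    struct Span {
      const int *Data;
      size_t Size;
      Span(const int &One) : Data(&One), Size(1) {}  // single operand
      Span(std::initializer_list<int> L) : Data(L.begin()), Size(L.size()) {}
    };

    int sum(Span Ops) {
      int S = 0;
      for (size_t I = 0; I != Ops.Size; ++I)
        S += Ops.Data[I];
      return S;
    }

    int main() {
      int Op = 3;
      assert(sum(Op) == 3);     // was: sum(&Op, 1)
      assert(sum({1, 2}) == 3); // was: sum(Ops, 2)
      return 0;
    }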
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); - return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL)); + auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL); + if (LegalInHWReg) + return ExtendNode; + return BitConvertToInteger(ExtendNode); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(1)); SDValue RHS = GetSoftenedFloat(N->getOperand(2)); return DAG.getSelect(SDLoc(N), LHS.getValueType(), N->getOperand(0), LHS, RHS); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(2)); SDValue RHS = GetSoftenedFloat(N->getOperand(3)); return DAG.getNode(ISD::SELECT_CC, SDLoc(N), @@ -636,7 +681,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { // Legalized the chain result - switch anything that used the old chain to // use the new one. - ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); + if (N != NewVAARG.getValue(1).getNode()) + ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); return NewVAARG; } @@ -665,12 +711,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { NVT, N->getOperand(0)); return TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - &Op, 1, Signed, dl).first; + Op, Signed, dl).first; } //===----------------------------------------------------------------------===// -// Operand Float to Integer Conversion.. +// Convert Float Operand to Integer for Non-HW-supported Operations. //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { @@ -680,6 +726,8 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: + if (CanSkipSoftenFloatOperand(N, OpNo)) + return false; #ifndef NDEBUG dbgs() << "SoftenFloatOperand Op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; @@ -691,18 +739,27 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; - case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break; - case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; - case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; + case ISD::STORE: + Res = SoftenFloatOp_STORE(N, OpNo); + // Do not try to analyze or soften this node again if the value is + // or can be held in a register. In that case, Res.getNode() should + // be equal to N. + if (Res.getNode() == N && + isLegalInHWReg(N->getOperand(OpNo).getValueType())) + return false; + // Otherwise, we need to reanalyze and lower the new Res nodes. + break; } // If the result is null, the sub-method took care of registering results etc. if (!Res.getNode()) return false; // If the result is N, the sub-method updated N in place. Tell the legalizer - // core about this. 
+ core about this to re-analyze. if (Res.getNode() == N) return true; @@ -713,6 +770,41 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { return false; } +bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { + if (!isLegalInHWReg(N->getOperand(OpNo).getValueType())) + return false; + // When the operand type can be kept in registers, SoftenFloatResult + // will call ReplaceValueWith to replace all references and we can + // skip softening this operand. + switch (N->getOperand(OpNo).getOpcode()) { + case ISD::BITCAST: + case ISD::ConstantFP: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::FABS: + case ISD::FCOPYSIGN: + case ISD::FNEG: + case ISD::Register: + case ISD::SELECT: + case ISD::SELECT_CC: + return true; + } + // For some opcodes, SoftenFloatResult handles all conversion of softening + // and replacing operands, so that there is no need to soften operands + // again, although such opcode could be scanned for other illegal operands. + switch (N->getOpcode()) { + case ISD::ConstantFP: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::FABS: + case ISD::FCOPYSIGN: + case ISD::FNEG: + case ISD::Register: + return true; + } + return false; +} + SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), GetSoftenedFloat(N->getOperand(0))); @@ -730,7 +822,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; } @@ -747,7 +839,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { @@ -773,20 +865,33 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { 0); } -SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { + bool Signed = N->getOpcode() == ISD::FP_TO_SINT; + EVT SVT = N->getOperand(0).getValueType(); EVT RVT = N->getValueType(0); - RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; -} + EVT NVT = EVT(); + SDLoc dl(N); + + // If the result is not legal, eg: fp -> i1, then it needs to be promoted to + // a larger type, eg: fp -> i32. Even if it is legal, no libcall may exactly + // match, eg. we don't have fp -> i8 conversions. + // Look for an appropriate libcall. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE; + IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; + ++IntVT) { + NVT = (MVT::SimpleValueType)IntVT; + // The type needs to be big enough to hold the result. + if (NVT.bitsGE(RVT)) + LC = Signed ?
RTLIB::getFPTOSINT(SVT, NVT):RTLIB::getFPTOUINT(SVT, NVT); + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!"); -SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { - EVT RVT = N->getValueType(0); - RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; + SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, false, dl).first; + + // Truncate the result if the libcall returns a larger type. + return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res); } SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { @@ -1028,7 +1133,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - N->getValueType(0), Ops, 2, false, + N->getValueType(0), Ops, false, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1102,7 +1207,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - N->getValueType(0), Ops, 3, false, + N->getValueType(0), Ops, false, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1116,7 +1221,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - N->getValueType(0), Ops, 2, false, + N->getValueType(0), Ops, false, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1231,7 +1336,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - N->getValueType(0), Ops, 2, false, + N->getValueType(0), Ops, false, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1310,7 +1415,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); - Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl).first; + Hi = TLI.makeLibCall(DAG, LC, VT, Src, true, dl).first; GetPairElements(Hi, Lo, Hi); } @@ -1341,6 +1446,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, break; } + // TODO: Are there fast-math-flags to propagate to this FADD? Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, APInt(128, Parts)), @@ -1494,7 +1600,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl).first; + return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { @@ -1511,6 +1617,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128); // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. + // TODO: Are there fast-math-flags to propagate to this FSUB? 
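The getSelectCC built just below implements the comment above, X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X: values at or beyond 2^31 are rebased into signed range, converted, and offset back. A scalar model using double in place of ppcf128 (hypothetical helper):

    #include <cassert>
    #include <cstdint>

    uint32_t fpToUint32ViaSint(double X) {
      const double Two31 = 2147483648.0; // 2^31
      if (X >= Two31)                    // rebase, convert, offset back
        return static_cast<uint32_t>(static_cast<int32_t>(X - Two31)) +
               0x80000000u;
      return static_cast<uint32_t>(static_cast<int32_t>(X));
    }

    int main() {
      assert(fpToUint32ViaSint(3000000000.0) == 3000000000u);
      assert(fpToUint32ViaSint(7.0) == 7u);
      return 0;
    }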
return DAG.getSelectCC(dl, N->getOperand(0), Tmp, DAG.getNode(ISD::ADD, dl, MVT::i32, DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, @@ -1527,7 +1634,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0), false, dl).first; } @@ -1912,8 +2019,7 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Op0 = GetPromotedFloat(N->getOperand(0)); SDValue Op1 = GetPromotedFloat(N->getOperand(1)); - - return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1); + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, N->getFlags()); } SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 9f060a09a0f3..cd114d668e20 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -53,6 +53,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break; case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break; case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break; + case ISD::BITREVERSE: Res = PromoteIntRes_BITREVERSE(N); break; case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break; case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break; case ISD::Constant: Res = PromoteIntRes_Constant(N); break; @@ -65,16 +66,20 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; - case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break; - case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));break; + case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break; + case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N)); + break; + case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N)); + break; case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break; case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break; case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; case ISD::SMIN: - case ISD::SMAX: + case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break; case ISD::UMIN: - case ISD::UMAX: Res = PromoteIntRes_SimpleIntBinOp(N); break; + case ISD::UMAX: Res = PromoteIntRes_ZExtIntBinOp(N); break; + case ISD::SHL: Res = PromoteIntRes_SHL(N); break; case ISD::SIGN_EXTEND_INREG: Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; @@ -114,10 +119,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break; case ISD::SDIV: - case ISD::SREM: Res = PromoteIntRes_SDIV(N); break; + case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break; case ISD::UDIV: - case ISD::UREM: Res = PromoteIntRes_UDIV(N); break; + case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break; case ISD::SADDO: case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break; @@ -180,7 +185,7 @@ 
SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { N->getChain(), N->getBasePtr(), N->getMemOperand(), N->getOrdering(), N->getSynchScope()); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; @@ -193,7 +198,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(), N->getOrdering(), N->getSynchScope()); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; @@ -316,6 +321,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); } +SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); + SDLoc dl(N); + + unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); + return DAG.getNode( + ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), + DAG.getConstant(DiffBits, dl, + TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); +} + SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { // The pair element type may be legal, or may not promote to the same type as // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases. @@ -465,7 +483,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), N->getMemoryVT(), N->getMemOperand()); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; @@ -475,20 +493,34 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0()); - SDValue Mask = N->getMask(); - EVT NewMaskVT = getSetCCResultType(NVT); - if (NewMaskVT != N->getMask().getValueType()) - Mask = PromoteTargetBoolean(Mask, NewMaskVT); SDLoc dl(N); - SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), - Mask, ExtSrc0, N->getMemoryVT(), + N->getMask(), ExtSrc0, N->getMemoryVT(), N->getMemOperand(), ISD::SEXTLOAD); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue ExtSrc0 = GetPromotedInteger(N->getValue()); + assert(NVT == ExtSrc0.getValueType() && + "Gather result type and the passThru argument type should be the same"); + + SDLoc dl(N); + SDValue Ops[] = {N->getChain(), ExtSrc0, N->getMask(), N->getBasePtr(), + N->getIndex()}; + SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), + N->getMemoryVT(), dl, Ops, + N->getMemOperand()); + // Legalize the chain result - switch anything that used the old chain to // use the new one.
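PromoteIntRes_BITREVERSE, added in the hunk above, follows the existing BSWAP promotion pattern: run the operation at the promoted width, then shift right by DiffBits so the result lands back in the original type's low bits. A scalar model of the same pattern for an i16 byte swap done in an i32 (hypothetical helpers, standard C++):

    #include <cassert>
    #include <cstdint>

    uint16_t promotedBswap16(uint16_t V) {
      uint32_t Wide = V;                      // promoted input, high bits zero
      uint32_t Swapped = ((Wide & 0x000000FFu) << 24) |
                         ((Wide & 0x0000FF00u) << 8) |
                         ((Wide & 0x00FF0000u) >> 8) |
                         ((Wide & 0xFF000000u) >> 24); // BSWAP at i32
      return static_cast<uint16_t>(Swapped >> 16);     // SRL by DiffBits = 16
    }

    int main() { assert(promotedBswap16(0x1234) == 0x3412); return 0; }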
ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; } + /// Promote the overflow flag of an overflowing arithmetic node. SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { // Simply change the return type of the boolean result. @@ -534,14 +566,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { return Res; } -SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) { - // Sign extend the input. - SDValue LHS = SExtPromotedInteger(N->getOperand(0)); - SDValue RHS = SExtPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); -} - SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); @@ -629,6 +653,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { LHS.getValueType(), LHS, RHS); } +SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) { + // Sign extend the input. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) { + // Zero extend the input. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS); +} + SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); @@ -770,14 +810,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { return Mul; } -SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { - // Zero extend the input. - SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); - SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); -} - SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); @@ -875,6 +907,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { OpNo); break; case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N), OpNo); break; + case ISD::MGATHER: Res = PromoteIntOp_MGATHER(cast<MaskedGatherSDNode>(N), + OpNo); break; + case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N), + OpNo); break; case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; case ISD::FP16_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; @@ -1143,56 +1179,49 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getMemoryVT(), N->getMemOperand()); } -SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){ +SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, + unsigned OpNo) { SDValue DataOp = N->getValue(); EVT DataVT = DataOp.getValueType(); SDValue Mask = N->getMask(); - EVT MaskVT = Mask.getValueType(); SDLoc dl(N); bool TruncateStore = false; - if (!TLI.isTypeLegal(DataVT)) { - if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) { - DataOp = GetPromotedInteger(DataOp); - if (!TLI.isTypeLegal(MaskVT)) - Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); - TruncateStore = true; - } + if (OpNo == 2) { + // Mask comes before the data operand. 
If the data operand is legal, we just + // promote the mask. + // When the data operand has illegal type, we should legalize the data + // operand first. The mask will be promoted/split/widened according to + // the data operand type. + if (TLI.isTypeLegal(DataVT)) + Mask = PromoteTargetBoolean(Mask, DataVT); else { - assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector && - "Unexpected data legalization in MSTORE"); - DataOp = GetWidenedVector(DataOp); - - if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) - Mask = GetWidenedVector(Mask); - else { - EVT BoolVT = getSetCCResultType(DataOp.getValueType()); + if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) + return PromoteIntOp_MSTORE(N, 3); - // We can't use ModifyToType() because we should fill the mask with - // zeroes - unsigned WidenNumElts = BoolVT.getVectorNumElements(); - unsigned MaskNumElts = MaskVT.getVectorNumElements(); + else if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector) + return WidenVecOp_MSTORE(N, 3); - unsigned NumConcat = WidenNumElts / MaskNumElts; - SmallVector<SDValue, 16> Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); - Ops[0] = Mask; - for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = ZeroVal; - - Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); + else { + assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector); + return SplitVecOp_MSTORE(N, 3); } } + } else { // Data operand + assert(OpNo == 3 && "Unexpected operand for promotion"); + DataOp = GetPromotedInteger(DataOp); + Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); + TruncateStore = true; } - else - Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType()); + return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask, N->getMemoryVT(), N->getMemOperand(), TruncateStore); } -SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){ +SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, + unsigned OpNo) { assert(OpNo == 2 && "Only know how to promote the mask!"); EVT DataVT = N->getValueType(0); SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); @@ -1201,6 +1230,31 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo) return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, + unsigned OpNo) { + + SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end()); + if (OpNo == 2) { + // The Mask + EVT DataVT = N->getValueType(0); + NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + } else + NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, + unsigned OpNo) { + SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end()); + if (OpNo == 2) { + // The Mask + EVT DataVT = N->getValue().getValueType(); + NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + } else + NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op); @@ -1259,6 +1313,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break; case
ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break; case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; + case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break; case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; case ISD::CTLZ_ZERO_UNDEF: @@ -1270,6 +1325,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; + case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break; case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break; case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break; case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break; @@ -1763,12 +1819,6 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); } -void DAGTypeLegalizer::ExpandIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo, - SDValue &Lo, SDValue &Hi) { - SDValue Res = DisintegrateMERGE_VALUES(N, ResNo); - SplitInteger(Res, Lo, Hi); -} - void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -1834,6 +1884,14 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, } } +void DAGTypeLegalizer::ExpandIntRes_BITREVERSE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands. + Lo = DAG.getNode(ISD::BITREVERSE, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::BITREVERSE, dl, Hi.getValueType(), Hi); +} + void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); @@ -1918,8 +1976,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, - dl).first, + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, true/*irrelevant*/, dl).first, Lo, Hi); } @@ -1934,8 +1991,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, - dl).first, + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, false/*irrelevant*/, dl).first, Lo, Hi); } @@ -2055,7 +2111,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } } - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. 
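ExpandIntRes_BITREVERSE above splits a double-width reverse into two half-width reverses with the halves exchanged; note the swapped Hi/Lo outputs in its GetExpandedInteger call. A standalone model for i64 expanded into i32 halves (hypothetical helpers):

    #include <cassert>
    #include <cstdint>

    uint32_t rev32(uint32_t V) { // stands in for the i32 BITREVERSE node
      uint32_t R = 0;
      for (int I = 0; I < 32; ++I)
        R |= ((V >> I) & 1u) << (31 - I);
      return R;
    }

    uint64_t rev64ViaHalves(uint64_t V) {
      uint32_t Lo = static_cast<uint32_t>(V);
      uint32_t Hi = static_cast<uint32_t>(V >> 32);
      // The reversed low half becomes the new high half, and vice versa.
      return (static_cast<uint64_t>(rev32(Lo)) << 32) | rev32(Hi);
    }

    int main() { assert(rev64ViaHalves(1ull) == 0x8000000000000000ull); return 0; }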
ReplaceValueWith(SDValue(N, 1), Ch); } @@ -2096,11 +2152,21 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, - dl).first, + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first, Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc DL(N); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other); + SDValue R = DAG.getNode(N->getOpcode(), DL, VTs, N->getOperand(0)); + Lo = R.getValue(0); + Hi = R.getValue(1); + ReplaceValueWith(SDValue(N, 1), R.getValue(2)); +} + void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, SDValue &Lo, SDValue &Hi) { SDValue LHS = Node->getOperand(0); @@ -2166,7 +2232,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, LC = RTLIB::SDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, @@ -2261,8 +2327,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl).first, Lo, - Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, isSigned, dl).first, Lo, Hi); return; } @@ -2352,7 +2417,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, LC = RTLIB::SREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, @@ -2499,7 +2564,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, LC = RTLIB::UDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, @@ -2525,7 +2590,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, LC = RTLIB::UREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, @@ -2605,6 +2670,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break; case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; + case ISD::SETCCE: Res = ExpandIntOp_SETCCE(N); break; case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break; @@ -2732,6 +2798,47 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, return; } + if (LHSHi == RHSHi) { + // Comparing the low bits is enough. 
+ NewLHS = Tmp1; + NewRHS = SDValue(); + return; + } + + // Lower with SETCCE if the target supports it. + // FIXME: Make all targets support this, then remove the other lowering. + if (TLI.getOperationAction( + ISD::SETCCE, + TLI.getTypeToExpandTo(*DAG.getContext(), LHSLo.getValueType())) == + TargetLowering::Custom) { + // SETCCE can detect < and >= directly. For > and <=, flip operands and + // condition code. + bool FlipOperands = false; + switch (CCCode) { + case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break; + case ISD::SETUGT: CCCode = ISD::SETULT; FlipOperands = true; break; + case ISD::SETLE: CCCode = ISD::SETGE; FlipOperands = true; break; + case ISD::SETULE: CCCode = ISD::SETUGE; FlipOperands = true; break; + default: break; + } + if (FlipOperands) { + std::swap(LHSLo, RHSLo); + std::swap(LHSHi, RHSHi); + } + // Perform a wide subtraction, feeding the carry from the low part into + // SETCCE. The SETCCE operation is essentially looking at the high part of + // the result of LHS - RHS. It is negative iff LHS < RHS. It is zero or + // positive iff LHS >= RHS. + SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue); + SDValue LowCmp = DAG.getNode(ISD::SUBC, dl, VTList, LHSLo, RHSLo); + SDValue Res = + DAG.getNode(ISD::SETCCE, dl, getSetCCResultType(LHSLo.getValueType()), + LHSHi, RHSHi, LowCmp.getValue(1), DAG.getCondCode(CCCode)); + NewLHS = Res; + NewRHS = SDValue(); + return; + } + NewLHS = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETEQ, false, DagCombineInfo, dl); @@ -2796,6 +2903,24 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { DAG.getCondCode(CCCode)), 0); } +SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDValue Cond = N->getOperand(3); + SDLoc dl = SDLoc(N); + + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedInteger(LHS, LHSLo, LHSHi); + GetExpandedInteger(RHS, RHSLo, RHSHi); + + // Expand to a SUBE for the low part and a smaller SETCCE for the high. + SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue); + SDValue LowCmp = DAG.getNode(ISD::SUBE, dl, VTList, LHSLo, RHSLo, Carry); + return DAG.getNode(ISD::SETCCE, dl, N->getValueType(0), LHSHi, RHSHi, + LowCmp.getValue(1), Cond); +} + SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { // The value being shifted is legal, but the shift amount is too big. // It follows that either the result of the shift is undefined, or the @@ -2820,7 +2945,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, DstVT, Op, true, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2980,11 +3105,10 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { // Load the value out, extending it from f32 to the destination float type. // FIXME: Avoid the extend by constructing the right constant pool? 
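The SETCCE lowering above turns a wide compare into a subtraction: SUBC exposes the borrow out of the low halves, and SETCCE folds that borrow into the high-half compare, exactly as the comment about the sign of LHS - RHS describes. A minimal scalar model of an unsigned 128-bit SETULT, with the borrow chain written out explicitly:

    #include <cassert>
    #include <cstdint>

    // Model of the SUBC + SETCCE lowering for a 128-bit unsigned SETULT.
    // SUBC: subtract the low halves and expose the borrow out.
    // SETCCE: compare the high halves while consuming that borrow.
    static bool ult128(uint64_t LHSLo, uint64_t LHSHi,
                       uint64_t RHSLo, uint64_t RHSHi) {
      bool Borrow = LHSLo < RHSLo;  // borrow out of the low-half SUBC
      // LHSHi - RHSHi - Borrow borrows out iff LHS < RHS as a whole.
      return LHSHi < RHSHi || (LHSHi == RHSHi && Borrow);
    }

    int main() {
      // Checked against the full-width ordering.
      assert( ult128(/*Lo=*/0, /*Hi=*/1, /*Lo=*/~0ull, /*Hi=*/1));
      assert(!ult128(~0ull, 1, 0, 1));
      assert( ult128(5, 0, 3, 1)); // high half dominates
    }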
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), - FudgePtr, - MachinePointerInfo::getConstantPool(), - MVT::f32, - false, false, false, Alignment); + SDValue Fudge = DAG.getExtLoad( + ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, + false, false, false, Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); } @@ -2992,7 +3116,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl).first; + return TLI.makeLibCall(DAG, LC, DstVT, Op, true, dl).first; } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 54cfaf570619..2a0b0aa44794 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -73,21 +73,20 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { // (for example because it was created but not used). In general, we cannot // distinguish between new nodes and deleted nodes. SmallVector<SDNode*, 16> NewNodes; - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = DAG.allnodes_end(); I != E; ++I) { + for (SDNode &Node : DAG.allnodes()) { // Remember nodes marked NewNode - they are subject to extra checking below. - if (I->getNodeId() == NewNode) - NewNodes.push_back(I); + if (Node.getNodeId() == NewNode) + NewNodes.push_back(&Node); - for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) { - SDValue Res(I, i); + for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) { + SDValue Res(&Node, i); bool Failed = false; unsigned Mapped = 0; if (ReplacedValues.find(Res) != ReplacedValues.end()) { Mapped |= 1; // Check that remapped values are only used by nodes marked NewNode. - for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end(); + for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end(); UI != UE; ++UI) if (UI.getUse().getResNo() == i) assert(UI->getNodeId() == NewNode && @@ -119,16 +118,16 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { if (WidenedVectors.find(Res) != WidenedVectors.end()) Mapped |= 128; - if (I->getNodeId() != Processed) { + if (Node.getNodeId() != Processed) { // Since we allow ReplacedValues to map deleted nodes, it may map nodes // marked NewNode too, since a deleted node may have been reallocated as // another node that has not been seen by the LegalizeTypes machinery. - if ((I->getNodeId() == NewNode && Mapped > 1) || - (I->getNodeId() != NewNode && Mapped != 0)) { + if ((Node.getNodeId() == NewNode && Mapped > 1) || + (Node.getNodeId() != NewNode && Mapped != 0)) { dbgs() << "Unprocessed value in a map!"; Failed = true; } - } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) { + } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) { if (Mapped > 1) { dbgs() << "Value with legal type was transformed!"; Failed = true; @@ -194,13 +193,12 @@ bool DAGTypeLegalizer::run() { // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess' // (and remembering them) if they are leaves and assigning 'Unanalyzed' if // non-leaves. 
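The Fudge load above implements the classic unsigned-to-float trick: convert as if signed, then add back 2^64 when the sign bit was set. A scalar sketch, assuming IEEE-754 doubles and modular behavior of the unsigned-to-signed cast (implementation-defined before C++20, universal in practice):

    #include <cassert>
    #include <cstdint>

    // Model of the expanded UINT_TO_FP: convert as signed, then add a
    // "fudge" constant of 2^64 when the value was misread as negative.
    static double u64_to_f64(uint64_t X) {
      double Signed = static_cast<double>(static_cast<int64_t>(X));
      double Fudge = (static_cast<int64_t>(X) < 0)
                         ? 18446744073709551616.0 // 2^64, exact in a double
                         : 0.0;
      return Signed + Fudge;
    }

    int main() {
      assert(u64_to_f64(0x8000000000000000ull) == 9223372036854775808.0); // 2^63
      assert(u64_to_f64(42) == 42.0);
    }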
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = DAG.allnodes_end(); I != E; ++I) { - if (I->getNumOperands() == 0) { - I->setNodeId(ReadyToProcess); - Worklist.push_back(I); + for (SDNode &Node : DAG.allnodes()) { + if (Node.getNumOperands() == 0) { + Node.setNodeId(ReadyToProcess); + Worklist.push_back(&Node); } else { - I->setNodeId(Unanalyzed); + Node.setNodeId(Unanalyzed); } } @@ -240,9 +238,13 @@ bool DAGTypeLegalizer::run() { Changed = true; goto NodeDone; case TargetLowering::TypeSoftenFloat: - SoftenFloatResult(N, i); - Changed = true; - goto NodeDone; + Changed = SoftenFloatResult(N, i); + if (Changed) + goto NodeDone; + // If not changed, the result type should be legally in register. + assert(isLegalInHWReg(ResultVT) && + "Unchanged SoftenFloatResult should be legal in register!"); + goto ScanOperands; case TargetLowering::TypeExpandFloat: ExpandFloatResult(N, i); Changed = true; @@ -409,40 +411,48 @@ NodeDone: // In a debug build, scan all the nodes to make sure we found them all. This // ensures that there are no cycles and that everything got processed. #ifndef NDEBUG - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = DAG.allnodes_end(); I != E; ++I) { + for (SDNode &Node : DAG.allnodes()) { bool Failed = false; // Check that all result types are legal. - if (!IgnoreNodeResults(I)) - for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i) - if (!isTypeLegal(I->getValueType(i))) { - dbgs() << "Result type " << i << " illegal!\n"; + // A value type is illegal if its TypeAction is not TypeLegal, + // and TLI.RegClassForVT does not have a register class for this type. + // For example, the x86_64 target has f128 that is not TypeLegal, + // to have softened operators, but it also has FR128 register class to + // pass and return f128 values. Hence a legalized node can have f128 type. + if (!IgnoreNodeResults(&Node)) + for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i) + if (!isTypeLegal(Node.getValueType(i)) && + !TLI.isTypeLegal(Node.getValueType(i))) { + dbgs() << "Result type " << i << " illegal: "; + Node.dump(); Failed = true; } // Check that all operand types are legal. 
- for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i) - if (!IgnoreNodeResults(I->getOperand(i).getNode()) && - !isTypeLegal(I->getOperand(i).getValueType())) { - dbgs() << "Operand type " << i << " illegal!\n"; + for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i) + if (!IgnoreNodeResults(Node.getOperand(i).getNode()) && + !isTypeLegal(Node.getOperand(i).getValueType()) && + !TLI.isTypeLegal(Node.getOperand(i).getValueType())) { + dbgs() << "Operand type " << i << " illegal: "; + Node.getOperand(i).dump(); Failed = true; } - if (I->getNodeId() != Processed) { - if (I->getNodeId() == NewNode) + if (Node.getNodeId() != Processed) { + if (Node.getNodeId() == NewNode) dbgs() << "New node not analyzed?\n"; - else if (I->getNodeId() == Unanalyzed) + else if (Node.getNodeId() == Unanalyzed) dbgs() << "Unanalyzed node not noticed?\n"; - else if (I->getNodeId() > 0) + else if (Node.getNodeId() > 0) dbgs() << "Operand not processed?\n"; - else if (I->getNodeId() == ReadyToProcess) + else if (Node.getNodeId() == ReadyToProcess) dbgs() << "Not added to worklist?\n"; Failed = true; } if (Failed) { - I->dump(&DAG); dbgs() << "\n"; + Node.dump(&DAG); dbgs() << "\n"; llvm_unreachable(nullptr); } } @@ -751,13 +761,23 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { - assert(Result.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + // f128 of x86_64 could be kept in SSE registers, + // but sometimes softened to i128. + assert((Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) || + Op.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && "Invalid type for softened float"); AnalyzeNewValue(Result); SDValue &OpEntry = SoftenedFloats[Op]; - assert(!OpEntry.getNode() && "Node is already converted to integer!"); + // Allow repeated calls to save f128 type nodes + // or any node with type that transforms to itself. + // Many operations on these types are not softened. + assert((!OpEntry.getNode()|| + Op.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && + "Node is already converted to integer!"); OpEntry = Result; } @@ -1042,23 +1062,22 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, unsigned NumOps = N->getNumOperands(); SDLoc dl(N); if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), nullptr, 0, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, isSigned, dl).first; } else if (NumOps == 1) { SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, isSigned, dl).first; } else if (NumOps == 2) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first; } SmallVector<SDValue, 8> Ops(NumOps); for (unsigned i = 0; i < NumOps; ++i) Ops[i] = N->getOperand(i); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), - &Ops[0], NumOps, isSigned, dl).first; + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first; } // ExpandChainLibCall - Expand a node into a call to a libcall. 
Similar to @@ -1108,6 +1127,23 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) { return DAG.getNode(ExtendCode, dl, BoolVT, Bool); } +/// WidenTargetBoolean - Widen the given target boolean to a target boolean +/// of the given type. The boolean vector is widened and then promoted to match +/// the target boolean type of the given ValVT. +SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT, + bool WithZeroes) { + SDLoc dl(Bool); + EVT BoolVT = Bool.getValueType(); + + assert(ValVT.getVectorNumElements() > BoolVT.getVectorNumElements() && + TLI.isTypeLegal(ValVT) && + "Unexpected types in WidenTargetBoolean"); + EVT WideVT = EVT::getVectorVT(*DAG.getContext(), BoolVT.getScalarType(), + ValVT.getVectorNumElements()); + Bool = ModifyToType(Bool, WideVT, WithZeroes); + return PromoteTargetBoolean(Bool, ValVT); +} + /// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT /// bits in Hi. void DAGTypeLegalizer::SplitInteger(SDValue Op, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index d1131a74cf17..8ba19f76797f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -72,6 +72,20 @@ private: return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal; } + /// isSimpleLegalType - Return true if this is a simple legal type. + bool isSimpleLegalType(EVT VT) const { + return VT.isSimple() && TLI.isTypeLegal(VT); + } + + /// isLegalInHWReg - Return true if this type can be passed in registers. + /// For example, x86_64's f128 should be legal in registers, + /// with only some operations converted to library calls or integer + /// bitwise operations. + bool isLegalInHWReg(EVT VT) const { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return VT == NVT && isSimpleLegalType(VT); + } + EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } @@ -173,6 +187,11 @@ private: std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT); + + /// Modify the bit vector to match the SetCC result type of ValVT. + /// The bit vector is widened with zeroes when WithZeroes is true.
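The point of the WithZeroes flag threaded through WidenTargetBoolean and ModifyToType is that padding lanes of a mask must read as inactive; undef padding could spuriously enable a lane that must not touch memory. A toy model of the widening step (true stands in for an undef padding bit, to show the hazard):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Model of widening a boolean mask from 3 lanes to 4. Zero padding
    // keeps the extra lane inactive; undef-like padding might not.
    static std::vector<bool> widenMask(const std::vector<bool> &Mask,
                                       std::size_t WideN, bool WithZeroes) {
      std::vector<bool> Wide(Mask.begin(), Mask.end());
      Wide.resize(WideN, WithZeroes ? false : true /* stand-in for undef */);
      return Wide;
    }

    int main() {
      auto Wide = widenMask({true, false, true}, 4, /*WithZeroes=*/true);
      assert(Wide.size() == 4 && Wide[3] == false); // padded lane stays off
    }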
+ SDValue WidenTargetBoolean(SDValue Bool, EVT ValVT, bool WithZeroes = false); + void ReplaceValueWith(SDValue From, SDValue To); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, @@ -234,6 +253,7 @@ private: SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntRes_BITCAST(SDNode *N); SDValue PromoteIntRes_BSWAP(SDNode *N); + SDValue PromoteIntRes_BITREVERSE(SDNode *N); SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); SDValue PromoteIntRes_Constant(SDNode *N); SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N); @@ -246,21 +266,22 @@ private: SDValue PromoteIntRes_INT_EXTEND(SDNode *N); SDValue PromoteIntRes_LOAD(LoadSDNode *N); SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N); + SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N); SDValue PromoteIntRes_Overflow(SDNode *N); SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); - SDValue PromoteIntRes_SDIV(SDNode *N); SDValue PromoteIntRes_SELECT(SDNode *N); SDValue PromoteIntRes_VSELECT(SDNode *N); SDValue PromoteIntRes_SELECT_CC(SDNode *N); SDValue PromoteIntRes_SETCC(SDNode *N); SDValue PromoteIntRes_SHL(SDNode *N); SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); + SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N); + SDValue PromoteIntRes_SExtIntBinOp(SDNode *N); SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); SDValue PromoteIntRes_SRA(SDNode *N); SDValue PromoteIntRes_SRL(SDNode *N); SDValue PromoteIntRes_TRUNCATE(SDNode *N); SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); - SDValue PromoteIntRes_UDIV(SDNode *N); SDValue PromoteIntRes_UNDEF(SDNode *N); SDValue PromoteIntRes_VAARG(SDNode *N); SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); @@ -276,7 +297,6 @@ private: SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N); SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N); SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); - SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N); SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); @@ -284,7 +304,6 @@ private: SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); - SDValue PromoteIntOp_VSETCC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_Shift(SDNode *N); SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); SDValue PromoteIntOp_SINT_TO_FP(SDNode *N); @@ -294,6 +313,8 @@ private: SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -312,8 +333,6 @@ private: // Integer Result Expansion. 
void ExpandIntegerResult(SDNode *N, unsigned ResNo); - void ExpandIntRes_MERGE_VALUES (SDNode *N, unsigned ResNo, - SDValue &Lo, SDValue &Hi); void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -322,6 +341,7 @@ private: void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -333,6 +353,7 @@ private: void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -354,12 +375,10 @@ private: // Integer Operand Expansion. bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo); - SDValue ExpandIntOp_BITCAST(SDNode *N); SDValue ExpandIntOp_BR_CC(SDNode *N); - SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N); - SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N); SDValue ExpandIntOp_SELECT_CC(SDNode *N); SDValue ExpandIntOp_SETCC(SDNode *N); + SDValue ExpandIntOp_SETCCE(SDNode *N); SDValue ExpandIntOp_Shift(SDNode *N); SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); @@ -375,32 +394,48 @@ private: // Float to Integer Conversion Support: LegalizeFloatTypes.cpp //===--------------------------------------------------------------------===// - /// GetSoftenedFloat - Given a processed operand Op which was converted to an - /// integer of the same size, this returns the integer. The integer contains - /// exactly the same bits as Op - only the type changed. For example, if Op - /// is an f32 which was softened to an i32, then this method returns an i32, - /// the bits of which coincide with those of Op. + /// GetSoftenedFloat - Given an operand Op of Float type, returns the integer + /// if the Op is not supported in target HW and converted to the integer. + /// The integer contains exactly the same bits as Op - only the type changed. + /// For example, if Op is an f32 which was softened to an i32, then this method + /// returns an i32, the bits of which coincide with those of Op. + /// If the Op can be efficiently supported in target HW or the operand must + /// stay in a register, the Op is not converted to an integer. + /// In that case, the given op is returned. SDValue GetSoftenedFloat(SDValue Op) { SDValue &SoftenedOp = SoftenedFloats[Op]; + if (!SoftenedOp.getNode() && + isSimpleLegalType(Op.getValueType())) + return Op; RemapValue(SoftenedOp); assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?"); return SoftenedOp; } void SetSoftenedFloat(SDValue Op, SDValue Result); - // Result Float to Integer Conversion. - void SoftenFloatResult(SDNode *N, unsigned OpNo); + // Call ReplaceValueWith(SDValue(N, ResNo), Res) if necessary. 
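As the reworked GetSoftenedFloat comment says, softening changes only the type, never the bits: an f32 becomes an i32 with the identical pattern, and float operations on it become libcalls or integer bit manipulation, unless the type is legal in a hardware register and is now left alone. A scalar illustration, assuming IEEE-754 floats:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Softening in miniature: an f32 becomes an i32 with the identical
    // bit pattern; operations on it are then lowered to libcalls or
    // integer bitwise ops (e.g. FNEG becomes an XOR of the sign bit).
    static uint32_t softenF32(float F) {
      uint32_t I;
      static_assert(sizeof(F) == sizeof(I), "f32 softens to i32");
      std::memcpy(&I, &F, sizeof(I));
      return I;
    }

    int main() {
      assert(softenF32(-0.0f) == 0x80000000u); // only the sign bit set
      assert(softenF32(1.0f) == 0x3F800000u);  // IEEE-754 encoding of 1.0
    }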
+ void ReplaceSoftenFloatResult(SDNode *N, unsigned ResNo, SDValue &NewRes) { + // When the result type can be kept in HW registers, the converted + // NewRes node could have the same type. We can save the effort of + // cloning every user of N in SoftenFloatOperand or other legalization functions, + // by calling ReplaceValueWith here to update all users. + if (NewRes.getNode() != N && isLegalInHWReg(N->getValueType(ResNo))) + ReplaceValueWith(SDValue(N, ResNo), NewRes); + } + + // Convert Float Results to Integer for Non-HW-supported Operations. + bool SoftenFloatResult(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_BITCAST(SDNode *N); + SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); - SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N); + SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); - SDValue SoftenFloatRes_FABS(SDNode *N); + SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); SDValue SoftenFloatRes_FCEIL(SDNode *N); - SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); + SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FCOS(SDNode *N); SDValue SoftenFloatRes_FDIV(SDNode *N); SDValue SoftenFloatRes_FEXP(SDNode *N); @@ -412,7 +447,7 @@ private: SDValue SoftenFloatRes_FMA(SDNode *N); SDValue SoftenFloatRes_FMUL(SDNode *N); SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); - SDValue SoftenFloatRes_FNEG(SDNode *N); + SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N); SDValue SoftenFloatRes_FP_ROUND(SDNode *N); @@ -425,21 +460,25 @@ private: SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); SDValue SoftenFloatRes_FTRUNC(SDNode *N); - SDValue SoftenFloatRes_LOAD(SDNode *N); - SDValue SoftenFloatRes_SELECT(SDNode *N); - SDValue SoftenFloatRes_SELECT_CC(SDNode *N); + SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_UNDEF(SDNode *N); SDValue SoftenFloatRes_VAARG(SDNode *N); SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); - // Operand Float to Integer Conversion. + // Return true if we can skip softening the given operand or SDNode because + // it was softened before by SoftenFloatResult and references to the operand + // were replaced by ReplaceValueWith. + bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo); + + // Convert Float Operand to Integer for Non-HW-supported Operations.
bool SoftenFloatOperand(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); - SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N); - SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N); + SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); @@ -575,7 +614,6 @@ private: SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); - SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); @@ -617,20 +655,18 @@ private: void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, SDValue &Hi); @@ -650,6 +686,7 @@ private: SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); + SDValue SplitVecOp_FCOPYSIGN(SDNode *N); //===--------------------------------------------------------------------===// // Vector Widening Support: LegalizeVectorTypes.cpp @@ -680,8 +717,8 @@ private: SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); + SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N); SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N); - SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N); SDValue WidenVecRes_SELECT(SDNode* N); SDValue WidenVecRes_SELECT_CC(SDNode* N); SDValue WidenVecRes_SETCC(SDNode* N); @@ -693,6 +730,7 @@ private: SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_BinaryCanTrap(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); + SDValue WidenVecRes_FCOPYSIGN(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); @@ -707,9 +745,11 @@ private: SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo); + SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_SETCC(SDNode* N); SDValue WidenVecOp_Convert(SDNode *N); + SDValue 
WidenVecOp_FCOPYSIGN(SDNode *N); //===--------------------------------------------------------------------===// // Vector Widening Utilities Support: LegalizeVectorTypes.cpp @@ -745,8 +785,10 @@ private: /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. - SDValue ModifyToType(SDValue InOp, EVT WidenVT); - + /// When FillWithZeroes is "on" the vector will be widened with + /// zeroes. + /// By default, the vector will be widened with undefined values. + SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false); //===--------------------------------------------------------------------===// // Generic Splitting: LegalizeTypesGeneric.cpp diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 14d8f7762086..593c346df770 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -53,12 +53,17 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypePromoteFloat: llvm_unreachable("Bitcast of a promotion-needing float should never need" "expansion"); - case TargetLowering::TypeSoftenFloat: - // Convert the integer operand instead. - SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); + case TargetLowering::TypeSoftenFloat: { + // Expand the floating point operand only if it was converted to integers. + // Otherwise, it is a legal type like f128 that can be saved in a register. + auto SoftenedOp = GetSoftenedFloat(InOp); + if (SoftenedOp == InOp) + break; + SplitInteger(SoftenedOp, Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; + } case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: { auto &DL = DAG.getDataLayout(); @@ -161,7 +166,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { NOutVT.getTypeForEVT(*DAG.getContext())); SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); // Emit a store to the stack slot. SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 83d4ad5ea1f4..f61f631e2ff8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -105,6 +105,8 @@ class VectorLegalizer { SDValue ExpandLoad(SDValue Op); SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); + SDValue ExpandBITREVERSE(SDValue Op); + SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op); /// \brief Implements vector promotion. /// @@ -159,7 +161,7 @@ bool VectorLegalizer::Run() { DAG.AssignTopologicalOrder(); for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) - LegalizeOp(SDValue(I, 0)); + LegalizeOp(SDValue(&*I, 0)); // Finally, it's possible the root changed. Get the new root. 
SDValue OldRoot = DAG.getRoot(); @@ -218,9 +220,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { assert(Result.getValue(1).use_empty() && "There are still live users of the old chain!"); return LegalizeOp(Lowered); - } else { - return TranslateLegalizeResults(Op, Lowered); } + return TranslateLegalizeResults(Op, Lowered); } case TargetLowering::Expand: Changed = true; @@ -231,7 +232,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { EVT StVT = ST->getMemoryVT(); MVT ValVT = ST->getValue().getSimpleValueType(); if (StVT.isVector() && ST->isTruncatingStore()) - switch (TLI.getTruncStoreAction(ValVT, StVT.getSimpleVT())) { + switch (TLI.getTruncStoreAction(ValVT, StVT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: return TranslateLegalizeResults(Op, Result); @@ -244,7 +245,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Changed = true; return LegalizeOp(ExpandStore(Op)); } - } else if (Op.getOpcode() == ISD::MSCATTER) + } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE) HasVectorValue = true; for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); @@ -265,6 +266,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::UDIV: case ISD::SREM: case ISD::UREM: + case ISD::SDIVREM: + case ISD::UDIVREM: case ISD::FADD: case ISD::FSUB: case ISD::FMUL: @@ -279,6 +282,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::ROTL: case ISD::ROTR: case ISD::BSWAP: + case ISD::BITREVERSE: case ISD::CTLZ: case ISD::CTTZ: case ISD::CTLZ_ZERO_UNDEF: @@ -298,6 +302,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FABS: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: case ISD::FCOPYSIGN: case ISD::FSQRT: case ISD::FSIN: @@ -338,9 +344,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::MSCATTER: QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType(); break; + case ISD::MSTORE: + QueryType = cast<MaskedStoreSDNode>(Node)->getValue().getValueType(); + break; } switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Promote: Result = Promote(Op); Changed = true; @@ -411,7 +421,7 @@ SDValue VectorLegalizer::Promote(SDValue Op) { Operands[j] = Op.getOperand(j); } - Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands); + Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags()); if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) @@ -708,6 +718,11 @@ SDValue VectorLegalizer::Expand(SDValue Op) { return ExpandFNEG(Op); case ISD::SETCC: return UnrollVSETCC(Op); + case ISD::BITREVERSE: + return ExpandBITREVERSE(Op); + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: + return ExpandCTLZ_CTTZ_ZERO_UNDEF(Op); default: return DAG.UnrollVectorOp(Op.getNode()); } @@ -893,6 +908,25 @@ SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { return DAG.getNode(ISD::BITCAST, DL, VT, Op); } +SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) { + EVT VT = Op.getValueType(); + + // If we have the scalar operation, it's probably cheaper to unroll it. 
+ if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) + return DAG.UnrollVectorOp(Op.getNode()); + + // If we have the appropriate vector bit operations, it is better to use them + // than unrolling and expanding each component. + if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) || + !TLI.isOperationLegalOrCustom(ISD::SRL, VT) || + !TLI.isOperationLegalOrCustom(ISD::AND, VT) || + !TLI.isOperationLegalOrCustom(ISD::OR, VT)) + return DAG.UnrollVectorOp(Op.getNode()); + + // Let LegalizeDAG handle this later. + return Op; +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. @@ -971,6 +1005,7 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { // Convert hi and lo to floats // Convert the hi part back to the upper values + // TODO: Can any fast-math-flags be set on these nodes? SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI); fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW); SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO); @@ -984,12 +1019,23 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { SDLoc DL(Op); SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType()); + // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB. return DAG.getNode(ISD::FSUB, DL, Op.getValueType(), Zero, Op.getOperand(0)); } return DAG.UnrollVectorOp(Op.getNode()); } +SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) { + // If the non-ZERO_UNDEF version is supported we can let LegalizeDAG handle. + unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ; + if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) + return Op; + + // Otherwise go ahead and unroll. 
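ExpandBITREVERSE above keeps the node in vector form whenever SHL/SRL/AND/OR are legal, because a bit reversal decomposes into a logarithmic swap network instead of per-element unrolling. The standard 32-bit network it defers to LegalizeDAG for, written as scalar code:

    #include <cassert>
    #include <cstdint>

    // The shift-and-mask network usable once SHL/SRL/AND/OR are legal:
    // swap adjacent bits, then pairs, nibbles, bytes, and halves.
    static uint32_t bitrev32(uint32_t X) {
      X = ((X >> 1) & 0x55555555u) | ((X & 0x55555555u) << 1);
      X = ((X >> 2) & 0x33333333u) | ((X & 0x33333333u) << 2);
      X = ((X >> 4) & 0x0F0F0F0Fu) | ((X & 0x0F0F0F0Fu) << 4);
      X = ((X >> 8) & 0x00FF00FFu) | ((X & 0x00FF00FFu) << 8);
      return (X >> 16) | (X << 16);
    }

    int main() {
      assert(bitrev32(0x00000001u) == 0x80000000u);
      assert(bitrev32(0x12345678u) == 0x1E6A2C48u);
    }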
+ return DAG.UnrollVectorOp(Op.getNode()); +} + SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { EVT VT = Op.getValueType(); unsigned NumElems = VT.getVectorNumElements(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 51cd6619f783..d0187d36dee2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -67,6 +67,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; case ISD::ANY_EXTEND: + case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: @@ -108,6 +109,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FMUL: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: case ISD::FPOW: case ISD::FREM: @@ -139,7 +146,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(0)); SDValue RHS = GetScalarizedVector(N->getOperand(1)); return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); + LHS.getValueType(), LHS, RHS, N->getFlags()); } SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { @@ -228,7 +235,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { N->isInvariant(), N->getOriginalAlignment(), N->getAAInfo()); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. 
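ExpandCTLZ_CTTZ_ZERO_UNDEF above relies on the two opcode flavors differing only for a zero input, so the plain CTLZ/CTTZ result is always a valid refinement of the _ZERO_UNDEF form. The same relationship in scalar code, using __builtin_clz (which, like the _ZERO_UNDEF node, is undefined for 0) to build the fully defined variant:

    #include <cassert>

    // CTLZ with defined zero semantics, built from a CTLZ_ZERO_UNDEF-like
    // primitive; going the other way (the legalizer's direction) is free,
    // since any defined result is acceptable where undef is allowed.
    static int ctlz32(unsigned X) {
      return X ? __builtin_clz(X) : 32;
    }

    int main() {
      assert(ctlz32(0) == 32); // defined here, undef for the node
      assert(ctlz32(1) == 31);
      assert(ctlz32(0x80000000u) == 0);
    }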
ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); return Result; @@ -594,6 +601,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; + case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; @@ -613,6 +621,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; + case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CONVERT_RNDSAT: case ISD::CTLZ: @@ -656,11 +665,12 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SUB: case ISD::MUL: case ISD::FADD: - case ISD::FCOPYSIGN: case ISD::FSUB: case ISD::FMUL: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: case ISD::SDIV: case ISD::UDIV: case ISD::FDIV: @@ -698,8 +708,10 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, GetSplitVector(N->getOperand(1), RHSLo, RHSHi); SDLoc dl(N); - Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo); - Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); + const SDNodeFlags *Flags = N->getFlags(); + unsigned Opcode = N->getOpcode(); + Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags); + Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); } void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, @@ -870,6 +882,25 @@ void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); } +void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDLoc DL(N); + + SDValue RHSLo, RHSHi; + SDValue RHS = N->getOperand(1); + EVT RHSVT = RHS.getValueType(); + if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector) + GetSplitVector(RHS, RHSLo, RHSHi); + else + std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS)); + + + Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo); + Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi); +} + void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; @@ -989,7 +1020,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(LD, 1), Ch); } @@ -1003,6 +1034,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue Ch = MLD->getChain(); SDValue Ptr = MLD->getBasePtr(); SDValue Mask = MLD->getMask(); + SDValue Src0 = MLD->getSrc0(); unsigned Alignment = MLD->getOriginalAlignment(); ISD::LoadExtType ExtType = MLD->getExtensionType(); @@ -1012,16 +1044,22 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? 
Alignment/2 : Alignment; + // Split Mask operand SDValue MaskLo, MaskHi; - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); EVT MemoryVT = MLD->getMemoryVT(); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - SDValue Src0 = MLD->getSrc0(); SDValue Src0Lo, Src0Hi; - std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); + if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Src0, Src0Lo, Src0Hi); + else + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MLD->getPointerInfo(), @@ -1049,7 +1087,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(MLD, 1), Ch); @@ -1064,20 +1102,33 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue Ch = MGT->getChain(); SDValue Ptr = MGT->getBasePtr(); SDValue Mask = MGT->getMask(); + SDValue Src0 = MGT->getValue(); + SDValue Index = MGT->getIndex(); unsigned Alignment = MGT->getOriginalAlignment(); + // Split Mask operand SDValue MaskLo, MaskHi; - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); EVT MemoryVT = MGT->getMemoryVT(); EVT LoMemVT, HiMemVT; + // Split MemoryVT std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue Src0Lo, Src0Hi; - std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl); + if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Src0, Src0Lo, Src0Hi); + else + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); SDValue IndexHi, IndexLo; - std::tie(IndexLo, IndexHi) = DAG.SplitVector(MGT->getIndex(), dl); + if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Index, IndexLo, IndexHi); + else + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MGT->getPointerInfo(), @@ -1097,7 +1148,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(MGT, 1), Ch); } @@ -1357,6 +1408,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { Res = SplitVecOp_TruncateHelper(N); break; case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; + case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; @@ -1567,23 +1619,31 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue Ptr = MGT->getBasePtr(); SDValue Index = MGT->getIndex(); SDValue Mask = MGT->getMask(); + SDValue Src0 = MGT->getValue(); unsigned Alignment = MGT->getOriginalAlignment(); SDValue MaskLo, MaskHi; - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + // Split Mask operand + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); EVT MemoryVT = MGT->getMemoryVT(); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue Src0Lo, Src0Hi; - std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl); + if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Src0, Src0Lo, Src0Hi); + else + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); SDValue IndexHi, IndexLo; - if (Index.getNode()) - std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); + if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Index, IndexLo, IndexHi); else - IndexLo = IndexHi = Index; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MGT->getPointerInfo(), @@ -1609,7 +1669,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. 
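The MLOAD/MGATHER splitting above distributes the mask, passthrough (Src0), and index operands across the two halves, using GetSplitVector when an operand is itself being split. Per lane, a masked gather is just a guarded indexed load; splitting runs this loop once over each half:

    #include <cassert>

    // Per-lane semantics of MGATHER: active lanes load Base[Index[i]],
    // inactive lanes keep the passthrough value Src0[i].
    static void maskedGather(const int *Base, const int *Index,
                             const bool *Mask, const int *Src0,
                             int *Out, int N) {
      for (int i = 0; i < N; ++i)
        Out[i] = Mask[i] ? Base[Index[i]] : Src0[i];
    }

    int main() {
      int Base[4] = {10, 20, 30, 40};
      int Index[4] = {3, 0, 2, 1};
      bool Mask[4] = {true, false, true, false};
      int Src0[4] = {-1, -2, -3, -4};
      int Out[4];
      maskedGather(Base, Index, Mask, Src0, Out, 4);
      assert(Out[0] == 40 && Out[1] == -2 && Out[2] == 30 && Out[3] == -4);
    }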
ReplaceValueWith(SDValue(MGT, 1), Ch); @@ -1633,9 +1693,21 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue DataLo, DataHi; - GetSplitVector(Data, DataLo, DataHi); + if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) + // Split Data operand + GetSplitVector(Data, DataLo, DataHi); + else + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + SDValue MaskLo, MaskHi; - GetSplitVector(Mask, MaskLo, MaskHi); + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + // Split Mask operand + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + + MaskLo = PromoteTargetBoolean(MaskLo, DataLo.getValueType()); + MaskHi = PromoteTargetBoolean(MaskHi, DataHi.getValueType()); // if Alignment is equal to the vector size, // take the half of it for the second part @@ -1680,25 +1752,29 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned Alignment = N->getOriginalAlignment(); SDLoc DL(N); + // Split all operands EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue DataLo, DataHi; - GetSplitVector(Data, DataLo, DataHi); - SDValue MaskLo, MaskHi; - GetSplitVector(Mask, MaskLo, MaskHi); + if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) + // Split Data operand + GetSplitVector(Data, DataLo, DataHi); + else + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); - SDValue PtrLo, PtrHi; - if (Ptr.getValueType().isVector()) // gather form vector of pointers - std::tie(PtrLo, PtrHi) = DAG.SplitVector(Ptr, DL); + SDValue MaskLo, MaskHi; + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + // Split Mask operand + GetSplitVector(Mask, MaskLo, MaskHi); else - PtrLo = PtrHi = Ptr; + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); SDValue IndexHi, IndexLo; - if (Index.getNode()) - std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); + if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Index, IndexLo, IndexHi); else - IndexLo = IndexHi = Index; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); SDValue Lo, Hi; MachineMemOperand *MMO = DAG.getMachineFunction(). @@ -1706,7 +1782,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - SDValue OpsLo[] = {Ch, DataLo, MaskLo, PtrLo, IndexLo}; + SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo}; Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), DL, OpsLo, MMO); @@ -1715,7 +1791,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - SDValue OpsHi[] = {Ch, DataHi, MaskHi, PtrHi, IndexHi}; + SDValue OpsHi[] = {Ch, DataHi, MaskHi, Ptr, IndexHi}; Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), DL, OpsHi, MMO); @@ -1891,6 +1967,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); } +SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) { + // The result (and the first input) has a legal vector type, but the second + // input needs splitting. 
+ return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements()); +} //===----------------------------------------------------------------------===// @@ -1938,6 +2019,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::MLOAD: Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N)); break; + case ISD::MGATHER: + Res = WidenVecRes_MGATHER(cast<MaskedGatherSDNode>(N)); + break; case ISD::ADD: case ISD::AND: @@ -1949,11 +2033,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::XOR: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: Res = WidenVecRes_Binary(N); break; case ISD::FADD: - case ISD::FCOPYSIGN: case ISD::FMUL: case ISD::FPOW: case ISD::FSUB: @@ -1966,6 +2055,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_BinaryCanTrap(N); break; + case ISD::FCOPYSIGN: + Res = WidenVecRes_FCOPYSIGN(N); + break; + case ISD::FPOWI: Res = WidenVecRes_POWI(N); break; @@ -1989,6 +2082,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Convert(N); break; + case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CTLZ: case ISD::CTPOP: @@ -2037,7 +2131,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags()); } SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { @@ -2048,6 +2142,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; unsigned NumElts = VT.getVectorNumElements(); + const SDNodeFlags *Flags = N->getFlags(); while (!TLI.isTypeLegal(VT) && NumElts != 1) { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); @@ -2057,7 +2152,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { // Operation doesn't trap so just widen as normal. SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags); } // No legal vector version so unroll the vector operation and then widen. 
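SplitVecOp_MSCATTER above is the store-side mirror: data, mask, and index are split the same way, while the scalar base pointer is now shared by both halves instead of being duplicated per lane. Per lane, a masked scatter only writes active lanes, which is also why widened masks must be padded with zeroes:

    #include <cassert>

    // Per-lane semantics of MSCATTER: only active lanes store. A widened
    // mask padded with zeroes therefore leaves the extra lanes as no-ops.
    static void maskedScatter(int *Base, const int *Index, const bool *Mask,
                              const int *Data, int N) {
      for (int i = 0; i < N; ++i)
        if (Mask[i])
          Base[Index[i]] = Data[i];
    }

    int main() {
      int Base[4] = {0, 0, 0, 0};
      int Index[4] = {2, 1, 3, 0};
      bool Mask[4] = {true, true, false, false};
      int Data[4] = {7, 8, 9, 10};
      maskedScatter(Base, Index, Mask, Data, 4);
      assert(Base[2] == 7 && Base[1] == 8 && Base[3] == 0 && Base[0] == 0);
    }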
@@ -2087,7 +2182,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { SDValue EOp2 = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags); Idx += NumElts; CurNumElts -= NumElts; } @@ -2105,7 +2200,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, - EOp1, EOp2); + EOp1, EOp2, Flags); } CurNumElts = 0; } @@ -2195,7 +2290,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { unsigned Opcode = N->getOpcode(); unsigned InVTNumElts = InVT.getVectorNumElements(); - + const SDNodeFlags *Flags = N->getFlags(); if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); @@ -2203,7 +2298,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { if (InVTNumElts == WidenNumElts) { if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InOp); - return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1)); + return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags); } } @@ -2224,7 +2319,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVec); - return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1)); + return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags); } if (InVTNumElts % WidenNumElts == 0) { @@ -2234,7 +2329,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { // Extract the input and convert the shorten input vector. if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVal); - return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1)); + return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags); } } @@ -2250,7 +2345,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { if (N->getNumOperands() == 1) Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); else - Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1)); + Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags); } SDValue UndefVal = DAG.getUNDEF(EltVT); @@ -2260,6 +2355,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops); } +SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) { + // If this is an FCOPYSIGN with same input types, we can treat it as a + // normal (can trap) binary op. + if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType()) + return WidenVecRes_BinaryCanTrap(N); + + // If the types are different, fall back to unrolling. 
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); +} + SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); @@ -2669,7 +2775,35 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), Mask, Src0, N->getMemoryVT(), N->getMemOperand(), ExtType); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { + + EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Mask = N->getMask(); + SDValue Src0 = GetWidenedVector(N->getValue()); + unsigned NumElts = WideVT.getVectorNumElements(); + SDLoc dl(N); + + // The mask should be widened as well + Mask = WidenTargetBoolean(Mask, WideVT, true); + + // Widen the Index operand + SDValue Index = N->getIndex(); + EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(), + Index.getValueType().getScalarType(), + NumElts); + Index = ModifyToType(Index, WideIndexVT); + SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; + SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), + N->getMemoryVT(), dl, Ops, + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; @@ -2831,7 +2965,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break; + case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; + case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break; case ISD::ANY_EXTEND: case ISD::SIGN_EXTEND: @@ -2928,6 +3064,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { } } +SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) { + // The result (and first input) is legal, but the second input is illegal. + // We can't do much to fix that, so just unroll and let the extracts off of + // the second input be widened as needed later. 
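WidenVecOp_FCOPYSIGN and the earlier SplitVecRes_FCOPYSIGN both bottom out in per-lane copysign, falling back to unrolling when the two operand types diverge. A minimal per-lane model using std::copysign:

    #include <cassert>
    #include <cmath>

    // Per-lane FCOPYSIGN over one chunk of a split or unrolled vector:
    // magnitude from LHS, sign from RHS.
    static void copysignLanes(const double *LHS, const double *RHS,
                              double *Out, int N) {
      for (int i = 0; i < N; ++i)
        Out[i] = std::copysign(LHS[i], RHS[i]);
    }

    int main() {
      double LHS[2] = {1.5, -2.0};
      double RHS[2] = {-0.0, 3.0};
      double Out[2];
      copysignLanes(LHS, RHS, Out, 2);
      assert(Out[0] == -1.5 && Out[1] == 2.0); // -0.0 still carries a sign
    }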
+ return DAG.UnrollVectorOp(N); +} + SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // Since the result is legal and the input is illegal, it is unlikely // that we can fix the input to a legal type so unroll the convert @@ -3070,6 +3213,34 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { false); } +SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { + assert(OpNo == 1 && "Can widen only data operand of mscatter"); + MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); + SDValue DataOp = MSC->getValue(); + SDValue Mask = MSC->getMask(); + + // Widen the value + SDValue WideVal = GetWidenedVector(DataOp); + EVT WideVT = WideVal.getValueType(); + unsigned NumElts = WideVal.getValueType().getVectorNumElements(); + SDLoc dl(N); + + // The mask should be widened as well + Mask = WidenTargetBoolean(Mask, WideVT, true); + + // Widen index + SDValue Index = MSC->getIndex(); + EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(), + Index.getValueType().getScalarType(), + NumElts); + Index = ModifyToType(Index, WideIndexVT); + + SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index}; + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), + MSC->getMemoryVT(), dl, Ops, + MSC->getMemOperand()); +} + SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SDValue InOp0 = GetWidenedVector(N->getOperand(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(1)); @@ -3533,7 +3704,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. -SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { +/// FillWithZeroes specifies that the vector should be widened with zeroes. +SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, + bool FillWithZeroes) { // Note that InOp might have been widened so it might already have // the right width or it might need be narrowed. EVT InVT = InOp.getValueType(); @@ -3550,10 +3723,11 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) { unsigned NumConcat = WidenNumElts / InNumElts; SmallVector<SDValue, 16> Ops(NumConcat); - SDValue UndefVal = DAG.getUNDEF(InVT); + SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) : + DAG.getUNDEF(InVT); Ops[0] = InOp; for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = UndefVal; + Ops[i] = FillVal; return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops); } @@ -3573,8 +3747,9 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - SDValue UndefVal = DAG.getUNDEF(EltVT); + SDValue FillVal = FillWithZeroes ? 
DAG.getConstant(0, dl, EltVT) : + DAG.getUNDEF(EltVT); for ( ; Idx < WidenNumElts; ++Idx) - Ops[Idx] = UndefVal; + Ops[Idx] = FillVal; return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 6303422b9ae9..622e06f0da2a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -49,7 +49,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) TII = STI.getInstrInfo(); ResourcesModel.reset(TII->CreateTargetScheduleState(STI)); // This hard requirement could be relaxed, but for now - // do not let it procede. + // do not let it proceed. assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); unsigned NumRC = TRI->getNumRegClasses(); @@ -269,12 +269,12 @@ bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { } // Now see if there are no other dependencies - // to instructions alredy in the packet. + // to instructions already in the packet. for (unsigned i = 0, e = Packet.size(); i != e; ++i) for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), E = Packet[i]->Succs.end(); I != E; ++I) { // Since we do not add pseudos to packets, might as well - // ignor order deps. + // ignore order deps. if (I->isCtrl()) continue; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 34e1a7001082..62e7733ecd2b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -440,7 +440,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); NumRes = MCID.getNumDefs(); - for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -519,7 +519,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { + for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index e9bd52034ffd..91024e672f9c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -141,8 +141,8 @@ private: /// that are "live". These nodes must be scheduled before any other nodes that /// modifies the registers can be scheduled. unsigned NumLiveRegs; - std::vector<SUnit*> LiveRegDefs; - std::vector<SUnit*> LiveRegGens; + std::unique_ptr<SUnit*[]> LiveRegDefs; + std::unique_ptr<SUnit*[]> LiveRegGens; // Collect interferences between physical register use/defs. // Each interference is an SUnit and set of physical registers. @@ -328,8 +328,8 @@ void ScheduleDAGRRList::Schedule() { NumLiveRegs = 0; // Allocate slots for each physical register, plus one for a special register // to track the virtual resource of a calling sequence. 
- LiveRegDefs.resize(TRI->getNumRegs() + 1, nullptr); - LiveRegGens.resize(TRI->getNumRegs() + 1, nullptr); + LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]()); + LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]()); CallSeqEndForStart.clear(); assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); @@ -1206,7 +1206,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); NumRes = MCID.getNumDefs(); - for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -1218,7 +1218,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, /// CheckForLiveRegDef - Return true and update live register vector if the /// specified register def of the specified SUnit clobbers any "live" registers. static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, - std::vector<SUnit*> &LiveRegDefs, + SUnit **LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, SmallVectorImpl<unsigned> &LRegs, const TargetRegisterInfo *TRI) { @@ -1240,7 +1240,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered /// by RegMask, and add them to LRegs. static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, - std::vector<SUnit*> &LiveRegDefs, + ArrayRef<SUnit*> LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, SmallVectorImpl<unsigned> &LRegs) { // Look at all live registers. Skip Reg0 and the special CallResource. @@ -1278,7 +1278,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU) - CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, + CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs.get(), RegAdded, LRegs, TRI); } @@ -1302,7 +1302,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) - CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); + CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } } else i += NumVals; @@ -1328,13 +1328,15 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { } } if (const uint32_t *RegMask = getNodeRegMask(Node)) - CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs); + CheckForLiveRegDefMasked(SU, RegMask, + makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()), + RegAdded, LRegs); const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) - CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); + for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) + CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } return !LRegs.empty(); @@ -2718,7 +2720,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, ScheduleDAGRRList *scheduleDAG, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - const uint16_t *ImpDefs + const MCPhysReg *ImpDefs = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); const uint32_t 
*RegMask = getNodeRegMask(SU->getNode()); if(!ImpDefs && !RegMask) @@ -2737,7 +2739,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, return true; if (ImpDefs) - for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef) + for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef) // Return true if SU clobbers this physical register use and the // definition of the register reaches from DepSU. IsReachable queries // a topological forward sort of the DAG (following the successors). @@ -2756,13 +2758,13 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetRegisterInfo *TRI) { SDNode *N = SuccSU->getNode(); unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); + const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); assert(ImpDefs && "Caller should check hasPhysRegDefs"); for (const SDNode *SUNode = SU->getNode(); SUNode; SUNode = SUNode->getGluedNode()) { if (!SUNode->isMachineOpcode()) continue; - const uint16_t *SUImpDefs = + const MCPhysReg *SUImpDefs = TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); const uint32_t *SURegMask = getNodeRegMask(SUNode); if (!SUImpDefs && !SURegMask) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 159c28cd2a61..5cc806668b12 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -86,12 +86,6 @@ namespace llvm { /// flagged together nodes with a single SUnit. void BuildSchedGraph(AliasAnalysis *AA); - /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is - /// CopyToReg and its only active data operands are CopyFromReg within a - /// single block loop. - /// - void InitVRegCycleFlag(SUnit *SU); - /// InitNumRegDefsLeft - Determine the # of regs defined by this node. /// void InitNumRegDefsLeft(SUnit *SU); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 14f44ccc60ce..abbc48e10e46 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "SDNodeDbgValue.h" +#include "llvm/ADT/APSInt.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -210,28 +211,6 @@ bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { return true; } -/// isScalarToVector - Return true if the specified node is a -/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low -/// element is not an undef. -bool ISD::isScalarToVector(const SDNode *N) { - if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) - return true; - - if (N->getOpcode() != ISD::BUILD_VECTOR) - return false; - if (N->getOperand(0).getOpcode() == ISD::UNDEF) - return false; - unsigned NumElems = N->getNumOperands(); - if (NumElems == 1) - return false; - for (unsigned i = 1; i < NumElems; ++i) { - SDValue V = N->getOperand(i); - if (V.getOpcode() != ISD::UNDEF) - return false; - } - return true; -} - /// allOperandsUndef - Return true if the node has at least one operand /// and all operands of the specified node are ISD::UNDEF. 
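// A compact sketch of the container change to LiveRegDefs/LiveRegGens in
// ScheduleDAGRRList above: std::unique_ptr<T[]> with a value-initializing
// array-new gives a fixed-size, zero-filled table without std::vector's
// capacity bookkeeping. The trailing "()" is what guarantees every slot
// starts out as nullptr. SUnitStub is a stand-in type for illustration.
#include <cassert>
#include <memory>

struct SUnitStub {};

int main() {
  const unsigned NumRegs = 128;
  // One slot per physical register, plus one for a special resource,
  // mirroring the allocation in the patch.
  std::unique_ptr<SUnitStub *[]> LiveRegDefs(new SUnitStub *[NumRegs + 1]());
  for (unsigned i = 0; i != NumRegs + 1; ++i)
    assert(LiveRegDefs[i] == nullptr && "value-init zero-fills the array");
  return 0;
}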
bool ISD::allOperandsUndef(const SDNode *N) { @@ -397,24 +376,21 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID, ID.AddInteger(Op.getResNo()); } } + /// Add logical or fast math flag values to FoldingSetNodeID value. static void AddNodeIDFlags(FoldingSetNodeID &ID, unsigned Opcode, const SDNodeFlags *Flags) { - if (!Flags || !isBinOpWithFlags(Opcode)) + if (!isBinOpWithFlags(Opcode)) return; - unsigned RawFlags = Flags->getRawFlags(); - // If no flags are set, do not alter the ID. We must match the ID of nodes - // that were created without explicitly specifying flags. This also saves time - // and allows a gradual increase in API usage of the optional optimization - // flags. - if (RawFlags != 0) - ID.AddInteger(RawFlags); + unsigned RawFlags = 0; + if (Flags) + RawFlags = Flags->getRawFlags(); + ID.AddInteger(RawFlags); } static void AddNodeIDFlags(FoldingSetNodeID &ID, const SDNode *N) { - if (auto *Node = dyn_cast<BinaryWithFlagsSDNode>(N)) - AddNodeIDFlags(ID, Node->getOpcode(), &Node->Flags); + AddNodeIDFlags(ID, N->getOpcode(), N->getFlags()); } static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC, @@ -624,9 +600,9 @@ void SelectionDAG::RemoveDeadNodes() { SmallVector<SDNode*, 128> DeadNodes; // Add all obviously-dead nodes to the DeadNodes worklist. - for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) - if (I->use_empty()) - DeadNodes.push_back(I); + for (SDNode &Node : allnodes()) + if (Node.use_empty()) + DeadNodes.push_back(&Node); RemoveDeadNodes(DeadNodes); @@ -766,6 +742,7 @@ static void VerifySDNode(SDNode *N) { void SelectionDAG::InsertNode(SDNode *N) { AllNodes.push_back(N); #ifndef NDEBUG + N->PersistentId = NextPersistentId++; VerifySDNode(N); #endif } @@ -929,7 +906,7 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), UpdateListeners(nullptr) { - AllNodes.push_back(&EntryNode); + InsertNode(&EntryNode); DbgInfo = new SDDbgInfo(); } @@ -950,7 +927,10 @@ void SelectionDAG::allnodes_clear() { assert(&*AllNodes.begin() == &EntryNode); AllNodes.remove(AllNodes.begin()); while (!AllNodes.empty()) - DeallocateNode(AllNodes.begin()); + DeallocateNode(&AllNodes.front()); +#ifndef NDEBUG + NextPersistentId = 0; +#endif } BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL, @@ -1023,7 +1003,7 @@ void SelectionDAG::clear() { static_cast<SDNode*>(nullptr)); EntryNode.UseList = nullptr; - AllNodes.push_back(&EntryNode); + InsertNode(&EntryNode); Root = getEntryNode(); DbgInfo->clear(); } @@ -1429,8 +1409,8 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, - TargetFlags); + SDNode *N = + new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1852,8 +1832,58 @@ SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout()); if (OpTy == ShTy || OpTy.isVector()) return Op; - ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? 
ISD::TRUNCATE : ISD::ZERO_EXTEND; - return getNode(Opcode, SDLoc(Op), ShTy, Op); + return getZExtOrTrunc(Op, SDLoc(Op), ShTy); +} + +SDValue SelectionDAG::expandVAArg(SDNode *Node) { + SDLoc dl(Node); + const TargetLowering &TLI = getTargetLoweringInfo(); + const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); + EVT VT = Node->getValueType(0); + SDValue Tmp1 = Node->getOperand(0); + SDValue Tmp2 = Node->getOperand(1); + unsigned Align = Node->getConstantOperandVal(3); + + SDValue VAListLoad = + getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, Tmp2, + MachinePointerInfo(V), false, false, false, 0); + SDValue VAList = VAListLoad; + + if (Align > TLI.getMinStackArgumentAlignment()) { + assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); + + VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, + getConstant(Align - 1, dl, VAList.getValueType())); + + VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList, + getConstant(-(int64_t)Align, dl, VAList.getValueType())); + } + + // Increment the pointer, VAList, to the next vaarg + Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, + getConstant(getDataLayout().getTypeAllocSize( + VT.getTypeForEVT(*getContext())), + dl, VAList.getValueType())); + // Store the incremented VAList to the legalized pointer + Tmp1 = getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, + MachinePointerInfo(V), false, false, 0); + // Load the actual argument out of the pointer VAList + return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo(), + false, false, false, 0); +} + +SDValue SelectionDAG::expandVACopy(SDNode *Node) { + SDLoc dl(Node); + const TargetLowering &TLI = getTargetLoweringInfo(); + // This defaults to loading a pointer from the input and storing it to the + // output, returning the chain. + const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); + const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); + SDValue Tmp1 = getLoad(TLI.getPointerTy(getDataLayout()), dl, + Node->getOperand(0), Node->getOperand(2), + MachinePointerInfo(VS), false, false, false, 0); + return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), + MachinePointerInfo(VD), false, false, 0); } /// CreateStackTemporary - Create a stack temporary, suitable for holding the @@ -1872,8 +1902,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { /// CreateStackTemporary - Create a stack temporary suitable for holding /// either of the specified value types. SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { - unsigned Bytes = std::max(VT1.getStoreSizeInBits(), - VT2.getStoreSizeInBits())/8; + unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize()); Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); const DataLayout &DL = getDataLayout(); @@ -2255,7 +2284,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, unsigned MemBits = VT.getScalarType().getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); } else if (const MDNode *Ranges = LD->getRanges()) { - computeKnownBitsFromRangeMetadata(*Ranges, KnownZero); + if (LD->getExtensionType() == ISD::NON_EXTLOAD) + computeKnownBitsFromRangeMetadata(*Ranges, KnownZero, KnownOne); } break; } @@ -2564,6 +2594,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (Tmp == 1) return 1; // Early out. 
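// A minimal sketch of the pointer-alignment trick used by expandVAArg above:
// round an address up to a power-of-two boundary with one add and one mask,
// the same ADD/AND pair the patch emits as DAG nodes. Standalone arithmetic
// only; this is not the SelectionDAG code itself.
#include <cassert>
#include <cstdint>

uint64_t alignUp(uint64_t Addr, uint64_t Align) {
  assert((Align & (Align - 1)) == 0 && "Expected Align to be a power of 2");
  return (Addr + Align - 1) & ~(Align - 1); // equivalent to & -(int64_t)Align
}

int main() {
  assert(alignUp(17, 8) == 24);
  assert(alignUp(16, 8) == 16); // already-aligned addresses are unchanged
  return 0;
}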
Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1); return std::min(Tmp, Tmp2); + case ISD::SELECT_CC: + Tmp = ComputeNumSignBits(Op.getOperand(2), Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(3), Depth+1); + return std::min(Tmp, Tmp2); case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -2679,7 +2714,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ const int rIndex = Items - 1 - cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - // If the sign portion ends in our element the substraction gives correct + // If the sign portion ends in our element the subtraction gives correct // result. Otherwise it gives either negative or > bitwidth result return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); } @@ -2798,6 +2833,16 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { return false; } +bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { + assert(A.getValueType() == B.getValueType() && + "Values must have the same type"); + APInt AZero, AOne; + APInt BZero, BOne; + computeKnownBits(A, AZero, AOne); + computeKnownBits(B, BZero, BOne); + return (AZero | BZero).isAllOnesValue(); +} + /// getNode - Gets or creates the specified node. /// SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { @@ -2848,8 +2893,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT); if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT); - else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) + if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT); + if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) + return getConstantFP(APFloat(APFloat::IEEEquad, Val), DL, VT); break; case ISD::BSWAP: return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), @@ -2954,44 +3001,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: case ISD::CTPOP: { - EVT SVT = VT.getScalarType(); - EVT InVT = BV->getValueType(0); - EVT InSVT = InVT.getScalarType(); - - // Find legal integer scalar type for constant promotion and - // ensure that its scalar size is at least as large as source. - EVT LegalSVT = SVT; - if (SVT.isInteger()) { - LegalSVT = TLI->getTypeToTransformTo(*getContext(), SVT); - if (LegalSVT.bitsLT(SVT)) break; - } - - // Let the above scalar folding handle the folding of each element. - SmallVector<SDValue, 8> Ops; - for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { - SDValue OpN = BV->getOperand(i); - EVT OpVT = OpN.getValueType(); - - // Build vector (integer) scalar operands may need implicit - // truncation - do this before constant folding. - if (OpVT.isInteger() && OpVT.bitsGT(InSVT)) - OpN = getNode(ISD::TRUNCATE, DL, InSVT, OpN); - - OpN = getNode(Opcode, DL, SVT, OpN); - - // Legalize the (integer) scalar constant if necessary. 
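// A toy model (plain 64-bit masks instead of APInt) of the new
// haveNoCommonBitsSet helper above: if, for every bit position, at least one
// operand is known to be zero there, the operands share no set bits, and an
// ADD of the two values behaves exactly like an OR -- the kind of rewrite
// such a query enables.
#include <cassert>
#include <cstdint>

bool haveNoCommonBits(uint64_t AKnownZero, uint64_t BKnownZero) {
  // "All ones" after OR-ing the known-zero masks means every bit is known
  // zero in at least one of the two values.
  return (AKnownZero | BKnownZero) == ~0ULL;
}

int main() {
  uint64_t A = 0xF0, B = 0x0F;     // fully-known constants
  uint64_t AZero = ~A, BZero = ~B; // their known-zero masks
  assert(haveNoCommonBits(AZero, BZero));
  assert((A + B) == (A | B)); // so the ADD could be rewritten as an OR
  return 0;
}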
- if (LegalSVT != SVT) - OpN = getNode(ISD::ANY_EXTEND, DL, LegalSVT, OpN); - - if (OpN.getOpcode() != ISD::UNDEF && - OpN.getOpcode() != ISD::Constant && - OpN.getOpcode() != ISD::ConstantFP) - break; - Ops.push_back(OpN); - } - if (Ops.size() == VT.getVectorNumElements()) - return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); - break; + SDValue Ops = { Operand }; + if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) + return Fold; } } } @@ -3012,6 +3024,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid fpext node, dst < src!"); if (Operand.getOpcode() == ISD::UNDEF) return getUNDEF(VT); break; @@ -3019,12 +3033,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid SIGN_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && - "Invalid sext node, dst < src!"); assert((!VT.isVector() || VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid sext node, dst < src!"); if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); else if (OpOpcode == ISD::UNDEF) @@ -3035,12 +3049,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid ZERO_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && - "Invalid zext node, dst < src!"); assert((!VT.isVector() || VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid zext node, dst < src!"); if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getNode()->getOperand(0)); @@ -3052,12 +3066,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid ANY_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && - "Invalid anyext node, dst < src!"); assert((!VT.isVector() || VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid anyext node, dst < src!"); if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ANY_EXTEND) @@ -3077,12 +3091,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid TRUNCATE!"); if (Operand.getValueType() == VT) return Operand; // noop truncate - assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) && - "Invalid truncate node, src < dst!"); assert((!VT.isVector() || VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsGT(VT) && + "Invalid truncate node, src < dst!"); if (OpOpcode == ISD::TRUNCATE) return 
getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || @@ -3135,8 +3149,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, case ISD::FNEG: // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) + // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags? return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), - Operand.getNode()->getOperand(0)); + Operand.getNode()->getOperand(0), + &cast<BinaryWithFlagsSDNode>(Operand.getNode())->Flags); if (OpOpcode == ISD::FNEG) // --X -> X return Operand.getNode()->getOperand(0); break; @@ -3182,6 +3198,10 @@ static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1, case ISD::SRA: return std::make_pair(C1.ashr(C2), true); case ISD::ROTL: return std::make_pair(C1.rotl(C2), true); case ISD::ROTR: return std::make_pair(C1.rotr(C2), true); + case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true); + case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true); + case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true); + case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true); case ISD::UDIV: if (!C2.getBoolValue()) break; @@ -3284,10 +3304,118 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs); } +SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL, + EVT VT, + ArrayRef<SDValue> Ops, + const SDNodeFlags *Flags) { + // If the opcode is a target-specific ISD node, there's nothing we can + // do here and the operand rules may not line up with the below, so + // bail early. + if (Opcode >= ISD::BUILTIN_OP_END) + return SDValue(); + + // We can only fold vectors - maybe merge with FoldConstantArithmetic someday? + if (!VT.isVector()) + return SDValue(); + + unsigned NumElts = VT.getVectorNumElements(); + + auto IsScalarOrSameVectorSize = [&](const SDValue &Op) { + return !Op.getValueType().isVector() || + Op.getValueType().getVectorNumElements() == NumElts; + }; + + auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) { + BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op); + return (Op.getOpcode() == ISD::UNDEF) || + (Op.getOpcode() == ISD::CONDCODE) || (BV && BV->isConstant()); + }; + + // All operands must be vector types with the same number of elements as + // the result type and must be either UNDEF or a build vector of constant + // or UNDEF scalars. + if (!std::all_of(Ops.begin(), Ops.end(), IsConstantBuildVectorOrUndef) || + !std::all_of(Ops.begin(), Ops.end(), IsScalarOrSameVectorSize)) + return SDValue(); + + // If we are comparing vectors, then the result needs to be a i1 boolean + // that is then sign-extended back to the legal result type. + EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType()); + + // Find legal integer scalar type for constant promotion and + // ensure that its scalar size is at least as large as source. + EVT LegalSVT = VT.getScalarType(); + if (LegalSVT.isInteger()) { + LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); + if (LegalSVT.bitsLT(SVT)) + return SDValue(); + } + + // Constant fold each scalar lane separately. 
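// A short sketch of the SMIN/SMAX/UMIN/UMAX constant-folding cases added to
// FoldValue above: the same bit pattern folds differently depending on
// whether the comparison is signed or unsigned, which is why the patch uses
// APInt's sle/ule rather than a single operator<. Plain integer types here
// stand in for APInt.
#include <cassert>
#include <cstdint>

uint64_t foldUMin(uint64_t A, uint64_t B) { return A <= B ? A : B; }
int64_t foldSMin(int64_t A, int64_t B) { return A <= B ? A : B; }

int main() {
  uint64_t Bits = 0xFFFFFFFFFFFFFFFFULL;               // -1 when read signed
  assert(foldUMin(Bits, 1) == 1);                      // unsigned: huge value
  assert(foldSMin((int64_t)Bits, 1) == (int64_t)Bits); // signed: -1 is min
  return 0;
}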
+ SmallVector<SDValue, 4> ScalarResults; + for (unsigned i = 0; i != NumElts; i++) { + SmallVector<SDValue, 4> ScalarOps; + for (SDValue Op : Ops) { + EVT InSVT = Op.getValueType().getScalarType(); + BuildVectorSDNode *InBV = dyn_cast<BuildVectorSDNode>(Op); + if (!InBV) { + // We've checked that this is UNDEF or a constant of some kind. + if (Op.isUndef()) + ScalarOps.push_back(getUNDEF(InSVT)); + else + ScalarOps.push_back(Op); + continue; + } + + SDValue ScalarOp = InBV->getOperand(i); + EVT ScalarVT = ScalarOp.getValueType(); + + // Build vector (integer) scalar operands may need implicit + // truncation - do this before constant folding. + if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT)) + ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp); + + ScalarOps.push_back(ScalarOp); + } + + // Constant fold the scalar operands. + SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags); + + // Legalize the (integer) scalar constant if necessary. + if (LegalSVT != SVT) + ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult); + + // Scalar folding only succeeded if the result is a constant or UNDEF. + if (ScalarResult.getOpcode() != ISD::UNDEF && + ScalarResult.getOpcode() != ISD::Constant && + ScalarResult.getOpcode() != ISD::ConstantFP) + return SDValue(); + ScalarResults.push_back(ScalarResult); + } + + assert(ScalarResults.size() == NumElts && + "Unexpected number of scalar results for BUILD_VECTOR"); + return getNode(ISD::BUILD_VECTOR, DL, VT, ScalarResults); +} + SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, const SDNodeFlags *Flags) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); + + // Canonicalize constant to RHS if commutative. 
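// A minimal sketch of the canonicalization the comment below introduces: for
// commutative operators, moving a lone constant to the right-hand side means
// later folds (x+0, x*1, ...) only ever need to inspect operand 1. The types
// here are illustrative; the real code swaps SDValues together with their
// ConstantSDNode/ConstantFPSDNode views.
#include <optional>
#include <utility>

struct Operand {
  std::optional<long> Imm; // engaged when the operand is a constant
};

void canonicalizeCommutative(Operand &LHS, Operand &RHS) {
  if (LHS.Imm && !RHS.Imm)
    std::swap(LHS, RHS); // the constant goes to the RHS
}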
+ if (isCommutativeBinOp(Opcode)) { + if (N1C && !N2C) { + std::swap(N1C, N2C); + std::swap(N1, N2); + } else if (N1CFP && !N2CFP) { + std::swap(N1CFP, N2CFP); + std::swap(N1, N2); + } + } + switch (Opcode) { default: break; case ISD::TokenFactor: @@ -3356,6 +3484,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, case ISD::MUL: case ISD::SDIV: case ISD::SREM: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: assert(VT.isInteger() && "This operator does not apply to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); @@ -3367,37 +3499,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, case ISD::FREM: if (getTarget().Options.UnsafeFPMath) { if (Opcode == ISD::FADD) { - // 0+x --> x - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) - if (CFP->getValueAPF().isZero()) - return N2; // x+0 --> x - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2)) - if (CFP->getValueAPF().isZero()) - return N1; + if (N2CFP && N2CFP->getValueAPF().isZero()) + return N1; } else if (Opcode == ISD::FSUB) { // x-0 --> x - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2)) - if (CFP->getValueAPF().isZero()) - return N1; + if (N2CFP && N2CFP->getValueAPF().isZero()) + return N1; } else if (Opcode == ISD::FMUL) { - ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1); - SDValue V = N2; - - // If the first operand isn't the constant, try the second - if (!CFP) { - CFP = dyn_cast<ConstantFPSDNode>(N2); - V = N1; - } - - if (CFP) { - // 0*x --> 0 - if (CFP->isZero()) - return SDValue(CFP,0); - // 1*x --> x - if (CFP->isExactlyValue(1.0)) - return V; - } + // x*0 --> 0 + if (N2CFP && N2CFP->isZero()) + return N2; + // x*1 --> x + if (N2CFP && N2CFP->isExactlyValue(1.0)) + return N1; } } assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); @@ -3457,7 +3572,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() && VT.bitsLE(N1.getValueType()) && - isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!"); + N2C && "Invalid FP_ROUND!"); if (N1.getValueType() == VT) return N1; // noop conversion. break; case ISD::AssertSext: @@ -3502,13 +3617,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SmallVector<SDValue, 8> Ops; for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { SDValue Op = N1.getOperand(i); - if (Op.getValueType() != VT.getScalarType()) break; if (Op.getOpcode() == ISD::UNDEF) { - Ops.push_back(Op); + Ops.push_back(getUNDEF(VT.getScalarType())); continue; } if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { APInt Val = C->getAPIntValue(); + Val = Val.zextOrTrunc(VT.getScalarSizeInBits()); Ops.push_back(SignExtendInReg(Val)); continue; } @@ -3590,15 +3705,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, return N1.getOperand(N2C->getZExtValue()); // EXTRACT_ELEMENT of a constant int is also very common. 
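// A short standalone demonstration of why the "x+0 --> x" style folds in the
// UnsafeFPMath block above are guarded by a fast-math check: with IEEE
// signed zeroes the rewrite changes an observable result, since
// (-0.0) + 0.0 produces +0.0, not -0.0.
#include <cassert>
#include <cmath>

int main() {
  double X = -0.0;
  double Folded = X;      // what the x+0 --> x rewrite would return
  double Exact = X + 0.0; // what IEEE addition actually returns: +0.0
  assert(std::signbit(Folded) && !std::signbit(Exact));
  return 0;
}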
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + if (N1C) { unsigned ElementSize = VT.getSizeInBits(); unsigned Shift = ElementSize * N2C->getZExtValue(); - APInt ShiftedVal = C->getAPIntValue().lshr(Shift); + APInt ShiftedVal = N1C->getAPIntValue().lshr(Shift); return getConstant(ShiftedVal.trunc(ElementSize), DL, VT); } break; - case ISD::EXTRACT_SUBVECTOR: { - SDValue Index = N2; + case ISD::EXTRACT_SUBVECTOR: if (VT.isSimple() && N1.getValueType().isSimple()) { assert(VT.isVector() && N1.getValueType().isVector() && "Extract subvector VTs must be a vectors!"); @@ -3608,9 +3722,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, assert(VT.getSimpleVT() <= N1.getSimpleValueType() && "Extract subvector must be from larger vector to smaller vector!"); - if (isa<ConstantSDNode>(Index)) { - assert((VT.getVectorNumElements() + - cast<ConstantSDNode>(Index)->getZExtValue() + if (N2C) { + assert((VT.getVectorNumElements() + N2C->getZExtValue() <= N1.getValueType().getVectorNumElements()) && "Extract subvector overflow!"); } @@ -3621,29 +3734,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } break; } - } // Perform trivial constant folding. if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode())) return SV; - // Canonicalize constant to RHS if commutative. - if (N1C && !N2C && isCommutativeBinOp(Opcode)) { - std::swap(N1C, N2C); - std::swap(N1, N2); - } - // Constant fold FP operations. bool HasFPExceptions = TLI->hasFloatingPointExceptions(); - ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); - ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); if (N1CFP) { - if (!N2CFP && isCommutativeBinOp(Opcode)) { - // Canonicalize constant to RHS if commutative. - std::swap(N1CFP, N2CFP); - std::swap(N1, N2); - } else if (N2CFP) { + if (N2CFP) { APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF(); APFloat::opStatus s; switch (Opcode) { @@ -3670,7 +3770,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } break; case ISD::FREM : - s = V1.mod(V2, APFloat::rmNearestTiesToEven); + s = V1.mod(V2); if (!HasFPExceptions || (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)) { return getConstantFP(V1, DL, VT); @@ -3795,7 +3895,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3) { // Perform various simplifications. - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); switch (Opcode) { case ISD::FMA: { ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); @@ -3827,12 +3926,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, break; case ISD::SETCC: { // Use FoldSetCC to simplify SETCC's. - SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL); - if (Simp.getNode()) return Simp; + if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL)) + return V; + // Vector constant folding. 
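// A toy model of the per-lane folding strategy of FoldConstantVectorArithmetic
// above (also invoked just below for vector SETCC): fold each scalar lane
// independently, then rebuild the vector, abandoning the whole fold if any
// lane fails to produce a constant. Plain containers stand in for
// BUILD_VECTOR nodes; all names are illustrative.
#include <cstddef>
#include <functional>
#include <optional>
#include <vector>

using Lane = std::optional<long>; // nullopt models a lane that didn't fold

std::optional<std::vector<long>>
foldPerLane(const std::vector<long> &A, const std::vector<long> &B,
            const std::function<Lane(long, long)> &FoldLane) {
  std::vector<long> Out;
  for (std::size_t i = 0; i < A.size(); ++i) {
    Lane R = FoldLane(A[i], B[i]);
    if (!R)
      return std::nullopt; // one lane failed: give up on the vector fold
    Out.push_back(*R);
  }
  return Out;
}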
+ SDValue Ops[] = {N1, N2, N3}; + if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) + return V; break; } case ISD::SELECT: - if (N1C) { + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) { if (N1C->getZExtValue()) return N2; // select true, X, Y -> X return N3; // select false, X, Y -> Y @@ -4153,6 +4256,14 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, return true; } +static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { + // On Darwin, -Os means optimize for size without hurting performance, so + // only really optimize for size when -Oz (MinSize) is used. + if (MF.getTarget().getTargetTriple().isOSDarwin()) + return MF.getFunction()->optForMinSize(); + return MF.getFunction()->optForSize(); +} + static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, @@ -4173,7 +4284,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4286,7 +4397,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4380,7 +4491,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4446,6 +4557,16 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } +static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, + unsigned AS) { + // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all + // pointer operands can be losslessly bitcasted to pointers of address space 0 + if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) { + report_fatal_error("cannot lower memory intrinsic in address space " + + Twine(AS)); + } +} + SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, @@ -4487,6 +4608,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, true, DstPtrInfo, SrcPtrInfo); } + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); + // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc // memcpy is not guaranteed to be safe. 
libc memcpys aren't required to // respect volatile, so they may do things like read or write memory @@ -4548,6 +4672,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, return Result; } + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); + // FIXME: If the memmove is volatile, lowering it to plain libc memmove may // not be safe. See memcpy above for more details. @@ -4605,6 +4732,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, return Result; } + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + // Emit a library call. Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; @@ -4872,10 +5001,12 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, /// MachinePointerInfo record from it. This is particularly useful because the /// code generator has many cases where it doesn't bother passing in a /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". -static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) { +static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, + int64_t Offset = 0) { // If this is FI+Offset, we can model it. if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) - return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset); + return MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), + FI->getIndex(), Offset); // If this is (FI+Offset1)+Offset2, we can model it. if (Ptr.getOpcode() != ISD::ADD || @@ -4884,20 +5015,22 @@ static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) { return MachinePointerInfo(); int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); - return MachinePointerInfo::getFixedStack(FI, Offset+ - cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue()); + return MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI, + Offset + cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue()); } /// InferPointerInfo - If the specified ptr/offset is a frame index, infer a /// MachinePointerInfo record from it. This is particularly useful because the /// code generator has many cases where it doesn't bother passing in a /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". -static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) { +static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, + SDValue OffsetOp) { // If the 'Offset' value isn't a constant, we can't handle this. if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp)) - return InferPointerInfo(Ptr, OffsetNode->getSExtValue()); + return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue()); if (OffsetOp.getOpcode() == ISD::UNDEF) - return InferPointerInfo(Ptr); + return InferPointerInfo(DAG, Ptr); return MachinePointerInfo(); } @@ -4926,7 +5059,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, // If we don't have a PtrInfo, infer the trivial frame index case to simplify // clients. 
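// A minimal sketch of the checkAddrSpaceIsValidForLibcall guard above:
// lowering a memory intrinsic to a libc call passes plain address-space-0
// pointers, so any other address space must be a no-op cast away or the
// lowering must fail hard. The throw models report_fatal_error, and the
// predicate is a stub standing in for the TLI->isNoopAddrSpaceCast hook.
#include <stdexcept>
#include <string>

static bool isNoopAddrSpaceCastStub(unsigned From, unsigned To) {
  return From == To; // stub: only the identity cast is a no-op
}

void checkAddrSpaceForLibcall(unsigned AS) {
  if (AS != 0 && !isNoopAddrSpaceCastStub(AS, 0))
    throw std::runtime_error("cannot lower memory intrinsic in address space " +
                             std::to_string(AS));
}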
if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(Ptr, Offset); + PtrInfo = InferPointerInfo(*this, Ptr, Offset); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = @@ -5054,7 +5187,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, Flags |= MachineMemOperand::MONonTemporal; if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(Ptr); + PtrInfo = InferPointerInfo(*this, Ptr); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = @@ -5109,7 +5242,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, Flags |= MachineMemOperand::MONonTemporal; if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(Ptr); + PtrInfo = InferPointerInfo(*this, Ptr); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = @@ -5261,7 +5394,7 @@ SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl, cast<MaskedGatherSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - MaskedGatherSDNode *N = + MaskedGatherSDNode *N = new (NodeAllocator) MaskedGatherSDNode(dl.getIROrder(), dl.getDebugLoc(), Ops, VTs, VT, MMO); CSEMap.InsertNode(N, IP); @@ -5317,12 +5450,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, - ArrayRef<SDValue> Ops) { + ArrayRef<SDValue> Ops, const SDNodeFlags *Flags) { unsigned NumOps = Ops.size(); switch (NumOps) { case 0: return getNode(Opcode, DL, VT); case 1: return getNode(Opcode, DL, VT, Ops[0]); - case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]); + case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags); case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); default: break; } @@ -5656,7 +5789,7 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) { "Update with wrong number of operands"); // If no operands changed just return the input node. - if (Ops.empty() || std::equal(Ops.begin(), Ops.end(), N->op_begin())) + if (std::equal(Ops.begin(), Ops.end(), N->op_begin())) return N; // See if the modified node already exists. @@ -6451,13 +6584,13 @@ unsigned SelectionDAG::AssignTopologicalOrder() { // Node Id fields for nodes At SortedPos and after will contain the // count of outstanding operands. for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) { - SDNode *N = I++; + SDNode *N = &*I++; checkForCycles(N, this); unsigned Degree = N->getNumOperands(); if (Degree == 0) { // A node with no uses, add it to the result array immediately. N->setNodeId(DAGSize++); - allnodes_iterator Q = N; + allnodes_iterator Q(N); if (Q != SortedPos) SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q)); assert(SortedPos != AllNodes.end() && "Overran node list"); @@ -6470,8 +6603,8 @@ unsigned SelectionDAG::AssignTopologicalOrder() { // Visit all the nodes. As we iterate, move nodes into sorted order, // such that by the time the end is reached all nodes will be sorted. - for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) { - SDNode *N = I; + for (SDNode &Node : allnodes()) { + SDNode *N = &Node; checkForCycles(N, this); // N is in sorted position, so all its uses have one less operand // that needs to be sorted. 
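// A compact sketch of the ordering scheme AssignTopologicalOrder (iterated
// just below) implements: Kahn's algorithm, where each node's id is first
// set to its outstanding-operand count and a node becomes emittable once
// that count reaches zero. Plain adjacency lists stand in for SDNode
// operand/use lists.
#include <cstddef>
#include <queue>
#include <vector>

std::vector<int> topoOrder(const std::vector<std::vector<int>> &Operands) {
  const std::size_t N = Operands.size();
  std::vector<int> Pending(N);
  std::vector<int> Order;
  std::vector<std::vector<int>> Users(N);
  std::queue<int> Ready;
  for (std::size_t i = 0; i < N; ++i) {
    Pending[i] = static_cast<int>(Operands[i].size());
    for (int Op : Operands[i])
      Users[Op].push_back(static_cast<int>(i));
    if (Pending[i] == 0)
      Ready.push(static_cast<int>(i)); // no operands: sorted immediately
  }
  while (!Ready.empty()) {
    int Node = Ready.front();
    Ready.pop();
    Order.push_back(Node);
    for (int U : Users[Node]) // each user has one less unsorted operand
      if (--Pending[U] == 0)
        Ready.push(U);
  }
  return Order; // shorter than N only if the graph had a cycle
}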
@@ -6493,9 +6626,10 @@ unsigned SelectionDAG::AssignTopologicalOrder() { P->setNodeId(Degree); } } - if (I == SortedPos) { + if (&Node == SortedPos) { #ifndef NDEBUG - SDNode *S = ++I; + allnodes_iterator I(N); + SDNode *S = &*++I; dbgs() << "Overran sorted position:\n"; S->dumprFull(this); dbgs() << "\n"; dbgs() << "Checking if this is due to cycles\n"; @@ -6559,6 +6693,26 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { // SDNode Class //===----------------------------------------------------------------------===// +bool llvm::isNullConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isNullValue(); +} + +bool llvm::isNullFPConstant(SDValue V) { + ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V); + return Const != nullptr && Const->isZero() && !Const->isNegative(); +} + +bool llvm::isAllOnesConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isAllOnesValue(); +} + +bool llvm::isOneConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isOne(); +} + HandleSDNode::~HandleSDNode() { DropOperands(); } @@ -6772,6 +6926,12 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const { return cast<ConstantSDNode>(OperandList[Num])->getZExtValue(); } +const SDNodeFlags *SDNode::getFlags() const { + if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this)) + return &FlagsNode->Flags; + return nullptr; +} + SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { assert(N->getNumValues() == 1 && "Can't unroll a vector with multiple results!"); @@ -6808,9 +6968,11 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { } switch (N->getOpcode()) { - default: - Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands)); + default: { + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands, + N->getFlags())); break; + } case ISD::VSELECT: Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands)); break; @@ -7101,6 +7263,24 @@ BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const { return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements)); } +int32_t +BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, + uint32_t BitWidth) const { + if (ConstantFPSDNode *CN = + dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) { + bool IsExact; + APSInt IntVal(BitWidth); + APFloat APF = CN->getValueAPF(); + if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) != + APFloat::opOK || + !IsExact) + return -1; + + return IntVal.exactLogBase2(); + } + return -1; +} + bool BuildVectorSDNode::isConstant() const { for (const SDValue &Op : op_values()) { unsigned Opc = Op.getOpcode(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2c3c0eb101a0..d2ea85ab4d22 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -63,6 +64,7 @@ #include "llvm/Target/TargetSelectionDAGInfo.h" #include 
"llvm/Target/TargetSubtargetInfo.h" #include <algorithm> +#include <utility> using namespace llvm; #define DEBUG_TYPE "isel" @@ -79,7 +81,7 @@ LimitFPPrecision("limit-float-precision", cl::init(0)); static cl::opt<bool> -EnableFMFInDAG("enable-fmf-dag", cl::init(false), cl::Hidden, +EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden, cl::desc("Enable fast-math-flags for DAG nodes")); // Limit the width of DAG chains. This is important in general to prevent @@ -196,6 +198,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, if (PartEVT == ValueVT) return Val; + if (PartEVT.isInteger() && ValueVT.isFloatingPoint() && + ValueVT.bitsLT(PartEVT)) { + // For an FP value in an integer part, we need to truncate to the right + // width first. + PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val); + } + if (PartEVT.isInteger() && ValueVT.isInteger()) { if (ValueVT.bitsLT(PartEVT)) { // For a truncate, see if we have any information to @@ -319,9 +329,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && "Cannot handle this kind of promotion"); // Promoted vector extract - bool Smaller = ValueVT.bitsLE(PartEVT); - return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, ValueVT, Val); + return DAG.getAnyExtOrTrunc(Val, DL, ValueVT); } @@ -339,11 +347,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, } if (ValueVT.getVectorNumElements() == 1 && - ValueVT.getVectorElementType() != PartEVT) { - bool Smaller = ValueVT.bitsLE(PartEVT); - Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, ValueVT.getScalarType(), Val); - } + ValueVT.getVectorElementType() != PartEVT) + Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType()); return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } @@ -387,6 +392,12 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, assert(NumParts == 1 && "Do not know what to promote to!"); Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); } else { + if (ValueVT.isFloatingPoint()) { + // FP values need to be bitcast, then extended if they are being put + // into a larger container. + ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + } assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && ValueVT.isInteger() && "Unknown mismatch!"); @@ -520,9 +531,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { // Promoted vector extract - bool Smaller = PartEVT.bitsLE(ValueVT); - Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, PartVT, Val); + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } else{ // Vector -> scalar conversion. assert(ValueVT.getVectorNumElements() == 1 && @@ -531,9 +540,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); - bool Smaller = ValueVT.bitsLE(PartVT); - Val = DAG.getNode((Smaller ? 
ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, PartVT, Val); + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } Parts[0] = Val; @@ -595,8 +602,7 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL, unsigned Reg, Type *Ty) { ComputeValueVTs(TLI, DL, Ty, ValueVTs); - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; + for (EVT ValueVT : ValueVTs) { unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT); MVT RegisterVT = TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) @@ -907,7 +913,8 @@ void SelectionDAGBuilder::visit(const Instruction &I) { visit(I.getOpcode(), I); - if (!isa<TerminatorInst>(&I) && !HasTailCall) + if (!isa<TerminatorInst>(&I) && !HasTailCall && + !isStatepoint(&I)) // statepoints handle their exports internally CopyToExportRegsIfNeeded(&I); CurInst = nullptr; @@ -943,14 +950,12 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, assert(Variable->isValidLocationForIntrinsic(dl) && "Expected inlined-at fields to agree"); uint64_t Offset = DI->getOffset(); - // A dbg.value for an alloca is always indirect. - bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; SDDbgValue *SDV; if (Val.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, IsIndirect, + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false, Val)) { SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), - IsIndirect, Offset, dl, DbgSDNodeOrder); + false, Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } } else @@ -1168,6 +1173,135 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { llvm_unreachable("Can't get register for value!"); } +void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) { + auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX; + bool IsCoreCLR = Pers == EHPersonality::CoreCLR; + MachineBasicBlock *CatchPadMBB = FuncInfo.MBB; + // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues. + if (IsMSVCCXX || IsCoreCLR) + CatchPadMBB->setIsEHFuncletEntry(); + + DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, getControlRoot())); +} + +void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { + // Update machine-CFG edge. + MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()]; + FuncInfo.MBB->addSuccessor(TargetMBB); + + auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + bool IsSEH = isAsynchronousEHPersonality(Pers); + if (IsSEH) { + // If this is not a fall-through branch or optimizations are switched off, + // emit the branch. + if (TargetMBB != NextBlock(FuncInfo.MBB) || + TM.getOptLevel() == CodeGenOpt::None) + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(TargetMBB))); + return; + } + + // Figure out the funclet membership for the catchret's successor. + // This will be used by the FuncletLayout pass to determine how to order the + // BB's. + WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); + const BasicBlock *SuccessorColor = EHInfo->CatchRetSuccessorColorMap[&I]; + assert(SuccessorColor && "No parent funclet for catchret!"); + MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor]; + assert(SuccessorColorMBB && "No MBB for SuccessorColor!"); + + // Create the terminator node. 
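// A tiny sketch of the getAnyExtOrTrunc / getZExtOrTrunc cleanups above: one
// helper picks truncate or extend from the relative bit widths, replacing
// the repeated "bool Smaller = ...; getNode(Smaller ? TRUNCATE : ANY_EXTEND,
// ...)" pattern. Here a uint64_t container models the value; keeping the
// bits as-is models an any-extend (upper bits unspecified).
#include <cstdint>

uint64_t extOrTrunc(uint64_t Val, unsigned FromBits, unsigned ToBits) {
  if (ToBits >= FromBits)
    return Val; // "any extend": leave the stored bits alone
  return Val & ((1ULL << ToBits) - 1); // truncate to the narrow width
}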
+ SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(TargetMBB), + DAG.getBasicBlock(SuccessorColorMBB)); + DAG.setRoot(Ret); +} + +void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) { + // Don't emit any special code for the cleanuppad instruction. It just marks + // the start of a funclet. + FuncInfo.MBB->setIsEHFuncletEntry(); + FuncInfo.MBB->setIsCleanupFuncletEntry(); +} + +/// When an invoke or a cleanupret unwinds to the next EH pad, there are +/// many places it could ultimately go. In the IR, we have a single unwind +/// destination, but in the machine CFG, we enumerate all the possible blocks. +/// This function skips over imaginary basic blocks that hold catchswitch +/// instructions, and finds all the "real" machine +/// basic block destinations. As those destinations may not be successors of +/// EHPadBB, here we also calculate the edge probability to those destinations. +/// The passed-in Prob is the edge probability to EHPadBB. +static void findUnwindDestinations( + FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB, + BranchProbability Prob, + SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>> + &UnwindDests) { + EHPersonality Personality = + classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; + bool IsCoreCLR = Personality == EHPersonality::CoreCLR; + + while (EHPadBB) { + const Instruction *Pad = EHPadBB->getFirstNonPHI(); + BasicBlock *NewEHPadBB = nullptr; + if (isa<LandingPadInst>(Pad)) { + // Stop on landingpads. They are not funclets. + UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); + break; + } else if (isa<CleanupPadInst>(Pad)) { + // Stop on cleanup pads. Cleanups are always funclet entries for all known + // personalities. + UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); + UnwindDests.back().first->setIsEHFuncletEntry(); + break; + } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { + // Add the catchpad handlers to the possible destinations. + for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { + UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob); + // For MSVC++ and the CLR, catchblocks are funclets and need prologues. + if (IsMSVCCXX || IsCoreCLR) + UnwindDests.back().first->setIsEHFuncletEntry(); + } + NewEHPadBB = CatchSwitch->getUnwindDest(); + } else { + continue; + } + + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (BPI && NewEHPadBB) + Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB); + EHPadBB = NewEHPadBB; + } +} + +void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) { + // Update successor info. + SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests; + auto UnwindDest = I.getUnwindDest(); + BranchProbabilityInfo *BPI = FuncInfo.BPI; + BranchProbability UnwindDestProb = + (BPI && UnwindDest) + ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest) + : BranchProbability::getZero(); + findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests); + for (auto &UnwindDest : UnwindDests) { + UnwindDest.first->setIsEHPad(); + addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second); + } + FuncInfo.MBB->normalizeSuccProbs(); + + // Create the terminator node. 
+ SDValue Ret = + DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot()); + DAG.setRoot(Ret); +} + +void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) { + report_fatal_error("visitCatchSwitch not yet implemented!"); +} + void SelectionDAGBuilder::visitRet(const ReturnInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto &DL = DAG.getDataLayout(); @@ -1186,7 +1320,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()), PtrValueVTs); - SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); + SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + DemoteReg, PtrValueVTs[0]); SDValue RetOp = getValue(I.getOperand(0)); SmallVector<EVT, 4> ValueVTs; @@ -1334,25 +1469,34 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, } /// Return branch probability calculated by BranchProbabilityInfo for IR blocks. -uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src, - const MachineBasicBlock *Dst) const { +BranchProbability +SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { BranchProbabilityInfo *BPI = FuncInfo.BPI; - if (!BPI) - return 0; const BasicBlock *SrcBB = Src->getBasicBlock(); const BasicBlock *DstBB = Dst->getBasicBlock(); - return BPI->getEdgeWeight(SrcBB, DstBB); + if (!BPI) { + // If BPI is not available, set the default probability as 1 / N, where N is + // the number of successors. + auto SuccSize = std::max<uint32_t>( + std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1); + return BranchProbability(1, SuccSize); + } + return BPI->getEdgeProbability(SrcBB, DstBB); } -void SelectionDAGBuilder:: -addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, - uint32_t Weight /* = 0 */) { - if (!Weight) - Weight = getEdgeWeight(Src, Dst); - Src->addSuccessor(Dst, Weight); +void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src, + MachineBasicBlock *Dst, + BranchProbability Prob) { + if (!FuncInfo.BPI) + Src->addSuccessorWithoutProb(Dst); + else { + if (Prob.isUnknown()) + Prob = getEdgeProbability(Src, Dst); + Src->addSuccessor(Dst, Prob); + } } - static bool InBlock(const Value *V, const BasicBlock *BB) { if (const Instruction *I = dyn_cast<Instruction>(V)) return I->getParent() == BB; @@ -1369,8 +1513,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - uint32_t TWeight, - uint32_t FWeight) { + BranchProbability TProb, + BranchProbability FProb) { const BasicBlock *BB = CurBB->getBasicBlock(); // If the leaf of the tree is a comparison, merge the condition into @@ -1385,17 +1529,15 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, ISD::CondCode Condition; if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { Condition = getICmpCondCode(IC->getPredicate()); - } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { + } else { + const FCmpInst *FC = cast<FCmpInst>(Cond); Condition = getFCmpCondCode(FC->getPredicate()); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); - } else { - (void)Condition; // silence warning. 
- llvm_unreachable("Unknown compare instruction"); } CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, - TBB, FBB, CurBB, TWeight, FWeight); + TBB, FBB, CurBB, TProb, FProb); SwitchCases.push_back(CB); return; } @@ -1403,26 +1545,19 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), - nullptr, TBB, FBB, CurBB, TWeight, FWeight); + nullptr, TBB, FBB, CurBB, TProb, FProb); SwitchCases.push_back(CB); } -/// Scale down both weights to fit into uint32_t. -static void ScaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { - uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; - uint32_t Scale = (NewMax / UINT32_MAX) + 1; - NewTrue = NewTrue / Scale; - NewFalse = NewFalse / Scale; -} - /// FindMergedConditions - If Cond is an expression like void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - unsigned Opc, uint32_t TWeight, - uint32_t FWeight) { + Instruction::BinaryOps Opc, + BranchProbability TProb, + BranchProbability FProb) { // If this node is not part of the or/and tree, emit it as a branch. const Instruction *BOp = dyn_cast<Instruction>(Cond); if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || @@ -1431,12 +1566,12 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, - TWeight, FWeight); + TProb, FProb); return; } // Create TmpBB after CurBB. - MachineFunction::iterator BBI = CurBB; + MachineFunction::iterator BBI(CurBB); MachineFunction &MF = DAG.getMachineFunction(); MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); CurBB->getParent()->insert(++BBI, TmpBB); @@ -1455,26 +1590,25 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // The requirement is that // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) // = TrueProb for original BB. - // Assuming the original weights are A and B, one choice is to set BB1's - // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice - // assumes that + // Assuming the original probabilities are A and B, one choice is to set + // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to + // A/(1+B) and 2B/(1+B). This choice assumes that // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. // Another choice is to assume TrueProb for BB1 equals to TrueProb for // TmpBB, but the math is more complicated. - uint64_t NewTrueWeight = TWeight; - uint64_t NewFalseWeight = (uint64_t)TWeight + 2 * (uint64_t)FWeight; - ScaleWeights(NewTrueWeight, NewFalseWeight); + auto NewTrueProb = TProb / 2; + auto NewFalseProb = TProb / 2 + FProb; // Emit the LHS condition. FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, - NewTrueWeight, NewFalseWeight); + NewTrueProb, NewFalseProb); - NewTrueWeight = TWeight; - NewFalseWeight = 2 * (uint64_t)FWeight; - ScaleWeights(NewTrueWeight, NewFalseWeight); + // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). + SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb}; + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. 
FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, - NewTrueWeight, NewFalseWeight); + Probs[0], Probs[1]); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: @@ -1491,24 +1625,23 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // The requirement is that // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) // = FalseProb for original BB. - // Assuming the original weights are A and B, one choice is to set BB1's - // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice - // assumes that - // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. - - uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight; - uint64_t NewFalseWeight = FWeight; - ScaleWeights(NewTrueWeight, NewFalseWeight); + // Assuming the original probabilities are A and B, one choice is to set + // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to + // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 == + // TrueProb for BB1 * FalseProb for TmpBB. + + auto NewTrueProb = TProb + FProb / 2; + auto NewFalseProb = FProb / 2; // Emit the LHS condition. FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, - NewTrueWeight, NewFalseWeight); + NewTrueProb, NewFalseProb); - NewTrueWeight = 2 * (uint64_t)TWeight; - NewFalseWeight = FWeight; - ScaleWeights(NewTrueWeight, NewFalseWeight); + // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). + SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2}; + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, - NewTrueWeight, NewFalseWeight); + Probs[0], Probs[1]); } } @@ -1585,12 +1718,14 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { - if (!DAG.getTargetLoweringInfo().isJumpExpensive() && - BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || - BOp->getOpcode() == Instruction::Or)) { + Instruction::BinaryOps Opcode = BOp->getOpcode(); + if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && + !I.getMetadata(LLVMContext::MD_unpredictable) && + (Opcode == Instruction::And || Opcode == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, - BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB), - getEdgeWeight(BrMBB, Succ1MBB)); + Opcode, + getEdgeProbability(BrMBB, Succ0MBB), + getEdgeProbability(BrMBB, Succ1MBB)); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always // be the first entry. @@ -1669,11 +1804,12 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, } // Update successor info - addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight); + addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb); // TrueBB and FalseBB are always different unless the incoming IR is // degenerate. This only happens when running llc on weird IR. if (CB.TrueBB != CB.FalseBB) - addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); + addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb); + SwitchBB->normalizeSuccProbs(); // If the lhs block is the next block, invert the condition so that we can // fall through to the lhs instead of the rhs block. 
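To see that the new arithmetic preserves the invariant stated in the comments above, here is a small editor's sketch (illustrative only, not part of the patch) of the "or" case with assumed probabilities A = 3/5 and B = 2/5, using the same BranchProbability operations the patch itself uses:

#include "llvm/Support/BranchProbability.h"
#include <iterator>
using llvm::BranchProbability;

void checkOrSplit() {
  BranchProbability A(3, 5), B(2, 5);       // original TrueProb and FalseProb
  BranchProbability BB1ToTBB = A / 2;       // 3/10: BB1 branches straight to TBB
  BranchProbability BB1ToTmpBB = A / 2 + B; // 7/10: BB1 falls through to TmpBB
  BranchProbability Probs[] = {A / 2, B};   // normalize A/2 and B ...
  BranchProbability::normalizeProbabilities(std::begin(Probs), std::end(Probs));
  // ... giving Probs[0] = A/(1+B) = 3/7 for the TmpBB -> TBB edge, so
  // P(TBB) = 3/10 + 7/10 * 3/7 = 3/5 = A (up to fixed-point rounding),
  // which is exactly "TrueProb for BB1 + (FalseProb for BB1 * TrueProb for
  // TmpBB) = TrueProb for original BB".
  (void)BB1ToTBB; (void)BB1ToTmpBB;
}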
@@ -1797,10 +1933,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, GuardPtr, MachinePointerInfo(IRGuard, 0), true, false, false, Align); - SDValue StackSlot = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), - StackSlotPtr, - MachinePointerInfo::getFixedStack(FI), - true, false, false, Align); + SDValue StackSlot = DAG.getLoad( + PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true, + false, false, Align); // Perform the comparison via a subtract/getsetcc. EVT VT = Guard.getValueType(); @@ -1837,7 +1973,7 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, - nullptr, 0, false, getCurSDLoc(), false, false).second; + None, false, getCurSDLoc(), false, false).second; DAG.setRoot(Chain); } @@ -1884,8 +2020,9 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - addSuccessorWithWeight(SwitchBB, B.Default); - addSuccessorWithWeight(SwitchBB, MBB); + addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); + addSuccessorWithProb(SwitchBB, MBB, B.Prob); + SwitchBB->normalizeSuccProbs(); SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, RangeCmp, @@ -1902,7 +2039,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, /// visitBitTestCase - this function produces one "bit test" void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, - uint32_t BranchWeightToNext, + BranchProbability BranchProbToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { @@ -1938,10 +2075,14 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE); } - // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. - addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight); - // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. - addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); + // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb. + addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb); + // The branch probability from SwitchBB to NextMBB is BranchProbToNext. + addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext); + // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is + // one as they are relative probabilities (and thus work more like weights), + // and hence we need to normalize them to let the sum of them become one. + SwitchBB->normalizeSuccProbs(); SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), @@ -1958,9 +2099,10 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { MachineBasicBlock *InvokeMBB = FuncInfo.MBB; - // Retrieve successors. + // Retrieve successors. Look through artificial IR level blocks like + // catchswitch for successors. 
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; - MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; + const BasicBlock *EHPadBB = I.getSuccessor(1); const Value *Callee(I.getCalledValue()); const Function *Fn = dyn_cast<Function>(Callee); @@ -1975,14 +2117,14 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { break; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: - visitPatchpoint(&I, LandingPad); + visitPatchpoint(&I, EHPadBB); break; case Intrinsic::experimental_gc_statepoint: - LowerStatepoint(ImmutableStatepoint(&I), LandingPad); + LowerStatepoint(ImmutableStatepoint(&I), EHPadBB); break; } } else - LowerCallTo(&I, getValue(Callee), false, LandingPad); + LowerCallTo(&I, getValue(Callee), false, EHPadBB); // If the value of the invoke is used outside of its defining block, make it // available as a virtual register. @@ -1992,9 +2134,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { CopyToExportRegsIfNeeded(&I); } - // Update successor info - addSuccessorWithWeight(InvokeMBB, Return); - addSuccessorWithWeight(InvokeMBB, LandingPad); + SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests; + BranchProbabilityInfo *BPI = FuncInfo.BPI; + BranchProbability EHPadBBProb = + BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB) + : BranchProbability::getZero(); + findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests); + + // Update successor info. + addSuccessorWithProb(InvokeMBB, Return); + for (auto &UnwindDest : UnwindDests) { + UnwindDest.first->setIsEHPad(); + addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second); + } + InvokeMBB->normalizeSuccProbs(); // Drop into normal successor. DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), @@ -2007,7 +2160,7 @@ void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { } void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { - assert(FuncInfo.MBB->isLandingPad() && + assert(FuncInfo.MBB->isEHPad() && "Call to landingpad not in landing pad!"); MachineBasicBlock *MBB = FuncInfo.MBB; @@ -2017,8 +2170,16 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother to create these DAG nodes. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLI.getExceptionPointerRegister() == 0 && - TLI.getExceptionSelectorRegister() == 0) + const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn(); + if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 && + TLI.getExceptionSelectorRegister(PersonalityFn) == 0) + return; + + // If landingpad's return type is token type, we don't create DAG nodes + // for its exception pointer and selector value. The extraction of exception + // pointer or selector value from token type landingpads is not currently + // supported. + if (LP.getType()->isTokenTy()) return; SmallVector<EVT, 2> ValueVTs; @@ -2074,8 +2235,7 @@ void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { // If this case has the same successor and is a neighbour, merge it into // the previous cluster. 
Clusters[DstIndex - 1].High = CaseVal; - Clusters[DstIndex - 1].Weight += CC.Weight; - assert(Clusters[DstIndex - 1].Weight >= CC.Weight && "Weight overflow!"); + Clusters[DstIndex - 1].Prob += CC.Prob; } else { std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex], sizeof(Clusters[SrcIndex])); @@ -2109,8 +2269,9 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { continue; MachineBasicBlock *Succ = FuncInfo.MBBMap[BB]; - addSuccessorWithWeight(IndirectBrMBB, Succ); + addSuccessorWithProb(IndirectBrMBB, Succ); } + IndirectBrMBB->normalizeSuccProbs(); DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(), MVT::Other, getControlRoot(), @@ -2119,7 +2280,8 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { if (DAG.getTarget().Options.TrapUnreachable) - DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); + DAG.setRoot( + DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); } void SelectionDAGBuilder::visitFSub(const User &I) { @@ -2260,6 +2422,10 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); + + // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them. + // FIXME: We should propagate the fast-math-flags to the DAG node itself for + // further optimization, but currently FMF is only applicable to binary nodes. if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), @@ -2284,27 +2450,74 @@ void SelectionDAGBuilder::visitSelect(const User &I) { // Min/max matching is only viable if all output VTs are the same. if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) { - Value *LHS, *RHS; - SelectPatternFlavor SPF = matchSelectPattern(const_cast<User*>(&I), LHS, RHS); - ISD::NodeType Opc = ISD::DELETED_NODE; - switch (SPF) { - case SPF_UMAX: Opc = ISD::UMAX; break; - case SPF_UMIN: Opc = ISD::UMIN; break; - case SPF_SMAX: Opc = ISD::SMAX; break; - case SPF_SMIN: Opc = ISD::SMIN; break; - default: break; - } - EVT VT = ValueVTs[0]; LLVMContext &Ctx = *DAG.getContext(); auto &TLI = DAG.getTargetLoweringInfo(); - while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector) + + // We care about the legality of the operation after it has been type + // legalized. + while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal && + VT != TLI.getTypeToTransformTo(Ctx, VT)) VT = TLI.getTypeToTransformTo(Ctx, VT); - if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT) && - // If the underlying comparison instruction is used by any other instruction, - // the consumed instructions won't be destroyed, so it is not profitable - // to convert to a min/max. + // If the vselect is legal, assume we want to leave this as a vector setcc + + // vselect. Otherwise, if this is going to be scalarized, we want to see if + // min/max is legal on the scalar type. 
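// Illustration (editor's sketch, not from the patch): on a target where
// v4f32 VSELECT is not legal, a select-based v4f32 fmin will be scalarized,
// so the legality that matters is FMINNUM/FMINNAN on f32, i.e. on
// VT.getScalarType(); that is exactly the case UseScalarMinMax selects below.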
+ bool UseScalarMinMax = VT.isVector() && + !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT); + + Value *LHS, *RHS; + auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS); + ISD::NodeType Opc = ISD::DELETED_NODE; + switch (SPR.Flavor) { + case SPF_UMAX: Opc = ISD::UMAX; break; + case SPF_UMIN: Opc = ISD::UMIN; break; + case SPF_SMAX: Opc = ISD::SMAX; break; + case SPF_SMIN: Opc = ISD::SMIN; break; + case SPF_FMINNUM: + switch (SPR.NaNBehavior) { + case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); + case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break; + case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break; + case SPNB_RETURNS_ANY: { + if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT)) + Opc = ISD::FMINNUM; + else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT)) + Opc = ISD::FMINNAN; + else if (UseScalarMinMax) + Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ? + ISD::FMINNUM : ISD::FMINNAN; + break; + } + } + break; + case SPF_FMAXNUM: + switch (SPR.NaNBehavior) { + case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); + case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break; + case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break; + case SPNB_RETURNS_ANY: + + if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT)) + Opc = ISD::FMAXNUM; + else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT)) + Opc = ISD::FMAXNAN; + else if (UseScalarMinMax) + Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ? + ISD::FMAXNUM : ISD::FMAXNAN; + break; + } + break; + default: break; + } + + if (Opc != ISD::DELETED_NODE && + (TLI.isOperationLegalOrCustom(Opc, VT) || + (UseScalarMinMax && + TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) && + // If the underlying comparison instruction is used by any other + // instruction, the consumed instructions won't be destroyed, so it is + // not profitable to convert to a min/max. cast<SelectInst>(&I)->getCondition()->hasOneUse()) { OpCode = Opc; LHSVal = getValue(LHS); @@ -2920,7 +3133,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // throughout the function's lifetime. bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr && - isDereferenceablePointer(SV, *DAG.getTarget().getDataLayout()); + isDereferenceablePointer(SV, DAG.getDataLayout()); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; @@ -2940,8 +3153,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (isVolatile || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. Root = getRoot(); - else if (AA->pointsToConstantMemory( - MemoryLocation(SV, AA->getTypeStoreSize(Ty), AAInfo))) { + else if (AA->pointsToConstantMemory(MemoryLocation( + SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -3056,7 +3269,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { SDLoc sdl = getCurSDLoc(); - // llvm.masked.store.*(Src0, Ptr, alignemt, Mask) + // llvm.masked.store.*(Src0, Ptr, alignment, Mask) Value *PtrOperand = I.getArgOperand(1); SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(I.getArgOperand(0)); @@ -3080,63 +3293,70 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { setValue(&I, StoreNode); } -// Gather/scatter receive a vector of pointers. 
-// This vector of pointers may be represented as a base pointer + vector of -// indices, it depends on GEP and instruction preceeding GEP -// that calculates indices -static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index, +// Get a uniform base for the Gather/Scatter intrinsic. +// The first argument of the Gather/Scatter intrinsic is a vector of pointers. +// We try to represent it as a base pointer + vector of indices. +// Usually, the vector of pointers comes from a 'getelementptr' instruction. +// The first operand of the GEP may be a single pointer or a vector of pointers. +// Example: +// %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind +// or +// %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind +// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, .. +// +// When the first GEP operand is a single pointer - it is the uniform base we +// are looking for. If the first operand of the GEP is a splat vector - we +// extract the splat value and use it as a uniform base. +// In all other cases the function returns 'false'. +// +static bool getUniformBase(const Value *& Ptr, SDValue& Base, SDValue& Index, SelectionDAGBuilder* SDB) { - assert (Ptr->getType()->isVectorTy() && "Uexpected pointer type"); - GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); - if (!Gep || Gep->getNumOperands() > 2) + SelectionDAG& DAG = SDB->DAG; + LLVMContext &Context = *DAG.getContext(); + + assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type"); + const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); + if (!GEP || GEP->getNumOperands() > 2) return false; - ShuffleVectorInst *ShuffleInst = - dyn_cast<ShuffleVectorInst>(Gep->getPointerOperand()); - if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() || - cast<Instruction>(ShuffleInst->getOperand(0))->getOpcode() != - Instruction::InsertElement) + + const Value *GEPPtr = GEP->getPointerOperand(); + if (!GEPPtr->getType()->isVectorTy()) + Ptr = GEPPtr; + else if (!(Ptr = getSplatValue(GEPPtr))) return false; - Ptr = cast<InsertElementInst>(ShuffleInst->getOperand(0))->getOperand(1); + Value *IndexVal = GEP->getOperand(1); - SelectionDAG& DAG = SDB->DAG; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - // Check is the Ptr is inside current basic block - // If not, look for the shuffle instruction - if (SDB->findValue(Ptr)) - Base = SDB->getValue(Ptr); - else if (SDB->findValue(ShuffleInst)) { - SDValue ShuffleNode = SDB->getValue(ShuffleInst); - SDLoc sdl = ShuffleNode; - Base = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, sdl, - ShuffleNode.getValueType().getScalarType(), ShuffleNode, - DAG.getConstant(0, sdl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - SDB->setValue(Ptr, Base); - } - else + // The operands of the GEP may be defined in another basic block. + // In this case we'll not find nodes for the operands. + if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal)) return false; - Value *IndexVal = Gep->getOperand(1); - if (SDB->findValue(IndexVal)) { - Index = SDB->getValue(IndexVal); + Base = SDB->getValue(Ptr); + Index = SDB->getValue(IndexVal); - if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) { + // Suppress sign extension.
+ if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) { + if (SDB->findValue(Sext->getOperand(0))) { IndexVal = Sext->getOperand(0); - if (SDB->findValue(IndexVal)) - Index = SDB->getValue(IndexVal); + Index = SDB->getValue(IndexVal); } - return true; } - return false; + if (!Index.getValueType().isVector()) { + unsigned GEPWidth = GEP->getType()->getVectorNumElements(); + EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); + SmallVector<SDValue, 16> Ops(GEPWidth, Index); + Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops); + } + return true; } void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDLoc sdl = getCurSDLoc(); // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask) - Value *Ptr = I.getArgOperand(1); + const Value *Ptr = I.getArgOperand(1); SDValue Src0 = getValue(I.getArgOperand(0)); SDValue Mask = getValue(I.getArgOperand(3)); EVT VT = Src0.getValueType(); @@ -3150,10 +3370,10 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDValue Base; SDValue Index; - Value *BasePtr = Ptr; + const Value *BasePtr = Ptr; bool UniformBase = getUniformBase(BasePtr, Base, Index, this); - Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; + const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), MachineMemOperand::MOStore, VT.getStoreSize(), @@ -3190,7 +3410,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { SDValue InChain = DAG.getRoot(); if (AA->pointsToConstantMemory(MemoryLocation( - PtrOperand, AA->getTypeStoreSize(I.getType()), AAInfo))) { + PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), + AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. InChain = DAG.getEntryNode(); } @@ -3212,7 +3433,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDLoc sdl = getCurSDLoc(); // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0) - Value *Ptr = I.getArgOperand(0); + const Value *Ptr = I.getArgOperand(0); SDValue Src0 = getValue(I.getArgOperand(3)); SDValue Mask = getValue(I.getArgOperand(2)); @@ -3229,12 +3450,13 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Root = DAG.getRoot(); SDValue Base; SDValue Index; - Value *BasePtr = Ptr; + const Value *BasePtr = Ptr; bool UniformBase = getUniformBase(BasePtr, Base, Index, this); bool ConstantMemory = false; if (UniformBase && - AA->pointsToConstantMemory( - MemoryLocation(BasePtr, AA->getTypeStoreSize(I.getType()), AAInfo))) { + AA->pointsToConstantMemory(MemoryLocation( + BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()), + AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -3511,6 +3733,8 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) { static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, SelectionDAG &DAG) { + // TODO: What fast-math-flags should be set on the floating-point nodes? + // IntegerPartOfX = (int32_t)t0; SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); @@ -3609,6 +3833,8 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // #define LOG2OFe 1.4426950f // t0 = Op * LOG2OFe + + // TODO: What fast-math-flags should be set here?
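// Illustration (editor's note, not from the patch): the FMUL below computes
// t0 = x * log2(e), with log2(e) = 1.4426950f = 0x3fb8aa3b, so that
// exp(x) = 2^(x * log2(e)) = exp2(t0); getLimitedPrecisionExp2 then splits
// t0 into integer and fractional parts, approximates 2^fraction with a
// polynomial whose degree is chosen by LimitFloatPrecision, and folds the
// integer part into the exponent bits of the result.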
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, getF32Constant(DAG, 0x3fb8aa3b, dl)); return getLimitedPrecisionExp2(t0, dl, DAG); @@ -3622,6 +3848,9 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); @@ -3718,6 +3947,9 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); @@ -3813,6 +4045,9 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); @@ -3922,6 +4157,7 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, } } + // TODO: What fast-math-flags should be set on the FMUL node? if (IsExp10) { // Put the exponent in the right bit position for later addition to the // final result: @@ -3955,9 +4191,9 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, return DAG.getConstantFP(1.0, DL, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); - if (!F->hasFnAttribute(Attribute::OptimizeForSize) || - // If optimizing for size, don't insert too many multiplies. This - // inserts up to 5 multiplies. + if (!F->optForSize() || + // If optimizing for size, don't insert too many multiplies. + // This inserts up to 5 multiplies. countPopulation(Val) + Log2_32(Val) < 7) { // We use the simple binary decomposition method to generate the multiply // sequence. There are more optimal ways to do this (for example, @@ -3965,6 +4201,8 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, // the benefit of being both really simple and much better than a libcall. SDValue Res; // Logically starts equal to 1.0 SDValue CurSquare = LHS; + // TODO: Intrinsics should have fast-math-flags that propagate to these + // nodes. while (Val) { if (Val & 1) { if (Res.getNode()) @@ -3990,22 +4228,20 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } -// getTruncatedArgReg - Find underlying register used for an truncated -// argument. -static unsigned getTruncatedArgReg(const SDValue &N) { - if (N.getOpcode() != ISD::TRUNCATE) +// getUnderlyingArgReg - Find underlying register used for a truncated or +// bitcasted argument. 
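// For example (editor's illustration, not from the patch): given the chain
//   (TRUNCATE (AssertZext (CopyFromReg %vreg7))),
// the switch below looks through TRUNCATE and AssertZext and returns the
// virtual register of the CopyFromReg, %vreg7; any opcode outside the listed
// wrappers ends the walk and yields 0.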
+static unsigned getUnderlyingArgReg(const SDValue &N) { + switch (N.getOpcode()) { + case ISD::CopyFromReg: + return cast<RegisterSDNode>(N.getOperand(1))->getReg(); + case ISD::BITCAST: + case ISD::AssertZext: + case ISD::AssertSext: + case ISD::TRUNCATE: + return getUnderlyingArgReg(N.getOperand(0)); + default: return 0; - - const SDValue &Ext = N.getOperand(0); - if (Ext.getOpcode() == ISD::AssertZext || - Ext.getOpcode() == ISD::AssertSext) { - const SDValue &CFR = Ext.getOperand(0); - if (CFR.getOpcode() == ISD::CopyFromReg) - return cast<RegisterSDNode>(CFR.getOperand(1))->getReg(); - if (CFR.getOpcode() == ISD::TRUNCATE) - return getTruncatedArgReg(CFR); } - return 0; } /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function @@ -4033,11 +4269,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( Op = MachineOperand::CreateFI(FI); if (!Op && N.getNode()) { - unsigned Reg; - if (N.getOpcode() == ISD::CopyFromReg) - Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); - else - Reg = getTruncatedArgReg(N); + unsigned Reg = getUnderlyingArgReg(N); if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); unsigned PR = RegInfo.getLiveInPhysReg(Reg); @@ -4145,14 +4377,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::longjmp: return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { - // FIXME: this definition of "user defined address space" is x86-specific - // Assert for address < 256 since we support only user defined address - // spaces. - assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() - < 256 && - cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() - < 256 && - "Unknown address space"); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); @@ -4169,12 +4393,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memset: { - // FIXME: this definition of "user defined address space" is x86-specific - // Assert for address < 256 since we support only user defined address - // spaces. - assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() - < 256 && - "Unknown address space"); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); @@ -4189,14 +4407,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memmove: { - // FIXME: this definition of "user defined address space" is x86-specific - // Assert for address < 256 since we support only user defined address - // spaces. - assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() - < 256 && - cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() - < 256 && - "Unknown address space"); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); @@ -4238,33 +4448,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); // Parameters are handled specially. 
- bool isParameter = Variable->getTag() == dwarf::DW_TAG_arg_variable || - isa<Argument>(Address); - - const AllocaInst *AI = dyn_cast<AllocaInst>(Address); - - if (isParameter && !AI) { - FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); - if (FINode) - // Byval parameter. We have a frame index at this point. - SDV = DAG.getFrameIndexDbgValue( - Variable, Expression, FINode->getIndex(), 0, dl, SDNodeOrder); - else { - // Address is an argument, so try to emit its dbg value using - // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, - N); - return nullptr; - } - } else if (AI) + bool isParameter = Variable->isParameter() || isa<Argument>(Address); + auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); + if (isParameter && FINode) { + // Byval parameter. We have a frame index at this point. + SDV = DAG.getFrameIndexDbgValue(Variable, Expression, + FINode->getIndex(), 0, dl, SDNodeOrder); + } else if (isa<Argument>(Address)) { + // Address is an argument, so try to emit its dbg value using + // virtual register info from the FuncInfo.ValueMap. + EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, + N); + return nullptr; + } else { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), true, 0, dl, SDNodeOrder); - else { - // Can't do anything with other non-AI cases yet. - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); - DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); - DEBUG(Address->dump()); - return nullptr; } DAG.AddDbgValue(SDV, N.getNode(), isParameter); } else { @@ -4315,12 +4513,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Check unused arguments map. N = UnusedArgNodeMap[V]; if (N.getNode()) { - // A dbg.value for an alloca is always indirect. - bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, - IsIndirect, N)) { + false, N)) { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), - IsIndirect, Offset, dl, SDNodeOrder); + false, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } } else if (!V->use_empty() ) { @@ -4421,6 +4617,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getRoot(), getValue(I.getArgOperand(0)))); return nullptr; } + case Intrinsic::eh_sjlj_setup_dispatch: { + DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other, + getRoot())); + return nullptr; + } case Intrinsic::masked_gather: visitMaskedGather(I); @@ -4614,6 +4815,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); } else { + // TODO: Intrinsic calls should have fast-math-flags. 
SDValue Mul = DAG.getNode(ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), @@ -4652,6 +4854,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(Res.getValue(1)); return nullptr; } + case Intrinsic::bitreverse: + setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return nullptr; case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, sdl, getValue(I.getArgOperand(0)).getValueType(), @@ -4693,6 +4900,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); return nullptr; } + case Intrinsic::get_dynamic_area_offset: { + SDValue Op = getRoot(); + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); + // Result type for @llvm.get.dynamic.area.offset should match PtrTy for + // target. + if (PtrTy != ResTy) + report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset" + " intrinsic!"); + Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy), + Op); + DAG.setRoot(Op); + setValue(&I, Res); + return nullptr; + } case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); @@ -4743,8 +4965,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. - Res = DAG.getStore(Chain, sdl, Src, FIN, - MachinePointerInfo::getFixedStack(FI), + Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI), true, false, 0); setValue(&I, Res); DAG.setRoot(Res); @@ -4946,9 +5168,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::clear_cache: return TLI.getClearCacheBuiltinName(); - case Intrinsic::eh_actions: - setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); - return nullptr; case Intrinsic::donothing: // ignore return nullptr; @@ -4965,9 +5184,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { visitStatepoint(I); return nullptr; } - case Intrinsic::experimental_gc_result_int: - case Intrinsic::experimental_gc_result_float: - case Intrinsic::experimental_gc_result_ptr: case Intrinsic::experimental_gc_result: { visitGCResult(I); return nullptr; @@ -4978,7 +5194,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); - + case Intrinsic::instrprof_value_profile: + llvm_unreachable("instrprof failed to lower a value profiling call"); case Intrinsic::localescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); @@ -5032,19 +5249,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } - case Intrinsic::eh_begincatch: - case Intrinsic::eh_endcatch: - llvm_unreachable("begin/end catch intrinsics not lowered in codegen"); + + case Intrinsic::eh_exceptionpointer: case Intrinsic::eh_exceptioncode: { - unsigned Reg = TLI.getExceptionPointerRegister(); - assert(Reg && "cannot get exception code on this platform"); + // Get the exception pointer vreg, copy from it, and 
resize it to fit. + const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0)); MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); - assert(FuncInfo.MBB->isLandingPad() && "eh.exceptioncode in non-lpad"); - unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC); + unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC); SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); - N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); + if (Intrinsic == Intrinsic::eh_exceptioncode) + N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); setValue(&I, N); return nullptr; } @@ -5053,11 +5269,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, - MachineBasicBlock *LandingPad) { + const BasicBlock *EHPadBB) { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); MCSymbol *BeginLabel = nullptr; - if (LandingPad) { + if (EHPadBB) { // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. BeginLabel = MMI.getContext().createTempSymbol(); @@ -5067,7 +5283,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, unsigned CallSiteIndex = MMI.getCurrentCallSite(); if (CallSiteIndex) { MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); - LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex); + LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex); // Now that the call site is handled, stop tracking it. MMI.setCurrentCallSite(0); @@ -5100,14 +5316,21 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, DAG.setRoot(Result.second); } - if (LandingPad) { + if (EHPadBB) { // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. 
- MMI.addInvoke(LandingPad, BeginLabel, EndLabel); + if (MMI.hasEHFunclets()) { + assert(CLI.CS); + WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); + EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS->getInstruction()), + BeginLabel, EndLabel); + } else { + MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); + } } return Result; @@ -5115,7 +5338,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, - MachineBasicBlock *LandingPad) { + const BasicBlock *EHPadBB) { PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); FunctionType *FTy = cast<FunctionType>(PT->getElementType()); Type *RetTy = FTy->getReturnType(); @@ -5154,7 +5377,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) .setCallee(RetTy, FTy, Callee, std::move(Args), CS) .setTailCall(isTailCall); - std::pair<SDValue,SDValue> Result = lowerInvokable(CLI, LandingPad); + std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); if (Result.first.getNode()) setValue(CS.getInstruction(), Result.first); @@ -5978,7 +6201,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); std::pair<unsigned, const TargetRegisterClass *> MatchRC = TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, OpInfo.ConstraintVT); @@ -6037,10 +6260,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout())); - Chain = DAG.getStore(Chain, getCurSDLoc(), - OpInfo.CallOperand, StackSlot, - MachinePointerInfo::getFixedStack(SSFI), - false, false, 0); + Chain = DAG.getStore( + Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), + false, false, 0); OpInfo.CallOperand = StackSlot; } @@ -6460,12 +6683,9 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { /// This is a helper for lowering intrinsics that follow a target calling /// convention or require stack pointer adjustment. Only a subset of the /// intrinsic's operands need to participate in the calling convention. 
-std::pair<SDValue, SDValue> -SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, - unsigned NumArgs, SDValue Callee, - Type *ReturnTy, - MachineBasicBlock *LandingPad, - bool IsPatchPoint) { +std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands( + ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, + Type *ReturnTy, const BasicBlock *EHPadBB, bool IsPatchPoint) { TargetLowering::ArgListTy Args; Args.reserve(NumArgs); @@ -6489,7 +6709,7 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs) .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); - return lowerInvokable(CLI, LandingPad); + return lowerInvokable(CLI, EHPadBB); } /// \brief Add a stack map intrinsic call's live variable operands to a stackmap @@ -6593,7 +6813,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, - MachineBasicBlock *LandingPad) { + const BasicBlock *EHPadBB) { // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, // i32 <numBytes>, // i8* <target>, @@ -6630,9 +6850,8 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; Type *ReturnTy = IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); - std::pair<SDValue, SDValue> Result = - lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, - LandingPad, true); + std::pair<SDValue, SDValue> Result = lowerCallOperands( + CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, EHPadBB, true); SDNode *CallEnd = Result.second.getNode(); if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) @@ -6926,8 +7145,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { i, j*Parts[j].getValueType().getStoreSize()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); - else if (j != 0) + else if (j != 0) { MyFlags.Flags.setOrigAlign(1); + if (j == NumParts - 1) + MyFlags.Flags.setSplitEnd(); + } CLI.Outs.push_back(MyFlags); CLI.OutVals.push_back(Parts[j]); @@ -6986,8 +7208,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { PtrVT)); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, - MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, - false, false, 1); + MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), + DemoteStackIdx, Offsets[i]), + false, false, false, 1); ReturnValues[i] = L; Chains[i] = L.getValue(1); } @@ -7069,9 +7292,9 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { if (FastISel) return A->use_empty(); - const BasicBlock *Entry = A->getParent()->begin(); + const BasicBlock &Entry = A->getParent()->front(); for (const User *U : A->users()) - if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U)) + if (cast<Instruction>(U)->getParent() != &Entry || isa<SwitchInst>(U)) return false; // Use not in entry block. return true; @@ -7138,6 +7361,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // in the various CC lowering callbacks. Flags.setByVal(); } + if (F.getCallingConv() == CallingConv::X86_INTR) { + // IA Interrupt passes frame (1st parameter) by value in the stack. 
+ if (Idx == 1) + Flags.setByVal(); + } if (Flags.isByVal() || Flags.isInAlloca()) { PointerType *Ty = cast<PointerType>(I->getType()); Type *ElementTy = Ty->getElementType(); @@ -7165,8 +7393,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 - else if (i > 0) + else if (i > 0) { MyFlags.Flags.setOrigAlign(1); + if (i == NumRegs - 1) + MyFlags.Flags.setSplitEnd(); + } Ins.push_back(MyFlags); } if (NeedsRegBlock && Value == NumValues - 1) @@ -7235,12 +7466,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // If this argument is unused then remember its value. It is used to generate // debugging information. if (I->use_empty() && NumValues) { - SDB->setUnusedArgValue(I, InVals[i]); + SDB->setUnusedArgValue(&*I, InVals[i]); // Also remember any frame index for use in FastISel. if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(InVals[i].getNode())) - FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); } for (unsigned Val = 0; Val != NumValues; ++Val) { @@ -7270,18 +7501,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Note down frame index. if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) - FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues), SDB->getCurSDLoc()); - SDB->setValue(I, Res); + SDB->setValue(&*I, Res); if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(Res.getOperand(0).getNode())) if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) - FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); } // If this argument is live outside of the entry block, insert a copy from @@ -7293,13 +7524,13 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // uses with vregs. unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { - FuncInfo->ValueMap[I] = Reg; + FuncInfo->ValueMap[&*I] = Reg; continue; } } - if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) { - FuncInfo->InitializeRegForValue(I); - SDB->CopyToExportRegsIfNeeded(I); + if (!isOnlyUsedInEntryBlock(&*I, TM.Options.EnableFastISel)) { + FuncInfo->InitializeRegForValue(&*I); + SDB->CopyToExportRegsIfNeeded(&*I); } } @@ -7401,21 +7632,21 @@ AddSuccessorMBB(const BasicBlock *BB, // If SuccBB has not been created yet, create it. if (!SuccMBB) { MachineFunction *MF = ParentMBB->getParent(); - MachineFunction::iterator BBI = ParentMBB; + MachineFunction::iterator BBI(ParentMBB); SuccMBB = MF->CreateMachineBasicBlock(BB); MF->insert(++BBI, SuccMBB); } // Add it as a successor of ParentMBB. ParentMBB->addSuccessor( - SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely)); + SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely)); return SuccMBB; } MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { - MachineFunction::iterator I = MBB; + MachineFunction::iterator I(MBB); if (++I == FuncInfo.MF->end()) return nullptr; - return I; + return &*I; } /// During lowering new call nodes can be created (such as memset, etc.). 
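The iterator changes scattered through these hunks (MachineFunction::iterator BBI(ParentMBB), return &*I, and the &*I argument forms) reflect ilist iterators no longer converting implicitly to and from node pointers. An editor's sketch of the resulting idiom, illustrative only and assuming an MBB already linked into its MachineFunction:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;

// Return the layout successor of MBB, or nullptr if MBB is the last block.
MachineBasicBlock *layoutSuccessor(MachineBasicBlock *MBB) {
  MachineFunction::iterator It(MBB);   // pointer -> iterator is explicit now
  if (++It == MBB->getParent()->end())
    return nullptr;
  return &*It;                         // iterator -> pointer is explicit now
}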
@@ -7469,14 +7700,18 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, CaseCluster &JTCluster) { assert(First <= Last); - uint32_t Weight = 0; + auto Prob = BranchProbability::getZero(); unsigned NumCmps = 0; std::vector<MachineBasicBlock*> Table; - DenseMap<MachineBasicBlock*, uint32_t> JTWeights; + DenseMap<MachineBasicBlock*, BranchProbability> JTProbs; + + // Initialize probabilities in JTProbs. + for (unsigned I = First; I <= Last; ++I) + JTProbs[Clusters[I].MBB] = BranchProbability::getZero(); + for (unsigned I = First; I <= Last; ++I) { assert(Clusters[I].Kind == CC_Range); - Weight += Clusters[I].Weight; - assert(Weight >= Clusters[I].Weight && "Weight overflow!"); + Prob += Clusters[I].Prob; APInt Low = Clusters[I].Low->getValue(); APInt High = Clusters[I].High->getValue(); NumCmps += (Low == High) ? 1 : 2; @@ -7491,10 +7726,10 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, uint64_t ClusterSize = (High - Low).getLimitedValue() + 1; for (uint64_t J = 0; J < ClusterSize; ++J) Table.push_back(Clusters[I].MBB); - JTWeights[Clusters[I].MBB] += Clusters[I].Weight; + JTProbs[Clusters[I].MBB] += Clusters[I].Prob; } - unsigned NumDests = JTWeights.size(); + unsigned NumDests = JTProbs.size(); if (isSuitableForBitTests(NumDests, NumCmps, Clusters[First].Low->getValue(), Clusters[Last].High->getValue())) { @@ -7513,9 +7748,10 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, for (MachineBasicBlock *Succ : Table) { if (Done.count(Succ)) continue; - addSuccessorWithWeight(JumpTableMBB, Succ, JTWeights[Succ]); + addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]); Done.insert(Succ); } + JumpTableMBB->normalizeSuccProbs(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding()) @@ -7529,7 +7765,7 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, JTCases.emplace_back(std::move(JTH), std::move(JT)); JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High, - JTCases.size() - 1, Weight); + JTCases.size() - 1, Prob); return true; } @@ -7707,19 +7943,29 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, .getSizeInBits(); assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); - if (Low.isNonNegative() && High.slt(BitWidth)) { - // Optimize the case where all the case values fit in a - // word without having to subtract minValue. In this case, - // we can optimize away the subtraction. + // Check if the clusters cover a contiguous range such that no value in the + // range will jump to the default statement. + bool ContiguousRange = true; + for (int64_t I = First + 1; I <= Last; ++I) { + if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) { + ContiguousRange = false; + break; + } + } + + if (Low.isStrictlyPositive() && High.slt(BitWidth)) { + // Optimize the case where all the case values fit in a word without having + // to subtract minValue. In this case, we can optimize away the subtraction. LowBound = APInt::getNullValue(Low.getBitWidth()); CmpRange = High; + ContiguousRange = false; } else { LowBound = Low; CmpRange = High - Low; } CaseBitsVector CBV; - uint32_t TotalWeight = 0; + auto TotalProb = BranchProbability::getZero(); for (unsigned i = First; i <= Last; ++i) { // Find the CaseBits for this destination. 
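buildJumpTable() above replaces uint32_t weight sums (guarded by overflow asserts) with BranchProbability, a fixed-point fraction, and pre-seeds JTProbs so every destination starts at zero. A toy model of that per-destination accumulation; the real llvm::BranchProbability uses a 2^31 denominator, which this sketch only approximates:

#include <cstdint>
#include <cstdio>
#include <map>

// Toy fixed-point probability: N / D with a constant denominator,
// mimicking llvm::BranchProbability (which uses D = 1u << 31).
struct Prob {
  static const uint64_t D = 1u << 20;
  uint64_t N = 0;
  Prob &operator+=(Prob o) {
    N = (N + o.N > D) ? D : N + o.N; // saturate instead of overflowing
    return *this;
  }
};

int main() {
  struct Cluster { int dest; Prob prob; };
  // Three case clusters; two of them branch to destination block 0.
  Cluster clusters[] = {{0, {Prob::D / 2}}, {1, {Prob::D / 4}},
                        {0, {Prob::D / 8}}};
  std::map<int, Prob> jtProbs; // value-initialized entries start at zero
  Prob total;
  for (const Cluster &c : clusters) {
    jtProbs[c.dest] += c.prob; // sum per jump-table destination
    total += c.prob;
  }
  for (const auto &kv : jtProbs)
    std::printf("dest %d: %.3f\n", kv.first, double(kv.second.N) / Prob::D);
  std::printf("total: %.3f\n", double(total.N) / Prob::D);
  return 0;
}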
unsigned j; @@ -7727,39 +7973,40 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, if (CBV[j].BB == Clusters[i].MBB) break; if (j == CBV.size()) - CBV.push_back(CaseBits(0, Clusters[i].MBB, 0, 0)); + CBV.push_back( + CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero())); CaseBits *CB = &CBV[j]; - // Update Mask, Bits and ExtraWeight. + // Update Mask, Bits and ExtraProb. uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue(); uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue(); assert(Hi >= Lo && Hi < 64 && "Invalid bit case!"); CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo; CB->Bits += Hi - Lo + 1; - CB->ExtraWeight += Clusters[i].Weight; - TotalWeight += Clusters[i].Weight; - assert(TotalWeight >= Clusters[i].Weight && "Weight overflow!"); + CB->ExtraProb += Clusters[i].Prob; + TotalProb += Clusters[i].Prob; } BitTestInfo BTI; std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { - // Sort by weight first, number of bits second. - if (a.ExtraWeight != b.ExtraWeight) - return a.ExtraWeight > b.ExtraWeight; + // Sort by probability first, number of bits second. + if (a.ExtraProb != b.ExtraProb) + return a.ExtraProb > b.ExtraProb; return a.Bits > b.Bits; }); for (auto &CB : CBV) { MachineBasicBlock *BitTestBB = FuncInfo.MF->CreateMachineBasicBlock(SI->getParent()); - BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight)); + BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb)); } BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange), - SI->getCondition(), -1U, MVT::Other, false, nullptr, - nullptr, std::move(BTI)); + SI->getCondition(), -1U, MVT::Other, false, + ContiguousRange, nullptr, nullptr, std::move(BTI), + TotalProb); BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, - BitTestCases.size() - 1, TotalWeight); + BitTestCases.size() - 1, TotalProb); return true; } @@ -7868,9 +8115,9 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, MachineBasicBlock *DefaultMBB) { MachineFunction *CurMF = FuncInfo.MF; MachineBasicBlock *NextMBB = nullptr; - MachineFunction::iterator BBI = W.MBB; + MachineFunction::iterator BBI(W.MBB); if (++BBI != FuncInfo.MF->end()) - NextMBB = BBI; + NextMBB = &*BBI; unsigned Size = W.LastCluster - W.FirstCluster + 1; @@ -7906,13 +8153,16 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, ISD::SETEQ); // Update successor info. - // Both Small and Big will jump to Small.BB, so we sum up the weights. - addSuccessorWithWeight(SwitchMBB, Small.MBB, Small.Weight + Big.Weight); - addSuccessorWithWeight( - SwitchMBB, DefaultMBB, - // The default destination is the first successor in IR. - BPI ? BPI->getEdgeWeight(SwitchMBB->getBasicBlock(), (unsigned)0) - : 0); + // Both Small and Big will jump to Small.BB, so we sum up the + // probabilities. + addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob); + if (BPI) + addSuccessorWithProb( + SwitchMBB, DefaultMBB, + // The default destination is the first successor in IR. + BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0)); + else + addSuccessorWithProb(SwitchMBB, DefaultMBB); // Insert the true branch. SDValue BrCond = @@ -7929,17 +8179,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } if (TM.getOptLevel() != CodeGenOpt::None) { - // Order cases by weight so the most likely case will be checked first. 
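The mask arithmetic above turns a rebased case range [Lo, Hi] into one set bit per case value, which is what lets a whole cluster lower to a single shift-and-AND bit test. The expression can be exercised on its own:

#include <cstdint>
#include <cstdio>

// The mask expression from the hunk above: one bit per case value in the
// rebased range [Lo, Hi]; requires Hi >= Lo and Hi < 64.
static uint64_t rangeMask(uint64_t Lo, uint64_t Hi) {
  return (-1ULL >> (63 - (Hi - Lo))) << Lo;
}

int main() {
  // Two clusters, rebased against LowBound: cases {2,3,4} and {9}.
  uint64_t mask = rangeMask(2, 4) | rangeMask(9, 9);
  for (uint64_t x = 0; x < 12; ++x) {
    // One shift and one AND decide the whole cluster; this is the test
    // the emitted bit-test block performs.
    bool hit = (uint64_t(1) << x) & mask;
    std::printf("x=%2llu -> %s\n", (unsigned long long)x,
                hit ? "case block" : "default");
  }
  return 0;
}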
+ // Order cases by probability so the most likely case will be checked first.
 std::sort(W.FirstCluster, W.LastCluster + 1,
 [](const CaseCluster &a, const CaseCluster &b) {
- return a.Weight > b.Weight;
+ return a.Prob > b.Prob;
 });
 // Rearrange the case blocks so that the last one falls through if possible
- // without without changing the order of weights.
+ // without changing the order of probabilities.
 for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
 --I;
- if (I->Weight > W.LastCluster->Weight)
+ if (I->Prob > W.LastCluster->Prob)
 break;
 if (I->Kind == CC_Range && I->MBB == NextMBB) {
 std::swap(*I, *W.LastCluster);
@@ -7948,12 +8198,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
 }
 }
- // Compute total weight.
- uint32_t UnhandledWeights = 0;
- for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) {
- UnhandledWeights += I->Weight;
- assert(UnhandledWeights >= I->Weight && "Weight overflow!");
- }
+ // Compute total probability.
+ BranchProbability DefaultProb = W.DefaultProb;
+ BranchProbability UnhandledProbs = DefaultProb;
+ for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
+ UnhandledProbs += I->Prob;
 MachineBasicBlock *CurMBB = W.MBB;
 for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
@@ -7967,6 +8216,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
 // Put Cond in a virtual register to make it available from the new blocks.
 ExportFromCurrentBlock(Cond);
 }
+ UnhandledProbs -= I->Prob;
 switch (I->Kind) {
 case CC_JumpTable: {
@@ -7977,8 +8227,28 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
 // The jump block hasn't been inserted yet; insert it here.
 MachineBasicBlock *JumpMBB = JT->MBB;
 CurMF->insert(BBI, JumpMBB);
- addSuccessorWithWeight(CurMBB, Fallthrough);
- addSuccessorWithWeight(CurMBB, JumpMBB);
+
+ auto JumpProb = I->Prob;
+ auto FallthroughProb = UnhandledProbs;
+
+ // If the default statement is a target of the jump table, we evenly
+ // distribute the default probability to successors of CurMBB. Also
+ // update the probability on the edge from JumpMBB to Fallthrough.
+ for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
+ SE = JumpMBB->succ_end();
+ SI != SE; ++SI) {
+ if (*SI == DefaultMBB) {
+ JumpProb += DefaultProb / 2;
+ FallthroughProb -= DefaultProb / 2;
+ JumpMBB->setSuccProbability(SI, DefaultProb / 2);
+ JumpMBB->normalizeSuccProbs();
+ break;
+ }
+ }
+
+ addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
+ addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
+ CurMBB->normalizeSuccProbs();
 // The jump table header will be inserted in our current block, do the
 // range check, and fall through to our fallthrough block.
@@ -8004,8 +8274,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
 BTB->Parent = CurMBB;
 BTB->Default = Fallthrough;
- // If we're in the right place, emit the bit test header header right now.
- if (CurMBB ==SwitchMBB) {
+ BTB->DefaultProb = UnhandledProbs;
+ // If the cases in bit test don't form a contiguous range, we evenly
+ // distribute the probability on the edge to Fallthrough to two
+ // successors of CurMBB.
+ if (!BTB->ContiguousRange) {
+ BTB->Prob += DefaultProb / 2;
+ BTB->DefaultProb -= DefaultProb / 2;
+ }
+
+ // If we're in the right place, emit the bit test header right now.
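The reordering in the hunk above happens in two steps: clusters are sorted so the most probable case is tested first, and then a cluster targeting the physically next block may rotate into the last slot so it can fall through, but only when that keeps probabilities non-increasing. A standalone rehearsal of the rotation step; the vector is shown already sorted, as the std::sort above would leave it:

#include <cstdio>
#include <utility>
#include <vector>

struct Cluster { int mbb; double prob; };

int main() {
  // Already sorted by decreasing probability, as after the std::sort above.
  std::vector<Cluster> c = {{1, 0.4}, {4, 0.3}, {2, 0.1}, {3, 0.1}};
  const int nextMBB = 2; // block that physically follows the switch block

  // Walk backwards; a cluster may move to the end only if its probability
  // does not exceed the current last cluster's (here: the 0.1 tie).
  for (size_t i = c.size() - 1; i-- > 0;) {
    if (c[i].prob > c.back().prob)
      break; // swapping would reorder probabilities
    if (c[i].mbb == nextMBB) {
      std::swap(c[i], c.back()); // last cluster can now fall through
      break;
    }
  }
  for (const Cluster &x : c)
    std::printf("test mbb=%d (p=%.1f)\n", x.mbb, x.prob);
  return 0;
}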
+ if (CurMBB == SwitchMBB) { visitBitTestHeader(*BTB, SwitchMBB); BTB->Emitted = true; } @@ -8028,10 +8307,9 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, RHS = I->High; } - // The false weight is the sum of all unhandled cases. - UnhandledWeights -= I->Weight; - CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight, - UnhandledWeights); + // The false probability is the sum of all unhandled cases. + CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Prob, + UnhandledProbs); if (CurMBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); @@ -8049,8 +8327,8 @@ unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC, CaseClusterIt First, CaseClusterIt Last) { return std::count_if(First, Last + 1, [&](const CaseCluster &X) { - if (X.Weight != CC.Weight) - return X.Weight > CC.Weight; + if (X.Prob != CC.Prob) + return X.Prob > CC.Prob; // Ties are broken by comparing the case value. return X.Low->getValue().slt(CC.Low->getValue()); @@ -8066,24 +8344,24 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!"); - // Balance the tree based on branch weights to create a near-optimal (in terms - // of search time given key frequency) binary search tree. See e.g. Kurt + // Balance the tree based on branch probabilities to create a near-optimal (in + // terms of search time given key frequency) binary search tree. See e.g. Kurt // Mehlhorn "Nearly Optimal Binary Search Trees" (1975). CaseClusterIt LastLeft = W.FirstCluster; CaseClusterIt FirstRight = W.LastCluster; - uint32_t LeftWeight = LastLeft->Weight; - uint32_t RightWeight = FirstRight->Weight; + auto LeftProb = LastLeft->Prob + W.DefaultProb / 2; + auto RightProb = FirstRight->Prob + W.DefaultProb / 2; // Move LastLeft and FirstRight towards each other from opposite directions to - // find a partitioning of the clusters which balances the weight on both - // sides. If LeftWeight and RightWeight are equal, alternate which side is - // taken to ensure 0-weight nodes are distributed evenly. + // find a partitioning of the clusters which balances the probability on both + // sides. If LeftProb and RightProb are equal, alternate which side is + // taken to ensure 0-probability nodes are distributed evenly. unsigned I = 0; while (LastLeft + 1 < FirstRight) { - if (LeftWeight < RightWeight || (LeftWeight == RightWeight && (I & 1))) - LeftWeight += (++LastLeft)->Weight; + if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1))) + LeftProb += (++LastLeft)->Prob; else - RightWeight += (--FirstRight)->Weight; + RightProb += (--FirstRight)->Prob; I++; } @@ -8144,7 +8422,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, const ConstantInt *Pivot = PivotCluster->Low; // New blocks will be inserted immediately after the current one. - MachineFunction::iterator BBI = W.MBB; + MachineFunction::iterator BBI(W.MBB); ++BBI; // We will branch to the LHS if Value < Pivot. If LHS is a single cluster, @@ -8158,7 +8436,8 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, } else { LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, LeftMBB); - WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot}); + WorkList.push_back( + {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2}); // Put Cond in a virtual register to make it available from the new blocks. 
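splitWorkItem's balancing loop above is the heart of the binary-search lowering: both halves are seeded with half of the pending default probability, and the lighter side repeatedly absorbs the next cluster until the iterators meet. The same loop with plain doubles standing in for BranchProbability:

#include <cstdio>
#include <vector>

int main() {
  // One probability per case cluster, plus the work item's default prob.
  std::vector<double> prob = {0.05, 0.1, 0.3, 0.1, 0.25, 0.1};
  const double defaultProb = 0.1;

  size_t lastLeft = 0, firstRight = prob.size() - 1;
  // Each side starts with half of the default probability, as above.
  double leftProb = prob[lastLeft] + defaultProb / 2;
  double rightProb = prob[firstRight] + defaultProb / 2;

  unsigned i = 0;
  while (lastLeft + 1 < firstRight) {
    // Grow the lighter side; alternate on ties so zero-probability
    // clusters spread evenly (cf. Mehlhorn 1975, cited above).
    if (leftProb < rightProb || (leftProb == rightProb && (i & 1)))
      leftProb += prob[++lastLeft];
    else
      rightProb += prob[--firstRight];
    i++;
  }
  std::printf("left  [0..%zu]  p=%.2f\n", lastLeft, leftProb);
  std::printf("right [%zu..%zu] p=%.2f\n", firstRight, prob.size() - 1,
              rightProb);
  return 0;
}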
ExportFromCurrentBlock(Cond); } @@ -8173,14 +8452,15 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, } else { RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, RightMBB); - WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT}); + WorkList.push_back( + {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2}); // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } // Create the CaseBlock record that will be used to lower the branch. CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, - LeftWeight, RightWeight); + LeftProb, RightProb); if (W.MBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); @@ -8196,9 +8476,10 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { for (auto I : SI.cases()) { MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()]; const ConstantInt *CaseVal = I.getCaseValue(); - uint32_t Weight = - BPI ? BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()) : 0; - Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight)); + BranchProbability Prob = + BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex()) + : BranchProbability(1, SI.getNumCases() + 1); + Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob)); } MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()]; @@ -8274,7 +8555,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { SwitchWorkList WorkList; CaseClusterIt First = Clusters.begin(); CaseClusterIt Last = Clusters.end() - 1; - WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr}); + auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB); + WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); while (!WorkList.empty()) { SwitchWorkListItem W = WorkList.back(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 700675453fe7..49a3872d20c8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -17,6 +17,7 @@ #include "StatepointLowering.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -30,7 +31,6 @@ namespace llvm { class AddrSpaceCastInst; -class AliasAnalysis; class AllocaInst; class BasicBlock; class BitCastInst; @@ -154,39 +154,39 @@ private: unsigned JTCasesIndex; unsigned BTCasesIndex; }; - uint32_t Weight; + BranchProbability Prob; static CaseCluster range(const ConstantInt *Low, const ConstantInt *High, - MachineBasicBlock *MBB, uint32_t Weight) { + MachineBasicBlock *MBB, BranchProbability Prob) { CaseCluster C; C.Kind = CC_Range; C.Low = Low; C.High = High; C.MBB = MBB; - C.Weight = Weight; + C.Prob = Prob; return C; } static CaseCluster jumpTable(const ConstantInt *Low, const ConstantInt *High, unsigned JTCasesIndex, - uint32_t Weight) { + BranchProbability Prob) { CaseCluster C; C.Kind = CC_JumpTable; C.Low = Low; C.High = High; C.JTCasesIndex = JTCasesIndex; - C.Weight = Weight; + C.Prob = Prob; return C; } static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High, - unsigned BTCasesIndex, uint32_t Weight) { + unsigned BTCasesIndex, BranchProbability Prob) { CaseCluster C; C.Kind = CC_BitTests; C.Low 
= Low; C.High = High; C.BTCasesIndex = BTCasesIndex; - C.Weight = Weight; + C.Prob = Prob; return C; } }; @@ -198,13 +198,13 @@ private: uint64_t Mask; MachineBasicBlock* BB; unsigned Bits; - uint32_t ExtraWeight; + BranchProbability ExtraProb; CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits, - uint32_t Weight): - Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { } + BranchProbability Prob): + Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) { } - CaseBits() : Mask(0), BB(nullptr), Bits(0), ExtraWeight(0) {} + CaseBits() : Mask(0), BB(nullptr), Bits(0) {} }; typedef std::vector<CaseBits> CaseBitsVector; @@ -217,13 +217,13 @@ private: /// blocks needed by multi-case switch statements. struct CaseBlock { CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs, - const Value *cmpmiddle, - MachineBasicBlock *truebb, MachineBasicBlock *falsebb, - MachineBasicBlock *me, - uint32_t trueweight = 0, uint32_t falseweight = 0) - : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), - TrueBB(truebb), FalseBB(falsebb), ThisBB(me), - TrueWeight(trueweight), FalseWeight(falseweight) { } + const Value *cmpmiddle, MachineBasicBlock *truebb, + MachineBasicBlock *falsebb, MachineBasicBlock *me, + BranchProbability trueprob = BranchProbability::getUnknown(), + BranchProbability falseprob = BranchProbability::getUnknown()) + : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), + TrueBB(truebb), FalseBB(falsebb), ThisBB(me), TrueProb(trueprob), + FalseProb(falseprob) {} // CC - the condition code to use for the case block's setcc node ISD::CondCode CC; @@ -239,8 +239,8 @@ private: // ThisBB - the block into which to emit the code for the setcc and branches MachineBasicBlock *ThisBB; - // TrueWeight/FalseWeight - branch weights. - uint32_t TrueWeight, FalseWeight; + // TrueProb/FalseProb - branch weights. + BranchProbability TrueProb, FalseProb; }; struct JumpTable { @@ -272,32 +272,35 @@ private: struct BitTestCase { BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr, - uint32_t Weight): - Mask(M), ThisBB(T), TargetBB(Tr), ExtraWeight(Weight) { } + BranchProbability Prob): + Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) { } uint64_t Mask; MachineBasicBlock *ThisBB; MachineBasicBlock *TargetBB; - uint32_t ExtraWeight; + BranchProbability ExtraProb; }; typedef SmallVector<BitTestCase, 3> BitTestInfo; struct BitTestBlock { - BitTestBlock(APInt F, APInt R, const Value* SV, - unsigned Rg, MVT RgVT, bool E, - MachineBasicBlock* P, MachineBasicBlock* D, - BitTestInfo C): - First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), - Parent(P), Default(D), Cases(std::move(C)) { } + BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, + bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D, + BitTestInfo C, BranchProbability Pr) + : First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), + ContiguousRange(CR), Parent(P), Default(D), Cases(std::move(C)), + Prob(Pr) {} APInt First; APInt Range; const Value *SValue; unsigned Reg; MVT RegVT; bool Emitted; + bool ContiguousRange; MachineBasicBlock *Parent; MachineBasicBlock *Default; BitTestInfo Cases; + BranchProbability Prob; + BranchProbability DefaultProb; }; /// Minimum jump table density, in percent. 
@@ -339,6 +342,7 @@ private: CaseClusterIt LastCluster; const ConstantInt *GE; const ConstantInt *LT; + BranchProbability DefaultProb; }; typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList; @@ -515,6 +519,7 @@ private: void resetPerFunctionState() { FailureMBB = nullptr; Guard = nullptr; + GuardReg = 0; } MachineBasicBlock *getParentMBB() { return ParentMBB; } @@ -592,10 +597,6 @@ public: /// FunctionLoweringInfo &FuncInfo; - /// OptLevel - What optimization level we're generating code for. - /// - CodeGenOpt::Level OptLevel; - /// GFI - Garbage collection metadata for the function. GCFunctionInfo *GFI; @@ -613,7 +614,7 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) : CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), - DAG(dag), FuncInfo(funcinfo), OptLevel(ol), + DAG(dag), FuncInfo(funcinfo), HasTailCall(false) { } @@ -692,19 +693,20 @@ public: void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - MachineBasicBlock *SwitchBB, unsigned Opc, - uint32_t TW, uint32_t FW); + MachineBasicBlock *SwitchBB, + Instruction::BinaryOps Opc, BranchProbability TW, + BranchProbability FW); void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - uint32_t TW, uint32_t FW); + BranchProbability TW, BranchProbability FW); bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases); bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); void CopyToExportRegsIfNeeded(const Value *V); void ExportFromCurrentBlock(const Value *V); void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, - MachineBasicBlock *LandingPad = nullptr); + const BasicBlock *EHPadBB = nullptr); std::pair<SDValue, SDValue> lowerCallOperands( ImmutableCallSite CS, @@ -712,7 +714,7 @@ public: unsigned NumArgs, SDValue Callee, Type *ReturnTy, - MachineBasicBlock *LandingPad = nullptr, + const BasicBlock *EHPadBB = nullptr, bool IsPatchPoint = false); /// UpdateSplitBlock - When an MBB was split during scheduling, update the @@ -722,11 +724,11 @@ public: // This function is responsible for the whole statepoint lowering process. // It uniformly handles invoke and call statepoints. void LowerStatepoint(ImmutableStatepoint Statepoint, - MachineBasicBlock *LandingPad = nullptr); + const BasicBlock *EHPadBB = nullptr); private: - std::pair<SDValue, SDValue> lowerInvokable( - TargetLowering::CallLoweringInfo &CLI, - MachineBasicBlock *LandingPad); + std::pair<SDValue, SDValue> + lowerInvokable(TargetLowering::CallLoweringInfo &CLI, + const BasicBlock *EHPadBB = nullptr); // Terminator instructions. 
void visitRet(const ReturnInst &I); @@ -734,11 +736,18 @@ private: void visitSwitch(const SwitchInst &I); void visitIndirectBr(const IndirectBrInst &I); void visitUnreachable(const UnreachableInst &I); + void visitCleanupRet(const CleanupReturnInst &I); + void visitCatchSwitch(const CatchSwitchInst &I); + void visitCatchRet(const CatchReturnInst &I); + void visitCatchPad(const CatchPadInst &I); + void visitCleanupPad(const CleanupPadInst &CPI); + + BranchProbability getEdgeProbability(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const; + void addSuccessorWithProb( + MachineBasicBlock *Src, MachineBasicBlock *Dst, + BranchProbability Prob = BranchProbability::getUnknown()); - uint32_t getEdgeWeight(const MachineBasicBlock *Src, - const MachineBasicBlock *Dst) const; - void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, - uint32_t Weight = 0); public: void visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB); @@ -748,7 +757,7 @@ public: void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); void visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, - uint32_t BranchWeightToNext, + BranchProbability BranchProbToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB); @@ -842,7 +851,7 @@ private: void visitVACopy(const CallInst &I); void visitStackmap(const CallInst &I); void visitPatchpoint(ImmutableCallSite CS, - MachineBasicBlock *LandingPad = nullptr); + const BasicBlock *EHPadBB = nullptr); // These three are implemented in StatepointLowering.cpp void visitStatepoint(const CallInst &I); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 5b9b18286fae..a1c6c4c1dd63 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" +#include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -30,6 +31,11 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +static cl::opt<bool> +VerboseDAGDumping("dag-dump-verbose", cl::Hidden, + cl::desc("Display more information when dumping selection " + "DAG nodes.")); + std::string SDNode::getOperationName(const SelectionDAG *G) const { switch (getOpcode()) { default: @@ -102,6 +108,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; + case ISD::EH_SJLJ_SETUP_DISPATCH: return "EH_SJLJ_SETUP_DISPATCH"; case ISD::ConstantPool: return "ConstantPool"; case ISD::TargetIndex: return "TargetIndex"; case ISD::ExternalSymbol: return "ExternalSymbol"; @@ -145,6 +152,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FABS: return "fabs"; case ISD::FMINNUM: return "fminnum"; case ISD::FMAXNUM: return "fmaxnum"; + case ISD::FMINNAN: return "fminnan"; + case ISD::FMAXNAN: return "fmaxnan"; case ISD::FNEG: return "fneg"; case ISD::FSQRT: return "fsqrt"; case ISD::FSIN: return "fsin"; @@ -201,6 +210,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; + case ISD::SETCCE: return "setcce"; case ISD::SELECT: return 
"select"; case ISD::VSELECT: return "vselect"; case ISD::SELECT_CC: return "select_cc"; @@ -273,6 +283,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CALLSEQ_START: return "callseq_start"; case ISD::CALLSEQ_END: return "callseq_end"; + // EH instructions + case ISD::CATCHRET: return "catchret"; + case ISD::CLEANUPRET: return "cleanupret"; + // Other operators case ISD::LOAD: return "load"; case ISD::STORE: return "store"; @@ -295,15 +309,17 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::LIFETIME_END: return "lifetime.end"; case ISD::GC_TRANSITION_START: return "gc_transition.start"; case ISD::GC_TRANSITION_END: return "gc_transition.end"; + case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset"; // Bit manipulation + case ISD::BITREVERSE: return "bitreverse"; case ISD::BSWAP: return "bswap"; case ISD::CTPOP: return "ctpop"; case ISD::CTTZ: return "cttz"; case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; case ISD::CTLZ: return "ctlz"; case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; - + // Trampolines case ISD::INIT_TRAMPOLINE: return "init_trampoline"; case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; @@ -320,7 +336,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SETO: return "seto"; case ISD::SETUO: return "setuo"; - case ISD::SETUEQ: return "setue"; + case ISD::SETUEQ: return "setueq"; case ISD::SETUGT: return "setugt"; case ISD::SETUGE: return "setuge"; case ISD::SETULT: return "setult"; @@ -352,6 +368,16 @@ const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { } } +static Printable PrintNodeId(const SDNode &Node) { + return Printable([&Node](raw_ostream &OS) { +#ifndef NDEBUG + OS << 't' << Node.PersistentId; +#else + OS << (const void*)&Node; +#endif + }); +} + void SDNode::dump() const { dump(nullptr); } void SDNode::dump(const SelectionDAG *G) const { print(dbgs(), G); @@ -359,8 +385,6 @@ void SDNode::dump(const SelectionDAG *G) const { } void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { - OS << (const void*)this << ": "; - for (unsigned i = 0, e = getNumValues(); i != e; ++i) { if (i) OS << ","; if (getValueType(i) == MVT::Other) @@ -368,7 +392,6 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { else OS << getValueType(i).getEVTString(); } - OS << " = " << getOperationName(G); } void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { @@ -523,48 +546,58 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { << ']'; } - if (unsigned Order = getIROrder()) - OS << " [ORD=" << Order << ']'; + if (VerboseDAGDumping) { + if (unsigned Order = getIROrder()) + OS << " [ORD=" << Order << ']'; - if (getNodeId() != -1) - OS << " [ID=" << getNodeId() << ']'; + if (getNodeId() != -1) + OS << " [ID=" << getNodeId() << ']'; - if (!G) - return; + if (!G) + return; - DILocation *L = getDebugLoc(); - if (!L) - return; + DILocation *L = getDebugLoc(); + if (!L) + return; + + if (auto *Scope = L->getScope()) + OS << Scope->getFilename(); + else + OS << "<unknown>"; + OS << ':' << L->getLine(); + if (unsigned C = L->getColumn()) + OS << ':' << C; + } +} - if (auto *Scope = L->getScope()) - OS << Scope->getFilename(); - else - OS << "<unknown>"; - OS << ':' << L->getLine(); - if (unsigned C = L->getColumn()) - OS << ':' << C; +/// Return true if this node is so simple that we should just print it inline +/// if it appears as an operand. 
+static bool shouldPrintInline(const SDNode &Node) { + if (Node.getOpcode() == ISD::EntryToken) + return false; + return Node.getNumOperands() == 0; } static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { - for (const SDValue &Op : N->op_values()) + for (const SDValue &Op : N->op_values()) { + if (shouldPrintInline(*Op.getNode())) + continue; if (Op.getNode()->hasOneUse()) DumpNodes(Op.getNode(), indent+2, G); - else - dbgs() << "\n" << std::string(indent+2, ' ') - << (void*)Op.getNode() << ": <multiple use>"; + } - dbgs() << '\n'; dbgs().indent(indent); N->dump(G); } void SelectionDAG::dump() const { - dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; + dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n"; for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) { - const SDNode *N = I; - if (!N->hasOneUse() && N != getRoot().getNode()) + const SDNode *N = &*I; + if (!N->hasOneUse() && N != getRoot().getNode() && + (!shouldPrintInline(*N) || N->use_empty())) DumpNodes(N, 2, this); } @@ -573,10 +606,30 @@ void SelectionDAG::dump() const { } void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { + OS << PrintNodeId(*this) << ": "; print_types(OS, G); + OS << " = " << getOperationName(G); print_details(OS, G); } +static bool printOperand(raw_ostream &OS, const SelectionDAG *G, + const SDValue Value) { + if (!Value.getNode()) { + OS << "<null>"; + return false; + } else if (shouldPrintInline(*Value.getNode())) { + OS << Value->getOperationName(G) << ':'; + Value->print_types(OS, G); + Value->print_details(OS, G); + return true; + } else { + OS << PrintNodeId(*Value.getNode()); + if (unsigned RN = Value.getResNo()) + OS << ':' << RN; + return false; + } +} + typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, const SelectionDAG *G, VisitedSDNodeSet &once) { @@ -589,20 +642,13 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, // Having printed this SDNode, walk the children: for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - const SDNode *child = N->getOperand(i).getNode(); - if (i) OS << ","; OS << " "; - if (child->getNumOperands() == 0) { - // This child has no grandchildren; print it inline right here. - child->printr(OS, G); - once.insert(child); - } else { // Just the address. FIXME: also print the child's opcode. 
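shouldPrintInline and printOperand above change the dump format: operands that are trivial leaves print inline as opcode:type, while everything else is referred to by its tN persistent id. A toy dump routine over a hypothetical Node type that mimics that choice:

#include <cstdio>
#include <string>
#include <vector>

struct Node {
  std::string op, type;
  unsigned id;
  std::vector<const Node *> ops;
};

// Leaves (no operands) are simple enough to print inline.
static bool shouldPrintInline(const Node &n) { return n.ops.empty(); }

static void printNode(const Node &n) {
  std::printf("t%u: %s = %s", n.id, n.type.c_str(), n.op.c_str());
  for (size_t i = 0; i < n.ops.size(); ++i) {
    const Node &o = *n.ops[i];
    std::fputs(i ? ", " : " ", stdout);
    if (shouldPrintInline(o))
      std::printf("%s:%s", o.op.c_str(), o.type.c_str()); // leaf, inline
    else
      std::printf("t%u", o.id); // reference by persistent id
  }
  std::printf("\n");
}

int main() {
  Node c1{"Constant<1>", "i32", 1, {}};
  Node c2{"Constant<2>", "i32", 2, {}};
  Node mul{"mul", "i32", 3, {&c1, &c2}};
  Node add{"add", "i32", 4, {&mul, &c1}};
  printNode(mul); // t3: i32 = mul Constant<1>:i32, Constant<2>:i32
  printNode(add); // t4: i32 = add t3, Constant<1>:i32
  return 0;
}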
- OS << (const void*)child; - if (unsigned RN = N->getOperand(i).getResNo()) - OS << ":" << RN; - } + const SDValue Op = N->getOperand(i); + bool printedInline = printOperand(OS, G, Op); + if (printedInline) + once.insert(Op.getNode()); } OS << "\n"; @@ -664,12 +710,9 @@ void SDNode::dumprFull(const SelectionDAG *G) const { } void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { - print_types(OS, G); + printr(OS, G); for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { if (i) OS << ", "; else OS << " "; - OS << (void*)getOperand(i).getNode(); - if (unsigned RN = getOperand(i).getResNo()) - OS << ":" << RN; + printOperand(OS, G, getOperand(i)); } - print_details(OS, G); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 97ece8b9248a..853a21a15eb9 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" @@ -263,13 +264,17 @@ namespace llvm { return; IS.OptLevel = NewOptLevel; IS.TM.setOptLevel(NewOptLevel); - SavedFastISel = IS.TM.Options.EnableFastISel; - if (NewOptLevel == CodeGenOpt::None) - IS.TM.setFastISel(true); DEBUG(dbgs() << "\nChanging optimization level for Function " << IS.MF->getFunction()->getName() << "\n"); DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O" << NewOptLevel << "\n"); + SavedFastISel = IS.TM.Options.EnableFastISel; + if (NewOptLevel == CodeGenOpt::None) { + IS.TM.setFastISel(IS.TM.getO0WantsFastISel()); + DEBUG(dbgs() << "\tFastISel is " + << (IS.TM.Options.EnableFastISel ? 
"enabled" : "disabled") + << "\n"); + } } ~OptLevelChanger() { @@ -293,6 +298,11 @@ namespace llvm { const TargetLowering *TLI = IS->TLI; const TargetSubtargetInfo &ST = IS->MF->getSubtarget(); + // Try first to see if the Target has its own way of selecting a scheduler + if (auto *SchedulerCtor = ST.getDAGScheduler(OptLevel)) { + return SchedulerCtor(IS, OptLevel); + } + if (OptLevel == CodeGenOpt::None || (ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) || TLI->getSchedulingPreference() == Sched::Source) @@ -350,8 +360,9 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, OptLevel(OL), DAGSize(0) { initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); - initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); - initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); + initializeBranchProbabilityInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); + initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); initializeTargetLibraryInfoWrapperPassPass( *PassRegistry::getPassRegistry()); } @@ -363,13 +374,12 @@ SelectionDAGISel::~SelectionDAGISel() { } void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<AliasAnalysis>(); - AU.addPreserved<AliasAnalysis>(); + AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<GCModuleInfo>(); AU.addPreserved<GCModuleInfo>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); if (UseMBPI && OptLevel != CodeGenOpt::None) - AU.addRequired<BranchProbabilityInfo>(); + AU.addRequired<BranchProbabilityInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -380,10 +390,10 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { /// /// This is required for correctness, so it must be done at -O0. /// -static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) { +static void SplitCriticalSideEffectEdges(Function &Fn) { // Loop for blocks with phi nodes. - for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { - PHINode *PN = dyn_cast<PHINode>(BB->begin()); + for (BasicBlock &BB : Fn) { + PHINode *PN = dyn_cast<PHINode>(BB.begin()); if (!PN) continue; ReprocessBlock: @@ -391,7 +401,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) { // are potentially trapping constant expressions. Constant expressions are // the only potentially trapping value that can occur as the argument to a // PHI. - for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I) + for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I)); ++I) for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i)); if (!CE || !CE->canTrap()) continue; @@ -405,8 +415,8 @@ static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) { // Okay, we have to split this edge. 
SplitCriticalEdge( - Pred->getTerminator(), GetSuccessorNumber(Pred, BB), - CriticalEdgeSplittingOptions(AA).setMergeIdenticalEdges()); + Pred->getTerminator(), GetSuccessorNumber(Pred, &BB), + CriticalEdgeSplittingOptions().setMergeIdenticalEdges()); goto ReprocessBlock; } } @@ -437,19 +447,19 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TII = MF->getSubtarget().getInstrInfo(); TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); - SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), AA); + SplitCriticalSideEffectEdges(const_cast<Function &>(Fn)); CurDAG->init(*MF); FuncInfo->set(Fn, *MF, CurDAG); if (UseMBPI && OptLevel != CodeGenOpt::None) - FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>(); + FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); else FuncInfo->BPI = nullptr; @@ -457,15 +467,50 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MF->setHasInlineAsm(false); + FuncInfo->SplitCSR = false; + SmallVector<MachineBasicBlock*, 4> Returns; + + // We split CSR if the target supports it for the given function + // and the function has only return exits. + if (TLI->supportSplitCSR(MF)) { + FuncInfo->SplitCSR = true; + + // Collect all the return blocks. + for (const BasicBlock &BB : Fn) { + if (!succ_empty(&BB)) + continue; + + const TerminatorInst *Term = BB.getTerminator(); + if (isa<UnreachableInst>(Term)) + continue; + if (isa<ReturnInst>(Term)) { + Returns.push_back(FuncInfo->MBBMap[&BB]); + continue; + } + + // Bail out if the exit block is not Return nor Unreachable. + FuncInfo->SplitCSR = false; + break; + } + } + + MachineBasicBlock *EntryMBB = &MF->front(); + if (FuncInfo->SplitCSR) + // This performs initialization so lowering for SplitCSR will be correct. + TLI->initializeSplitCSR(EntryMBB); + SelectAllBasicBlocks(Fn); // If the first basic block in the function has live ins that need to be // copied into vregs, emit the copies into the top of the block before // emitting the code for the block. - MachineBasicBlock *EntryMBB = MF->begin(); const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII); + // Insert copies in the entry block and the return blocks. + if (FuncInfo->SplitCSR) + TLI->insertCopiesSplitCSR(EntryMBB, Returns); + DenseMap<unsigned, unsigned> LiveInMap; if (!FuncInfo->ArgDbgValues.empty()) for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(), @@ -882,7 +927,7 @@ void SelectionDAGISel::DoInstructionSelection() { // graph) and preceding back toward the beginning (the entry // node). while (ISelPosition != CurDAG->allnodes_begin()) { - SDNode *Node = --ISelPosition; + SDNode *Node = &*--ISelPosition; // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes, // but there are currently some corner cases that it misses. Also, this // makes it theoretically possible to disable the DAGCombiner. 
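The new SplitCSR path above only activates when the target supports it and every exit block of the function ends in a return; unreachable-terminated blocks are tolerated, and any other exit disables the optimization. The eligibility scan can be sketched standalone:

#include <cstdio>
#include <string>
#include <vector>

// Toy model of the return-block collection above: callee-saved register
// copies can be split only if all real exits are returns.
enum class Term { Ret, Unreachable, Branch };
struct Block { std::string name; Term term; bool hasSucc; };

int main() {
  std::vector<Block> fn = {{"entry", Term::Branch, true},
                           {"exit", Term::Ret, false},
                           {"trap", Term::Unreachable, false}};
  std::vector<const Block *> returns;
  bool splitCSR = true;
  for (const Block &bb : fn) {
    if (bb.hasSucc)
      continue; // not an exit block
    if (bb.term == Term::Unreachable)
      continue; // ignored, as in the code above
    if (bb.term == Term::Ret) {
      returns.push_back(&bb);
      continue;
    }
    splitCSR = false; // some other terminator exits the function
    break;
  }
  std::printf("SplitCSR=%d, %zu return block(s)\n", splitCSR, returns.size());
  return 0;
}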
@@ -916,14 +961,47 @@ void SelectionDAGISel::DoInstructionSelection() { PostprocessISelDAG(); } +static bool hasExceptionPointerOrCodeUser(const CatchPadInst *CPI) { + for (const User *U : CPI->users()) { + if (const IntrinsicInst *EHPtrCall = dyn_cast<IntrinsicInst>(U)) { + Intrinsic::ID IID = EHPtrCall->getIntrinsicID(); + if (IID == Intrinsic::eh_exceptionpointer || + IID == Intrinsic::eh_exceptioncode) + return true; + } + } + return false; +} + /// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and /// do other setup for EH landing-pad blocks. bool SelectionDAGISel::PrepareEHLandingPad() { MachineBasicBlock *MBB = FuncInfo->MBB; - + const Constant *PersonalityFn = FuncInfo->Fn->getPersonalityFn(); + const BasicBlock *LLVMBB = MBB->getBasicBlock(); const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout())); + // Catchpads have one live-in register, which typically holds the exception + // pointer or code. + if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) { + if (hasExceptionPointerOrCodeUser(CPI)) { + // Get or create the virtual register to hold the pointer or code. Mark + // the live in physreg and copy into the vreg. + MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn); + assert(EHPhysReg && "target lacks exception pointer register"); + MBB->addLiveIn(EHPhysReg); + unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC); + BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), + TII->get(TargetOpcode::COPY), VReg) + .addReg(EHPhysReg, RegState::Kill); + } + return true; + } + + if (!LLVMBB->isLandingPad()) + return true; + // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. MCSymbol *Label = MF->getMMI().addLandingPad(MBB); @@ -935,52 +1013,12 @@ bool SelectionDAGISel::PrepareEHLandingPad() { BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); - // If this is an MSVC-style personality function, we need to split the landing - // pad into several BBs. - const BasicBlock *LLVMBB = MBB->getBasicBlock(); - const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst(); - MF->getMMI().addPersonality(MBB, cast<Function>(LPadInst->getParent() - ->getParent() - ->getPersonalityFn() - ->stripPointerCasts())); - EHPersonality Personality = MF->getMMI().getPersonalityType(); - - if (isMSVCEHPersonality(Personality)) { - SmallVector<MachineBasicBlock *, 4> ClauseBBs; - const IntrinsicInst *ActionsCall = - dyn_cast<IntrinsicInst>(LLVMBB->getFirstInsertionPt()); - // Get all invoke BBs that unwind to this landingpad. - SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(), - MBB->pred_end()); - if (ActionsCall && ActionsCall->getIntrinsicID() == Intrinsic::eh_actions) { - // If this is a call to llvm.eh.actions followed by indirectbr, then we've - // run WinEHPrepare, and we should remove this block from the machine CFG. - // Mark the targets of the indirectbr as landingpads instead. - for (const BasicBlock *LLVMSucc : successors(LLVMBB)) { - MachineBasicBlock *ClauseBB = FuncInfo->MBBMap[LLVMSucc]; - // Add the edge from the invoke to the clause. - for (MachineBasicBlock *InvokeBB : InvokeBBs) - InvokeBB->addSuccessor(ClauseBB); - - // Mark the clause as a landing pad or MI passes will delete it. - ClauseBB->setIsLandingPad(); - } - } - - // Remove the edge from the invoke to the lpad. 
- for (MachineBasicBlock *InvokeBB : InvokeBBs) - InvokeBB->removeSuccessor(MBB); - - // Don't select instructions for the landingpad. - return false; - } - // Mark exception register as live in. - if (unsigned Reg = TLI->getExceptionPointerRegister()) + if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn)) FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); // Mark exception selector register as live in. - if (unsigned Reg = TLI->getExceptionSelectorRegister()) + if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn)) FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC); return true; @@ -992,9 +1030,9 @@ bool SelectionDAGISel::PrepareEHLandingPad() { static bool isFoldedOrDeadInstruction(const Instruction *I, FunctionLoweringInfo *FuncInfo) { return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. - !isa<TerminatorInst>(I) && // Terminators aren't folded. + !isa<TerminatorInst>(I) && // Terminators aren't folded. !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. - !isa<LandingPadInst>(I) && // Landingpad instructions aren't folded. + !I->isEHPad() && // EH pad instructions aren't folded. !FuncInfo->isExportedInst(I); // Exported instrs must be computed. } @@ -1143,17 +1181,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FuncInfo->VisitedBBs.insert(LLVMBB); } - BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI(); + BasicBlock::const_iterator const Begin = + LLVMBB->getFirstNonPHI()->getIterator(); BasicBlock::const_iterator const End = LLVMBB->end(); BasicBlock::const_iterator BI = End; FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; + if (!FuncInfo->MBB) + continue; // Some blocks like catchpads have no code or MBB. FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); // Setup an EH landing-pad block. FuncInfo->ExceptionPointerVirtReg = 0; FuncInfo->ExceptionSelectorVirtReg = 0; - if (LLVMBB->isLandingPad()) + if (LLVMBB->isEHPad()) if (!PrepareEHLandingPad()) continue; @@ -1192,7 +1233,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { unsigned NumFastIselRemaining = std::distance(Begin, End); // Do FastISel on as many instructions as possible. for (; BI != Begin; --BI) { - const Instruction *Inst = std::prev(BI); + const Instruction *Inst = &*std::prev(BI); // If we no longer require this instruction, skip it. if (isFoldedOrDeadInstruction(Inst, FuncInfo)) { @@ -1212,8 +1253,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // then see if there is a load right before the selected instructions. // Try to fold the load if so. const Instruction *BeforeInst = Inst; - while (BeforeInst != Begin) { - BeforeInst = std::prev(BasicBlock::const_iterator(BeforeInst)); + while (BeforeInst != &*Begin) { + BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst)); if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) break; } @@ -1245,7 +1286,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // For the purpose of debugging, just abort. 
report_fatal_error("FastISel didn't select the entire block"); - if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) { + if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() && + !Inst->use_empty()) { unsigned &R = FuncInfo->ValueMap[Inst]; if (!R) R = FuncInfo->CreateRegs(Inst->getType()); @@ -1253,7 +1295,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { bool HadTailCall = false; MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt; - SelectBasicBlock(Inst, BI, HadTailCall); + SelectBasicBlock(Inst->getIterator(), BI, HadTailCall); // If the call was emitted as a tail call, we're done with the block. // We also need to delete any previously emitted instructions. @@ -1483,35 +1525,39 @@ SelectionDAGISel::FinishBasicBlock() { CodeGenAndEmitDAG(); } - uint32_t UnhandledWeight = 0; - for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) - UnhandledWeight += SDB->BitTestCases[i].Cases[j].ExtraWeight; - + BranchProbability UnhandledProb = SDB->BitTestCases[i].Prob; for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { - UnhandledWeight -= SDB->BitTestCases[i].Cases[j].ExtraWeight; + UnhandledProb -= SDB->BitTestCases[i].Cases[j].ExtraProb; // Set the current basic block to the mbb we wish to insert the code into FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code - if (j+1 != ej) - SDB->visitBitTestCase(SDB->BitTestCases[i], - SDB->BitTestCases[i].Cases[j+1].ThisBB, - UnhandledWeight, - SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j], - FuncInfo->MBB); + + // If all cases cover a contiguous range, it is not necessary to jump to + // the default block after the last bit test fails. This is because the + // range check during bit test header creation has guaranteed that every + // case here doesn't go outside the range. + MachineBasicBlock *NextMBB; + if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) + NextMBB = SDB->BitTestCases[i].Cases[j + 1].TargetBB; + else if (j + 1 != ej) + NextMBB = SDB->BitTestCases[i].Cases[j + 1].ThisBB; else - SDB->visitBitTestCase(SDB->BitTestCases[i], - SDB->BitTestCases[i].Default, - UnhandledWeight, - SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j], - FuncInfo->MBB); + NextMBB = SDB->BitTestCases[i].Default; + SDB->visitBitTestCase(SDB->BitTestCases[i], + NextMBB, + UnhandledProb, + SDB->BitTestCases[i].Reg, + SDB->BitTestCases[i].Cases[j], + FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); + + if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) + break; } // Update PHI Nodes @@ -1642,14 +1688,7 @@ SelectionDAGISel::FinishBasicBlock() { /// one preferred by the target. /// ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { - RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault(); - - if (!Ctor) { - Ctor = ISHeuristic; - RegisterScheduler::setDefault(Ctor); - } - - return Ctor(this, OptLevel); + return ISHeuristic(this, OptLevel); } //===----------------------------------------------------------------------===// @@ -1961,7 +2000,7 @@ SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { } /// GetVBR - decode a vbr encoding whose top bit is set. -LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { assert(Val >= 128 && "Not a VBR"); Val &= 127; // Remove first vbr bit. 
@@ -2287,7 +2326,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, } /// CheckSame - Implements OP_CheckSame. -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) { @@ -2298,7 +2337,7 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, } /// CheckChildSame - Implements OP_CheckChildXSame. -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes, @@ -2310,20 +2349,20 @@ CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, } /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, const SelectionDAGISel &SDISel) { return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]); } /// CheckNodePredicate - Implements OP_CheckNodePredicate. -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, const SelectionDAGISel &SDISel, SDNode *N) { return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDNode *N) { uint16_t Opc = MatcherTable[MatcherIndex++]; @@ -2331,7 +2370,7 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, return N->getOpcode() == Opc; } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; @@ -2341,7 +2380,7 @@ CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL, unsigned ChildNo) { @@ -2351,14 +2390,14 @@ CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, DL); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { return cast<CondCodeSDNode>(N)->get() == (ISD::CondCode)MatcherTable[MatcherIndex++]; } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; @@ -2369,7 +2408,7 @@ CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool 
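GetVBR, whose body is only partially visible in the hunk context above, decodes the matcher tables' variable-width integers: 7 payload bits per byte, with the high bit flagging a continuation byte. This reconstruction assumes that standard scheme and should be read as a sketch, not as the authoritative decoder:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Assumed completion of GetVBR: the first byte (already read) has its
// continuation bit set; keep consuming bytes until one has bit 7 clear.
static uint64_t getVBR(uint64_t Val, const unsigned char *Table,
                       unsigned &Idx) {
  assert(Val >= 128 && "Not a VBR");
  Val &= 127; // strip the continuation bit of the first byte
  unsigned Shift = 7;
  uint64_t NextBits;
  do {
    NextBits = Table[Idx++];
    Val |= (NextBits & 127) << Shift;
    Shift += 7;
  } while (NextBits & 128);
  return Val;
}

int main() {
  // 300 = 0b1_0010_1100: low 7 bits 0x2C with continuation, then 0x02.
  const unsigned char Table[] = {0x80 | 0x2C, 0x02};
  unsigned Idx = 1; // the first byte has already been consumed
  uint64_t V = getVBR(Table[0], Table, Idx);
  std::printf("decoded %llu, consumed %u bytes\n",
              (unsigned long long)V, Idx); // decoded 300, consumed 2 bytes
  return 0;
}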
CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { int64_t Val = MatcherTable[MatcherIndex++]; @@ -2380,7 +2419,7 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, return C && C->getSExtValue() == Val; } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, unsigned ChildNo) { if (ChildNo >= N.getNumOperands()) @@ -2388,7 +2427,7 @@ CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo)); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; @@ -2401,7 +2440,7 @@ CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, return C && SDISel.CheckAndMask(N.getOperand(0), C, Val); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 4df5ede388fc..2764688518c2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -80,9 +80,16 @@ namespace llvm { return true; } - static bool hasNodeAddressLabel(const SDNode *Node, - const SelectionDAG *Graph) { - return true; + static std::string getNodeIdentifierLabel(const SDNode *Node, + const SelectionDAG *Graph) { + std::string R; + raw_string_ostream OS(R); +#ifndef NDEBUG + OS << 't' << Node->PersistentId; +#else + OS << static_cast<const void *>(Node); +#endif + return R; } /// If you want to override the dot attributes printed for a particular diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 34688df4765b..050ec2116c5d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -95,6 +96,9 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType); const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); + auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo(); + MFI->markAsStatepointSpillSlotObjectIndex(FI); + Builder.FuncInfo.StatepointStackSlots.push_back(FI); AllocatedStackSlots.push_back(true); return SpillSlot; @@ -105,8 +109,8 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, return Builder.DAG.getFrameIndex(FI, ValueType); } // Note: We deliberately choose to advance this only on the failing path. - // Doing so on the suceeding path involes a bit of complexity that caused a - // minor bug previously. 
Unless performance shows this matters, please + // Doing so on the succeeding path involves a bit of complexity that caused + // a minor bug previously. Unless performance shows this matters, please // keep this code as simple as possible. NextSlotToAllocate++; } @@ -119,7 +123,7 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, static Optional<int> findPreviousSpillSlot(const Value *Val, SelectionDAGBuilder &Builder, int LookUpDepth) { - // Can not look any futher - give up now + // Can not look any further - give up now if (LookUpDepth <= 0) return Optional<int>(); @@ -196,7 +200,7 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, /// Try to find existing copies of the incoming values in stack slots used for /// statepoint spilling. If we can find a spill slot for the incoming value, /// mark that slot as allocated, and reuse the same slot for this safepoint. -/// This helps to avoid series of loads and stores that only serve to resuffle +/// This helps to avoid series of loads and stores that only serve to reshuffle /// values on the stack between calls. static void reservePreviousStackSlotForValue(const Value *IncomingValue, SelectionDAGBuilder &Builder) { @@ -255,7 +259,7 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases, SmallVectorImpl<const Value *> &Relocs, SelectionDAGBuilder &Builder) { - // This is horribly ineffecient, but I don't care right now + // This is horribly inefficient, but I don't care right now SmallSet<SDValue, 64> Seen; SmallVector<const Value *, 64> NewBases, NewPtrs, NewRelocs; @@ -283,13 +287,29 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases, /// call node. Also update NodeMap so that getValue(statepoint) will /// reference lowered call result static SDNode * -lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, +lowerCallFromStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB, SelectionDAGBuilder &Builder, SmallVectorImpl<SDValue> &PendingExports) { ImmutableCallSite CS(ISP.getCallSite()); - SDValue ActualCallee = Builder.getValue(ISP.getCalledValue()); + SDValue ActualCallee; + + if (ISP.getNumPatchBytes() > 0) { + // If we've been asked to emit a nop sequence instead of a call instruction + // for this statepoint then don't lower the call target, but use a constant + // `null` instead. Not lowering the call target lets statepoint clients get + // away without providing a physical address for the symbolic call target at + // link time. 
+ + const auto &TLI = Builder.DAG.getTargetLoweringInfo(); + const auto &DL = Builder.DAG.getDataLayout(); + + unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace(); + ActualCallee = Builder.DAG.getConstant(0, Builder.getCurSDLoc(), + TLI.getPointerTy(DL, AS)); + } else + ActualCallee = Builder.getValue(ISP.getCalledValue()); assert(CS.getCallingConv() != CallingConv::AnyReg && "anyregcc is not supported on statepoints!"); @@ -300,7 +320,7 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, SDValue ReturnValue, CallEndVal; std::tie(ReturnValue, CallEndVal) = Builder.lowerCallOperands( ISP.getCallSite(), ImmutableStatepoint::CallArgsBeginPos, - ISP.getNumCallArgs(), ActualCallee, DefTy, LandingPad, + ISP.getNumCallArgs(), ActualCallee, DefTy, EHPadBB, false /* IsPatchPoint */); SDNode *CallEnd = CallEndVal.getNode(); @@ -317,25 +337,33 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, // ch, glue = callseq_end ch, glue // get_return_value ch, glue // - // get_return_value can either be a CopyFromReg to grab the return value from - // %RAX, or it can be a LOAD to load a value returned by reference via a stack - // slot. + // get_return_value can either be a sequence of CopyFromReg instructions + // to grab the return value from the return register(s), or it can be a LOAD + // to load a value returned by reference via a stack slot. - if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg || - CallEnd->getOpcode() == ISD::LOAD)) - CallEnd = CallEnd->getOperand(0).getNode(); + if (HasDef) { + if (CallEnd->getOpcode() == ISD::LOAD) + CallEnd = CallEnd->getOperand(0).getNode(); + else + while (CallEnd->getOpcode() == ISD::CopyFromReg) + CallEnd = CallEnd->getOperand(0).getNode(); + } assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!"); - if (HasDef) { - if (CS.isInvoke()) { - // Result value will be used in different basic block for invokes - // so we need to export it now. But statepoint call has a different type - // than the actuall call. It means that standart exporting mechanism will - // create register of the wrong type. So instead we need to create - // register with correct type and save value into it manually. + // Export the result value if needed + const Instruction *GCResult = ISP.getGCResult(); + if (HasDef && GCResult) { + if (GCResult->getParent() != CS.getParent()) { + // Result value will be used in a different basic block so we need to + // export it now. + // Default exporting mechanism will not work here because statepoint call + // has a different type than the actual call. It means that by default + // llvm will create export register of the wrong type (always i32 in our + // case). So instead we need to create export register with correct type + // manually. // TODO: To eliminate this problem we can remove gc.result intrinsics - // completelly and make statepoint call to return a tuple. + // completely and make statepoint call to return a tuple. unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType()); RegsForValue RFV( *Builder.DAG.getContext(), Builder.DAG.getTargetLoweringInfo(), @@ -347,8 +375,9 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, PendingExports.push_back(Chain); Builder.FuncInfo.ValueMap[CS.getInstruction()] = Reg; } else { - // The value of the statepoint itself will be the value of call itself. - // We'll replace the actually call node shortly. 
gc_result will grab + // Result value will be used in the same basic block. Don't export it or + // perform any explicit register copies. + // We'll replace the actual call node shortly. gc_result will grab + // this value. Builder.setValue(CS.getInstruction(), ReturnValue); } @@ -411,7 +440,8 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // chaining stores one after another, this may allow // a bit more optimal scheduling for them Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc, - MachinePointerInfo::getFixedStack(Index), + MachinePointerInfo::getFixedStack( + Builder.DAG.getMachineFunction(), Index), false, false, 0); Builder.StatepointLowering.setLocation(Incoming, Loc); @@ -483,21 +513,21 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // to the GCStrategy from there (yet). GCStrategy &S = Builder.GFI->getStrategy(); for (const Value *V : Bases) { - auto Opt = S.isGCManagedPointer(V); + auto Opt = S.isGCManagedPointer(V->getType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed base pointer found in statepoint"); } } for (const Value *V : Ptrs) { - auto Opt = S.isGCManagedPointer(V); + auto Opt = S.isGCManagedPointer(V->getType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed derived pointer found in statepoint"); } } for (const Value *V : Relocations) { - auto Opt = S.isGCManagedPointer(V); + auto Opt = S.isGCManagedPointer(V->getType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed pointer relocated"); } @@ -581,19 +611,21 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex(); } else { // Record value as visited, but not spilled. This is case for allocas - // and constants. For this values we can avoid emiting spill load while + // and constants. For these values we can avoid emitting spill load while // visiting corresponding gc_relocate. // Actually we do not need to record them in this map at all. - // We do this only to check that we are not relocating any unvisited value. + // We do this only to check that we are not relocating any unvisited + // value. SpillMap[V] = None; // Default llvm mechanisms for exporting values which are used in // different basic blocks does not work for gc relocates. // Note that it would be incorrect to teach llvm that all relocates are - // uses of the corresponging values so that it would automatically + // uses of the corresponding values so that it would automatically // export them. Relocates of the spilled values does not use original // value. - if (StatepointSite.getCallSite().isInvoke()) + if (RelocateOpers.getUnderlyingCallSite().getParent() != + StatepointInstr->getParent()) Builder.ExportFromCurrentBlock(V); } } @@ -608,7 +640,7 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { } void SelectionDAGBuilder::LowerStatepoint( - ImmutableStatepoint ISP, MachineBasicBlock *LandingPad /*=nullptr*/) { + ImmutableStatepoint ISP, const BasicBlock *EHPadBB /*= nullptr*/) { // The basic scheme here is that information about both the original call and // the safepoint is encoded in the CallInst. We create a temporary call and // lower it, then reverse engineer the calling sequence. @@ -620,14 +652,12 @@ void SelectionDAGBuilder::LowerStatepoint( ImmutableCallSite CS(ISP.getCallSite()); #ifndef NDEBUG - // Consistency check. Don't do this for invokes.
It would be too - // expensive to preserve this information across different basic blocks - if (!CS.isInvoke()) { - for (const User *U : CS->users()) { - const CallInst *Call = cast<CallInst>(U); - if (isGCRelocate(Call)) - StatepointLowering.scheduleRelocCall(*Call); - } + // Consistency check. Check only relocates in the same basic block as their + // statepoint. + for (const User *U : CS->users()) { + const CallInst *Call = cast<CallInst>(U); + if (isGCRelocate(Call) && Call->getParent() == CS.getParent()) + StatepointLowering.scheduleRelocCall(*Call); } #endif @@ -648,7 +678,7 @@ void SelectionDAGBuilder::LowerStatepoint( // Get call node, we will replace it later with statepoint SDNode *CallNode = - lowerCallFromStatepoint(ISP, LandingPad, *this, PendingExports); + lowerCallFromStatepoint(ISP, EHPadBB, *this, PendingExports); // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END // nodes with all the appropriate arguments and return values. @@ -790,7 +820,7 @@ void SelectionDAGBuilder::LowerStatepoint( // Replace original call DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root - // Remove originall call node + // Remove original call node DAG.DeleteNode(CallNode); // DON'T set the root - under the assumption that it's already set past the @@ -809,8 +839,9 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) { Instruction *I = cast<Instruction>(CI.getArgOperand(0)); assert(isStatepoint(I) && "first argument must be a statepoint token"); - if (isa<InvokeInst>(I)) { - // For invokes we should have stored call result in a virtual register. + if (I->getParent() != CI.getParent()) { + // Statepoint is in a different basic block so we should have stored call + // result in a virtual register. // We can not use default getValue() functionality to copy value from this // register because statepoint and actual call return types can be // different, and getValue() will use CopyFromReg of the wrong type, @@ -833,9 +864,10 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { #ifndef NDEBUG // Consistency check - // We skip this check for invoke statepoints. It would be too expensive to - // preserve validation info through different basic blocks. - if (!RelocateOpers.isTiedToInvoke()) { + // We skip this check for relocates not in the same basic block as their + // statepoint. It would be too expensive to preserve validation info through + // different basic blocks. + if (RelocateOpers.getStatepoint()->getParent() == CI.getParent()) { StatepointLowering.relocCallVisited(CI); } #endif @@ -862,13 +894,14 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { // Be conservative: flush all pending loads // TODO: Probably we can be less restrictive on this, - // it may allow more scheduling opprtunities + // it may allow more scheduling opportunities.
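The statepoint hunks above all make the same substitution: checks that used to key off CS.isInvoke() / isTiedToInvoke() now compare parent basic blocks. A minimal standalone sketch of that predicate (the helper name needsCrossBlockExport is ours, not LLVM's):

    #include "llvm/IR/Instruction.h"

    // A value produced while lowering a statepoint can be reused directly
    // only while the same basic block is still being emitted; any use in
    // another block must go through an exported virtual register.
    static bool needsCrossBlockExport(const llvm::Instruction *Statepoint,
                                      const llvm::Instruction *Use) {
      return Statepoint->getParent() != Use->getParent();
    }

This is strictly more general than the old invoke-based test: an invoke's result is always consumed in a successor block, but the new check also catches cross-block uses of ordinary call statepoints.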
SDValue Chain = getRoot(); SDValue SpillLoad = - DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, - MachinePointerInfo::getFixedStack(*DerivedPtrLocation), - false, false, false, 0); + DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), + *DerivedPtrLocation), + false, false, false, 0); // Again, be conservative, don't emit pending loads DAG.setRoot(SpillLoad.getValue(1)); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index fbf651277c7f..c64d882d69a4 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -85,21 +85,22 @@ void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS, std::pair<SDValue, SDValue> TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, - const SDValue *Ops, unsigned NumOps, + ArrayRef<SDValue> Ops, bool isSigned, SDLoc dl, bool doesNotReturn, bool isReturnValueUsed) const { TargetLowering::ArgListTy Args; - Args.reserve(NumOps); + Args.reserve(Ops.size()); TargetLowering::ArgListEntry Entry; - for (unsigned i = 0; i != NumOps; ++i) { - Entry.Node = Ops[i]; + for (SDValue Op : Ops) { + Entry.Node = Op; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.isSExt = shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned); - Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned); + Entry.isSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); + Entry.isZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); Args.push_back(Entry); } + if (LC == RTLIB::UNKNOWN_LIBCALL) report_fatal_error("Unsupported library call operation!"); SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), @@ -115,9 +116,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, return LowerCallTo(CLI); } - -/// SoftenSetCCOperands - Soften the operands of a comparison. This code is -/// shared among BR_CC, SELECT_CC, and SETCC handlers. +/// Soften the operands of a comparison. This code is shared among BR_CC, +/// SELECT_CC, and SETCC handlers. void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, @@ -127,6 +127,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, // Expand into one or more soft-fp libcall(s). RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL; + bool ShouldInvertCC = false; switch (CCCode) { case ISD::SETEQ: case ISD::SETOEQ: @@ -166,34 +167,38 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : (VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128; break; - default: + case ISD::SETONE: + // SETONE = SETOLT | SETOGT + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : + (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + break; + case ISD::SETUEQ: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128; + LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : + (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + break; + default: + // Invert CC for unordered comparisons + ShouldInvertCC = true; switch (CCCode) { - case ISD::SETONE: - // SETONE = SETOLT | SETOGT - LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : - (VT == MVT::f64) ? 
RTLIB::OLT_F64 : RTLIB::OLT_F128; - // Fallthrough - case ISD::SETUGT: - LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : - (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; - break; - case ISD::SETUGE: - LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : - (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; - break; case ISD::SETULT: - LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : - (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : + (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; break; case ISD::SETULE: - LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : + LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : + (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + break; + case ISD::SETUGT: + LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128; break; - case ISD::SETUEQ: - LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : - (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + case ISD::SETUGE: + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; break; default: llvm_unreachable("Do not know how to soften this setcc!"); } @@ -201,17 +206,21 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, // Use the target specific return value for comparison lib calls. EVT RetVT = getCmpLibcallReturnType(); - SDValue Ops[2] = { NewLHS, NewRHS }; - NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, + SDValue Ops[2] = {NewLHS, NewRHS}; + NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/, dl).first; NewRHS = DAG.getConstant(0, dl, RetVT); + CCCode = getCmpLibcallCC(LC1); + if (ShouldInvertCC) + CCCode = getSetCCInverse(CCCode, /*isInteger=*/true); + if (LC2 != RTLIB::UNKNOWN_LIBCALL) { SDValue Tmp = DAG.getNode( ISD::SETCC, dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), NewLHS, NewRHS, DAG.getCondCode(CCCode)); - NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, + NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/, dl).first; NewLHS = DAG.getNode( ISD::SETCC, dl, @@ -222,9 +231,8 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, } } -/// getJumpTableEncoding - Return the entry encoding for a jump table in the -/// current function. The returned value is a member of the -/// MachineJumpTableInfo::JTEntryKind enum. +/// Return the entry encoding for a jump table in the current function. The +/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. unsigned TargetLowering::getJumpTableEncoding() const { // In non-pic modes, just use the address of a block. if (getTargetMachine().getRelocationModel() != Reloc::PIC_) @@ -250,9 +258,8 @@ SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, return Table; } -/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the -/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an -/// MCExpr. +/// This returns the relocation base for the given PIC jumptable, the same as +/// getPICJumpTableRelocBase, but as an MCExpr. const MCExpr * TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,MCContext &Ctx) const{ @@ -279,10 +286,9 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // Optimization Methods //===----------------------------------------------------------------------===// -/// ShrinkDemandedConstant - Check to see if the specified operand of the -/// specified instruction is a constant integer.
If so, check to see if there -/// are any bits set in the constant that are not demanded. If so, shrink the -/// constant and return true. +/// Check to see if the specified operand of the specified instruction is a +/// constant integer. If so, check to see if there are any bits set in the +/// constant that are not demanded. If so, shrink the constant and return true. bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded) { SDLoc dl(Op); @@ -317,10 +323,9 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, return false; } -/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the -/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening -/// cast, but it could be generalized for targets with other types of -/// implicit widening casts. +/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. +/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be +/// generalized for targets with other types of implicit widening casts. bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, @@ -366,13 +371,13 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, return false; } -/// SimplifyDemandedBits - Look at Op. At this point, we know that only the -/// DemandedMask bits of the result of Op are ever used downstream. If we can -/// use this information to simplify Op, create a new simplified DAG node and -/// return true, returning the original and new nodes in Old and New. Otherwise, -/// analyze the expression and return a mask of KnownOne and KnownZero bits for -/// the expression (used to simplify the caller). The KnownZero/One bits may -/// only be accurate for those bits in the DemandedMask. +/// Look at Op. At this point, we know that only the DemandedMask bits of the +/// result of Op are ever used downstream. If we can use this information to +/// simplify Op, create a new simplified DAG node and return true, returning the +/// original and new nodes in Old and New. Otherwise, analyze the expression and +/// return a mask of KnownOne and KnownZero bits for the expression (used to +/// simplify the caller). The KnownZero/One bits may only be accurate for those +/// bits in the DemandedMask. bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, APInt &KnownZero, @@ -1061,7 +1066,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); - if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) { + if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() && + Op.getOperand(0).getValueType() != MVT::f128) { + // Cannot eliminate/lower SHL for f128 yet. EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32; // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. @@ -1120,9 +1127,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return false; } -/// computeKnownBitsForTargetNode - Determine which of the bits specified -/// in Mask are known to be either zero or one and return them in the -/// KnownZero/KnownOne bitsets. +/// Determine which of the bits specified in Mask are known to be either zero or +/// one and return them in the KnownZero/KnownOne bitsets. 
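The ShrinkDemandedConstant comment above compresses a simple invariant: constant bits that fall outside the demanded mask can never influence an observed result bit of (x & C) or (x | C), so the constant may be cleared down to C & Demanded. A self-contained illustration with llvm::APInt; this shows the concept only, not the TargetLoweringOpt plumbing:

    #include "llvm/ADT/APInt.h"
    #include <cassert>

    // Clear constant bits that no consumer of the result ever observes.
    static llvm::APInt shrinkConstant(const llvm::APInt &C,
                                      const llvm::APInt &Demanded) {
      return C & Demanded;
    }

    int main() {
      llvm::APInt C(32, 0xFF00FFFFULL);        // original AND/OR mask
      llvm::APInt Demanded(32, 0x0000FFFFULL); // only low 16 bits are used
      assert(shrinkConstant(C, Demanded) == 0x0000FFFFULL);
      return 0;
    }

Shrinking matters because the smaller constant is more likely to fit an immediate field or expose further combines, which is why the demanded-bits walk tries it early.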
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, @@ -1137,9 +1143,8 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); } -/// ComputeNumSignBitsForTargetNode - This method can be implemented by -/// targets that want to expose additional information about sign bits to the -/// DAG Combiner. +/// This method can be implemented by targets that want to expose additional +/// information about sign bits to the DAG Combiner. unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &, unsigned Depth) const { @@ -1152,10 +1157,8 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, return 1; } -/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly -/// one bit set. This differs from computeKnownBits in that it doesn't need to -/// determine which bit is set. -/// +/// Test if the given value is known to have exactly one bit set. This differs +/// from computeKnownBits in that it doesn't need to determine which bit is set. static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { // A left-shift of a constant one will have exactly one bit set, because // shifting the bit off the end is undefined. @@ -1239,8 +1242,8 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const { return CN->isNullValue(); } -/// SimplifySetCC - Try to simplify a setcc built with the specified operands -/// and cc. If it is unable to simplify it, return a null SDValue. +/// Try to simplify a setcc built with the specified operands and cc. If it is +/// unable to simplify it, return a null SDValue. SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, @@ -1270,7 +1273,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); - if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an @@ -1335,7 +1338,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, PreExt = N0->getOperand(0); } else if (N0->getOpcode() == ISD::AND) { // DAGCombine turns costly ZExts into ANDs - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) + if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) if ((C->getAPIntValue()+1).isPowerOf2()) { MinBits = C->getAPIntValue().countTrailingOnes(); PreExt = N0->getOperand(0); @@ -1345,7 +1348,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, MinBits = N0->getOperand(0).getValueSizeInBits(); PreExt = N0->getOperand(0); Signed = true; - } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) { + } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) { // ZEXTLOAD / SEXTLOAD if (LN0->getExtensionType() == ISD::ZEXTLOAD) { MinBits = LN0->getMemoryVT().getSizeInBits(); @@ -1697,8 +1700,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) && N0.getOpcode() == ISD::AND) { auto &DL = DAG.getDataLayout(); - if (ConstantSDNode *AndRHS = - dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { EVT ShiftTy = DCI.isBeforeLegalize() ? 
getPointerTy(DL) : getShiftAmountTy(N0.getValueType(), DL); @@ -1728,8 +1730,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (X & -256) == 256 -> (X >> 8) == 1 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && N0.getOpcode() == ISD::AND && N0.hasOneUse()) { - if (ConstantSDNode *AndRHS = - dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { const APInt &AndRHSC = AndRHS->getAPIntValue(); if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); @@ -1783,7 +1784,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Constant fold or commute setcc. SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl); if (O.getNode()) return O; - } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) { + } else if (auto *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) { // If the RHS of an FP comparison is a constant, simplify it away in // some cases. if (CFP->getValueAPF().isNaN()) { @@ -1900,8 +1901,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // to be careful about increasing register pressure needlessly. bool LegalRHSImm = false; - if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) { - if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) { + if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { // Turn (X+C1) == C2 --> X == C2-C1 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) { return DAG.getSetCC(dl, VT, N0.getOperand(0), @@ -1924,7 +1925,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } // Turn (C1-X) == C2 --> X == C1-C2 - if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { + if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { return DAG.getSetCC(dl, VT, N0.getOperand(1), @@ -2075,12 +2076,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return SDValue(); } -/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the -/// node is a GlobalAddress + offset. +/// Returns true (and the GlobalValue and the offset) if the node is a +/// GlobalAddress + offset. bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const { - if (isa<GlobalAddressSDNode>(N)) { - GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N); + if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) { GA = GASD->getGlobal(); Offset += GASD->getOffset(); return true; @@ -2090,14 +2090,12 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, SDValue N1 = N->getOperand(0); SDValue N2 = N->getOperand(1); if (isGAPlusOffset(N1.getNode(), GA, Offset)) { - ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2); - if (V) { + if (auto *V = dyn_cast<ConstantSDNode>(N2)) { Offset += V->getSExtValue(); return true; } } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) { - ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1); - if (V) { + if (auto *V = dyn_cast<ConstantSDNode>(N1)) { Offset += V->getSExtValue(); return true; } @@ -2107,9 +2105,8 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, return false; } - -SDValue TargetLowering:: -PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { +SDValue TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { // Default implementation: no optimization. 
return SDValue(); } @@ -2159,9 +2156,9 @@ TargetLowering::getConstraintType(StringRef Constraint) const { return C_Unknown; } -/// LowerXConstraint - try to replace an X constraint, which matches anything, -/// with another that has more specific requirements based on the type of the -/// corresponding operand. +/// Try to replace an X constraint, which matches anything, with another that +/// has more specific requirements based on the type of the corresponding +/// operand. const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ if (ConstraintVT.isInteger()) return "r"; @@ -2170,8 +2167,8 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ return nullptr; } -/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops -/// vector. If it is invalid, don't add anything to Ops. +/// Lower the specified operand into the Ops vector. +/// If it is invalid, don't add anything to Ops. void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, @@ -2284,31 +2281,30 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, //===----------------------------------------------------------------------===// // Constraint Selection. -/// isMatchingInputConstraint - Return true of this is an input operand that is -/// a matching constraint like "4". +/// Return true if this is an input operand that is a matching constraint like +/// "4". bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const { assert(!ConstraintCode.empty() && "No known constraint!"); return isdigit(static_cast<unsigned char>(ConstraintCode[0])); } -/// getMatchedOperand - If this is an input matching constraint, this method -/// returns the output operand it matches. +/// If this is an input matching constraint, this method returns the output +/// operand it matches. unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { assert(!ConstraintCode.empty() && "No known constraint!"); return atoi(ConstraintCode.c_str()); } - -/// ParseConstraints - Split up the constraint string from the inline -/// assembly value into the specific constraints and their prefixes, -/// and also tie in the associated operand values. +/// Split up the constraint string from the inline assembly value into the +/// specific constraints and their prefixes, and also tie in the associated +/// operand values. /// If this returns an empty vector, and if the constraint string itself /// isn't empty, there was an error parsing. TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, ImmutableCallSite CS) const { - /// ConstraintOperands - Information about all of the constraints. + /// Information about all of the constraints. AsmOperandInfoVector ConstraintOperands; const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); unsigned maCount = 0; // Largest number of multiple alternative constraints. @@ -2483,16 +2479,13 @@ TargetLowering::ParseConstraints(const DataLayout &DL, " incompatible type!"); } } - } } return ConstraintOperands; }
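The two accessors above encode GCC's tied-operand convention: an input whose constraint string is a bare number ("0", "4", ...) must be allocated to the same location as the output operand with that index. A freestanding sketch of exactly that convention (the names are ours):

    #include <cctype>
    #include <cstdlib>
    #include <string>

    // An input operand whose constraint code starts with a digit is tied to
    // the output operand with that index, e.g. the "0" in
    //   asm("addl %2, %0" : "=r"(out) : "0"(in), "r"(addend));
    static bool isTiedInput(const std::string &Code) {
      return !Code.empty() && std::isdigit(static_cast<unsigned char>(Code[0]));
    }

    static unsigned tiedOutputIndex(const std::string &Code) {
      return static_cast<unsigned>(std::atoi(Code.c_str()));
    }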
- -/// getConstraintGenerality - Return an integer indicating how general CT -/// is. +/// Return an integer indicating how general CT is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { case TargetLowering::C_Other: @@ -2581,8 +2574,8 @@ TargetLowering::ConstraintWeight return weight; } -/// ChooseConstraint - If there are multiple different constraints that we -/// could pick for this operand (e.g. "imr") try to pick the 'best' one. +/// If there are multiple different constraints that we could pick for this +/// operand (e.g. "imr") try to pick the 'best' one. /// This is somewhat tricky: constraints fall into four classes: /// Other -> immediates and magic values /// Register -> one specific register @@ -2649,9 +2642,8 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, OpInfo.ConstraintType = BestType; } -/// ComputeConstraintToUse - Determines the constraint code and constraint -/// type to use for the specific AsmOperandInfo, setting -/// OpInfo.ConstraintCode and OpInfo.ConstraintType. +/// Determines the constraint code and constraint type to use for the specific +/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType. void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG) const { @@ -2717,6 +2709,16 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, return Mul; } +SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, + std::vector<SDNode *> *Created) const { + AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isIntDivCheap(N->getValueType(0), Attr)) + return SDValue(N,0); // Lower SDIV as SDIV + return SDValue(); +} + /// \brief Given an ISD::SDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. @@ -3036,3 +3038,46 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT); return true; } + +//===----------------------------------------------------------------------===// +// Implementation of Emulated TLS Model +//===----------------------------------------------------------------------===// + +SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, + SelectionDAG &DAG) const { + // Access to address of TLS variable xyz is lowered to a function call: + // __emutls_get_address( address of global variable named "__emutls_v.xyz" ) + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext()); + SDLoc dl(GA); + + ArgListTy Args; + ArgListEntry Entry; + std::string NameString = ("__emutls_v."
+ GA->getGlobal()->getName()).str(); + Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent()); + StringRef EmuTlsVarName(NameString); + GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName); + if (!EmuTlsVar) + EmuTlsVar = dyn_cast_or_null<GlobalVariable>( + VariableModule->getOrInsertGlobal(EmuTlsVarName, VoidPtrType)); + Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT); + Entry.Ty = VoidPtrType; + Args.push_back(Entry); + + SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()); + CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args), 0); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); + + // TLSADDR will be codegen'ed as a call. Inform MFI that function has calls. + // At least for X86 targets, maybe good for other targets too? + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setAdjustsStack(true); // Is this only for X86 target? + MFI->setHasCalls(true); + + assert((GA->getOffset() == 0) && + "Emulated TLS must have zero offset in GlobalAddressSDNode"); + return CallResult.first; +}
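To make the emulated-TLS lowering concrete: every address computation of a thread-local variable becomes a libcall. The symbols __emutls_v.<name> and __emutls_get_address are the real emutls contract used by the code above; the control-structure layout shown in the sketch is the usual compiler-rt one and should be read as an assumption, not something this diff defines:

    // What the lowering amounts to at the source level:
    //   __thread int xyz;   // original variable
    //   int *p = &xyz;      // becomes, roughly:
    //   int *p = (int *)__emutls_get_address(/* &__emutls_v.xyz */ control);
    extern "C" void *__emutls_get_address(void *control);

    struct EmuTlsControl {  // compiler-rt layout (assumption)
      unsigned long size;   // size of the variable in bytes
      unsigned long align;  // required alignment
      void *index;          // lazily assigned per-variable key
      void *templ;          // pointer to the initial value, or null
    };

__emutls_get_address allocates a per-thread copy (initialized from templ) on first access from each thread and returns the same copy on every subsequent access, which is why the lowering can simply forward its return value as the variable's address.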