Diffstat (limited to 'lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1758
1 file changed, 893 insertions, 865 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 49c922f560fa..e8950b58d42d 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -24,7 +24,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -111,10 +110,20 @@ static cl::opt<bool> MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads")); +static cl::opt<bool> + EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), + cl::desc("DAG combiner enable merging multiple stores " + "into a wider store")); + static cl::opt<unsigned> TokenFactorInlineLimit( "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors")); +static cl::opt<unsigned> StoreMergeDependenceLimit( + "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), + cl::desc("Limit the number of times for the same StoreNode and RootNode " + "to bail out in store merging dependence check")); + namespace { class DAGCombiner { @@ -152,6 +161,14 @@ namespace { /// which have not yet been combined to the worklist. SmallPtrSet<SDNode *, 32> CombinedNodes; + /// Map from candidate StoreNode to the pair of RootNode and count. + /// The count is used to track how many times we have seen the StoreNode + /// with the same RootNode bail out in dependence check. If we have seen + /// the bail out for the same pair many times over a limit, we won't + /// consider the StoreNode with the same RootNode as store merging + /// candidate again. + DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap; + // AA - Used for DAG load/store alias analysis. 
AliasAnalysis *AA; @@ -236,6 +253,7 @@ namespace { void removeFromWorklist(SDNode *N) { CombinedNodes.erase(N); PruningList.remove(N); + StoreRootCountMap.erase(N); auto It = WorklistMap.find(N); if (It == WorklistMap.end()) @@ -361,6 +379,7 @@ namespace { SDValue visitSUBE(SDNode *N); SDValue visitSUBCARRY(SDNode *N); SDValue visitMUL(SDNode *N); + SDValue visitMULFIX(SDNode *N); SDValue useDivRem(SDNode *N); SDValue visitSDIV(SDNode *N); SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N); @@ -421,7 +440,6 @@ namespace { SDValue visitFP_TO_SINT(SDNode *N); SDValue visitFP_TO_UINT(SDNode *N); SDValue visitFP_ROUND(SDNode *N); - SDValue visitFP_ROUND_INREG(SDNode *N); SDValue visitFP_EXTEND(SDNode *N); SDValue visitFNEG(SDNode *N); SDValue visitFABS(SDNode *N); @@ -470,7 +488,7 @@ namespace { SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags); - SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); + SDValue visitShiftByConstant(SDNode *N); SDValue foldSelectOfConstants(SDNode *N); SDValue foldVSelectOfConstants(SDNode *N); @@ -497,6 +515,7 @@ namespace { bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, SDValue &CC) const; bool isOneUseSetCC(SDValue N) const; + bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y); SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); @@ -510,7 +529,7 @@ namespace { SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); SDValue BuildLogBase2(SDValue V, const SDLoc &DL); - SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags); + SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags); SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); @@ -521,11 +540,11 @@ namespace { SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); - SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, + SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL); - SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); + SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); SDValue MatchLoadCombine(SDNode *N); SDValue MatchStoreCombine(StoreSDNode *N); SDValue ReduceLoadWidth(SDNode *N); @@ -742,6 +761,11 @@ CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); } +bool TargetLowering::DAGCombinerInfo:: +recursivelyDeleteUnusedNodes(SDNode *N) { + return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N); +} + void TargetLowering::DAGCombinerInfo:: CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); @@ -766,195 +790,6 @@ void DAGCombiner::deleteAndRecombine(SDNode *N) { DAG.DeleteNode(N); } -/// Return 1 if we can compute the negated form of the specified expression for -/// the same cost as the expression itself, or 2 if we can compute the negated -/// form more cheaply than the expression itself. -static char isNegatibleForFree(SDValue Op, bool LegalOperations, - const TargetLowering &TLI, - const TargetOptions *Options, - bool ForCodeSize, - unsigned Depth = 0) { - // fneg is removable even if it has multiple uses. 
- if (Op.getOpcode() == ISD::FNEG) - return 2; - - // Don't allow anything with multiple uses unless we know it is free. - EVT VT = Op.getValueType(); - const SDNodeFlags Flags = Op->getFlags(); - if (!Op.hasOneUse() && - !(Op.getOpcode() == ISD::FP_EXTEND && - TLI.isFPExtFree(VT, Op.getOperand(0).getValueType()))) - return 0; - - // Don't recurse exponentially. - if (Depth > 6) - return 0; - - switch (Op.getOpcode()) { - default: return false; - case ISD::ConstantFP: { - if (!LegalOperations) - return 1; - - // Don't invert constant FP values after legalization unless the target says - // the negated constant is legal. - return TLI.isOperationLegal(ISD::ConstantFP, VT) || - TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT, - ForCodeSize); - } - case ISD::BUILD_VECTOR: { - // Only permit BUILD_VECTOR of constants. - if (llvm::any_of(Op->op_values(), [&](SDValue N) { - return !N.isUndef() && !isa<ConstantFPSDNode>(N); - })) - return 0; - if (!LegalOperations) - return 1; - if (TLI.isOperationLegal(ISD::ConstantFP, VT) && - TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) - return 1; - return llvm::all_of(Op->op_values(), [&](SDValue N) { - return N.isUndef() || - TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT, - ForCodeSize); - }); - } - case ISD::FADD: - if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros()) - return 0; - - // After operation legalization, it might not be legal to create new FSUBs. - if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) - return 0; - - // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, - Options, ForCodeSize, Depth + 1)) - return V; - // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, - ForCodeSize, Depth + 1); - case ISD::FSUB: - // We can't turn -(A-B) into B-A when we honor signed zeros. - if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) - return 0; - - // fold (fneg (fsub A, B)) -> (fsub B, A) - return 1; - - case ISD::FMUL: - case ISD::FDIV: - // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, - Options, ForCodeSize, Depth + 1)) - return V; - - return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, - ForCodeSize, Depth + 1); - - case ISD::FP_EXTEND: - case ISD::FP_ROUND: - case ISD::FSIN: - return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, - ForCodeSize, Depth + 1); - } -} - -/// If isNegatibleForFree returns true, return the newly negated expression. -static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, - bool LegalOperations, bool ForCodeSize, - unsigned Depth = 0) { - // fneg is removable even if it has multiple uses. 
- if (Op.getOpcode() == ISD::FNEG) - return Op.getOperand(0); - - assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); - const TargetOptions &Options = DAG.getTarget().Options; - const SDNodeFlags Flags = Op->getFlags(); - - switch (Op.getOpcode()) { - default: llvm_unreachable("Unknown code"); - case ISD::ConstantFP: { - APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); - V.changeSign(); - return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); - } - case ISD::BUILD_VECTOR: { - SmallVector<SDValue, 4> Ops; - for (SDValue C : Op->op_values()) { - if (C.isUndef()) { - Ops.push_back(C); - continue; - } - APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF(); - V.changeSign(); - Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType())); - } - return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops); - } - case ISD::FADD: - assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros()); - - // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) - if (isNegatibleForFree(Op.getOperand(0), LegalOperations, - DAG.getTargetLoweringInfo(), &Options, ForCodeSize, - Depth + 1)) - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(1), Flags); - // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(1), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(0), Flags); - case ISD::FSUB: - // fold (fneg (fsub 0, B)) -> B - if (ConstantFPSDNode *N0CFP = - isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true)) - if (N0CFP->isZero()) - return Op.getOperand(1); - - // fold (fneg (fsub A, B)) -> (fsub B, A) - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - Op.getOperand(1), Op.getOperand(0), Flags); - - case ISD::FMUL: - case ISD::FDIV: - // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) - if (isNegatibleForFree(Op.getOperand(0), LegalOperations, - DAG.getTargetLoweringInfo(), &Options, ForCodeSize, - Depth + 1)) - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(1), Flags); - - // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), - Op.getOperand(0), - GetNegatedExpression(Op.getOperand(1), DAG, - LegalOperations, ForCodeSize, - Depth + 1), Flags); - - case ISD::FP_EXTEND: - case ISD::FSIN: - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1)); - case ISD::FP_ROUND: - return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(1)); - } -} - // APInts must be the same size for most operations, this helper // function zero extends the shorter of the pair so that they match. // We provide an Offset so that we can create bitwidths that won't overflow. 
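A note on the FP-negation logic removed above: isNegatibleForFree/GetNegatedExpression encode identities such as fneg(fmul X, Y) -> fmul(fneg X, Y) unconditionally, but gate fneg(fadd A, B) -> fsub(fneg A, B) and fneg(fsub A, B) -> fsub(B, A) behind no-signed-zeros, because the two sides can disagree on the sign of a zero result. A throwaway standalone check of those identities (not part of this patch; the values and names are arbitrary):

#include <cmath>
#include <cstdio>

int main() {
  // -(a - b) vs. (b - a): numerically equal, but when a == b the zero they
  // produce has opposite sign, which is why the fadd/fsub folds require 'nsz'.
  double a = 2.5, b = 2.5;
  std::printf("signbit(-(a-b)) = %d, signbit(b-a) = %d\n",
              std::signbit(-(a - b)), std::signbit(b - a));

  // fneg (fmul X, Y) -> (fmul (fneg X), Y) needs no such guard.
  double x = 3.0, y = -7.0;
  std::printf("-(x*y) = %g, (-x)*y = %g\n", -(x * y), (-x) * y);
  return 0;
}
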
@@ -1124,7 +959,6 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); if (!OpNode.getNode()) return SDValue(); - AddToWorklist(OpNode.getNode()); return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); } } @@ -1438,7 +1272,6 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { SDValue RV = DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1)); - AddToWorklist(N0.getNode()); if (Replace) ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); @@ -1591,8 +1424,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) { bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); for (SDNode *LN : UpdatedNodes) { - AddToWorklist(LN); AddUsersToWorklist(LN); + AddToWorklist(LN); } if (!NIsValid) continue; @@ -1673,6 +1506,10 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ADDCARRY: return visitADDCARRY(N); case ISD::SUBE: return visitSUBE(N); case ISD::SUBCARRY: return visitSUBCARRY(N); + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + case ISD::UMULFIXSAT: return visitMULFIX(N); case ISD::MUL: return visitMUL(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); @@ -1736,7 +1573,6 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); case ISD::FP_ROUND: return visitFP_ROUND(N); - case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); case ISD::FP_EXTEND: return visitFP_EXTEND(N); case ISD::FNEG: return visitFNEG(N); case ISD::FABS: return visitFABS(N); @@ -3308,6 +3144,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } } + if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) { + // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry) + if (SDValue Carry = getAsCarry(TLI, N0)) { + SDValue X = N1; + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X); + return DAG.getNode(ISD::ADDCARRY, DL, + DAG.getVTList(VT, Carry.getValueType()), NegX, Zero, + Carry); + } + } + return SDValue(); } @@ -3442,6 +3290,30 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) { return SDValue(); } +// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and +// UMULFIXSAT here. 
+SDValue DAGCombiner::visitMULFIX(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue Scale = N->getOperand(2); + EVT VT = N0.getValueType(); + + // fold (mulfix x, undef, scale) -> 0 + if (N0.isUndef() || N1.isUndef()) + return DAG.getConstant(0, SDLoc(N), VT); + + // Canonicalize constant to RHS (vector doesn't have to splat) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale); + + // fold (mulfix x, 0, scale) -> 0 + if (isNullConstant(N1)) + return DAG.getConstant(0, SDLoc(N), VT); + + return SDValue(); +} + SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -3537,7 +3409,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // x * 15 --> (x << 4) - x // x * -33 --> -((x << 5) + x) // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4) - if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) { + if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) { // TODO: We could handle more general decomposition of any constant by // having the target set a limit on number of ops and making a // callback to determine that sequence (similar to sqrt expansion). @@ -4083,10 +3955,10 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { if (VT.isVector()) { // fold (mulhs x, 0) -> 0 - if (ISD::isBuildVectorAllZeros(N1.getNode())) - return N1; - if (ISD::isBuildVectorAllZeros(N0.getNode())) - return N0; + // do not return N0/N1, because undef node may exist. + if (ISD::isBuildVectorAllZeros(N0.getNode()) || + ISD::isBuildVectorAllZeros(N1.getNode())) + return DAG.getConstant(0, DL, VT); } // fold (mulhs x, 0) -> 0 @@ -4095,7 +3967,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { // fold (mulhs x, 1) -> (sra x, size(x)-1) if (isOneConstant(N1)) return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, - DAG.getConstant(N0.getValueSizeInBits() - 1, DL, + DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); // fold (mulhs x, undef) -> 0 @@ -4130,10 +4002,10 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { if (VT.isVector()) { // fold (mulhu x, 0) -> 0 - if (ISD::isBuildVectorAllZeros(N1.getNode())) - return N1; - if (ISD::isBuildVectorAllZeros(N0.getNode())) - return N0; + // do not return N0/N1, because undef node may exist. + if (ISD::isBuildVectorAllZeros(N0.getNode()) || + ISD::isBuildVectorAllZeros(N1.getNode())) + return DAG.getConstant(0, DL, VT); } // fold (mulhu x, 0) -> 0 @@ -4265,6 +4137,18 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + // (umul_lohi N0, 0) -> (0, 0) + if (isNullConstant(N->getOperand(1))) { + SDValue Zero = DAG.getConstant(0, DL, VT); + return CombineTo(N, Zero, Zero); + } + + // (umul_lohi N0, 1) -> (N0, 0) + if (isOneConstant(N->getOperand(1))) { + SDValue Zero = DAG.getConstant(0, DL, VT); + return CombineTo(N, N->getOperand(0), Zero); + } + // If the type is twice as wide is legal, transform the mulhu to a wider // multiply plus a shift. if (VT.isSimple() && !VT.isVector()) { @@ -4290,13 +4174,29 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { } SDValue DAGCombiner::visitMULO(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); bool IsSigned = (ISD::SMULO == N->getOpcode()); + EVT CarryVT = N->getValueType(1); + SDLoc DL(N); + + // canonicalize constant to RHS. 
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0); + + // fold (mulo x, 0) -> 0 + no carry out + if (isNullOrNullSplat(N1)) + return CombineTo(N, DAG.getConstant(0, DL, VT), + DAG.getConstant(0, DL, CarryVT)); + // (mulo x, 2) -> (addo x, x) - if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1))) + if (ConstantSDNode *C2 = isConstOrConstSplat(N1)) if (C2->getAPIntValue() == 2) - return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N), - N->getVTList(), N->getOperand(0), N->getOperand(0)); + return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL, + N->getVTList(), N0, N0); return SDValue(); } @@ -4444,7 +4344,9 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) { if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) && Level <= AfterLegalizeTypes) { // Input types must be integer and the same. - if (XVT.isInteger() && XVT == Y.getValueType()) { + if (XVT.isInteger() && XVT == Y.getValueType() && + !(VT.isVector() && TLI.isTypeLegal(VT) && + !XVT.isVector() && !TLI.isTypeLegal(XVT))) { SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); return DAG.getNode(HandOpcode, DL, VT, Logic); } @@ -4770,8 +4672,8 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, return true; } - // Do not change the width of a volatile load. - if (LoadN->isVolatile()) + // Do not change the width of a volatile or atomic loads. + if (!LoadN->isSimple()) return false; // Do not generate loads of non-round integer types since these can @@ -4803,15 +4705,15 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST, if (!MemVT.isRound()) return false; - // Don't change the width of a volatile load. - if (LDST->isVolatile()) + // Don't change the width of a volatile or atomic loads. + if (!LDST->isSimple()) return false; // Verify that we are actually reducing a load width here. if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits()) return false; - // Ensure that this isn't going to produce an unsupported unaligned access. + // Ensure that this isn't going to produce an unsupported memory access. if (ShAmt && !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, LDST->getAddressSpace(), ShAmt / 8, @@ -5076,6 +4978,59 @@ SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) { return T1; } +/// Try to replace shift/logic that tests if a bit is clear with mask + setcc. +/// For a target with a bit test, this is expected to become test + set and save +/// at least 1 instruction. +static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) { + assert(And->getOpcode() == ISD::AND && "Expected an 'and' op"); + + // This is probably not worthwhile without a supported type. + EVT VT = And->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(VT)) + return SDValue(); + + // Look through an optional extension and find a 'not'. + // TODO: Should we favor test+set even without the 'not' op? + SDValue Not = And->getOperand(0), And1 = And->getOperand(1); + if (Not.getOpcode() == ISD::ANY_EXTEND) + Not = Not.getOperand(0); + if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1)) + return SDValue(); + + // Look though an optional truncation. The source operand may not be the same + // type as the original 'and', but that is ok because we are masking off + // everything but the low bit. 
+ SDValue Srl = Not.getOperand(0); + if (Srl.getOpcode() == ISD::TRUNCATE) + Srl = Srl.getOperand(0); + + // Match a shift-right by constant. + if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() || + !isa<ConstantSDNode>(Srl.getOperand(1))) + return SDValue(); + + // We might have looked through casts that make this transform invalid. + // TODO: If the source type is wider than the result type, do the mask and + // compare in the source type. + const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1); + unsigned VTBitWidth = VT.getSizeInBits(); + if (ShiftAmt.uge(VTBitWidth)) + return SDValue(); + + // Turn this into a bit-test pattern using mask op + setcc: + // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0 + SDLoc DL(And); + SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT); + EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue Mask = DAG.getConstant( + APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT); + SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask); + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ); + return DAG.getZExtOrTrunc(Setcc, DL, VT); +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -5163,6 +5118,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } } + // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must // already be zero by virtue of the width of the base type of the load. @@ -5337,7 +5293,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { unsigned MemBitSize = MemVT.getScalarSizeInBits(); APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize); if (DAG.MaskedValueIsZero(N1, ExtBits) && - ((!LegalOperations && !LN0->isVolatile()) || + ((!LegalOperations && LN0->isSimple()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), @@ -5358,6 +5314,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N)) return Shifts; + if (TLI.hasBitTest(N0, N1)) + if (SDValue V = combineShiftAnd1ToBitTest(N, DAG)) + return V; + return SDValue(); } @@ -5564,6 +5524,23 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { return true; } +// Match 2 elements of a packed halfword bswap. +static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) { + if (N.getOpcode() == ISD::OR) + return isBSwapHWordElement(N.getOperand(0), Parts) && + isBSwapHWordElement(N.getOperand(1), Parts); + + if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) { + ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1)); + if (!C || C->getAPIntValue() != 16) + return false; + Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode(); + return true; + } + + return false; +} + /// Match a 32-bit packed halfword bswap. 
That is /// ((x & 0x000000ff) << 8) | /// ((x & 0x0000ff00) >> 8) | @@ -5581,43 +5558,26 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { return SDValue(); // Look for either - // (or (or (and), (and)), (or (and), (and))) - // (or (or (or (and), (and)), (and)), (and)) - if (N0.getOpcode() != ISD::OR) - return SDValue(); - SDValue N00 = N0.getOperand(0); - SDValue N01 = N0.getOperand(1); + // (or (bswaphpair), (bswaphpair)) + // (or (or (bswaphpair), (and)), (and)) + // (or (or (and), (bswaphpair)), (and)) SDNode *Parts[4] = {}; - if (N1.getOpcode() == ISD::OR && - N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { + if (isBSwapHWordPair(N0, Parts)) { // (or (or (and), (and)), (or (and), (and))) - if (!isBSwapHWordElement(N00, Parts)) + if (!isBSwapHWordPair(N1, Parts)) return SDValue(); - - if (!isBSwapHWordElement(N01, Parts)) - return SDValue(); - SDValue N10 = N1.getOperand(0); - if (!isBSwapHWordElement(N10, Parts)) - return SDValue(); - SDValue N11 = N1.getOperand(1); - if (!isBSwapHWordElement(N11, Parts)) - return SDValue(); - } else { + } else if (N0.getOpcode() == ISD::OR) { // (or (or (or (and), (and)), (and)), (and)) if (!isBSwapHWordElement(N1, Parts)) return SDValue(); - if (!isBSwapHWordElement(N01, Parts)) - return SDValue(); - if (N00.getOpcode() != ISD::OR) - return SDValue(); - SDValue N000 = N00.getOperand(0); - if (!isBSwapHWordElement(N000, Parts)) - return SDValue(); - SDValue N001 = N00.getOperand(1); - if (!isBSwapHWordElement(N001, Parts)) + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) && + !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts))) return SDValue(); - } + } else + return SDValue(); // Make sure the parts are all coming from the same node. if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) @@ -5791,15 +5751,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) { SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0); SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0); - bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT); - if (!LegalMask) { - std::swap(NewLHS, NewRHS); - ShuffleVectorSDNode::commuteMask(Mask); - LegalMask = TLI.isShuffleMaskLegal(Mask, VT); - } - - if (LegalMask) - return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask); + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, + Mask, DAG); + if (LegalShuffle) + return LegalShuffle; } } } @@ -5867,8 +5823,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return V; // See if this is some rotate idiom. - if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) - return SDValue(Rot, 0); + if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N))) + return Rot; if (SDValue Load = MatchLoadCombine(N)) return Load; @@ -5914,6 +5870,9 @@ static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift, /// Otherwise, returns an expansion of \p ExtractFrom based on the following /// patterns: /// +/// (or (add v v) (shrl v bitwidth-1)): +/// expands (add v v) -> (shl v 1) +/// /// (or (mul v c0) (shrl (mul v c1) c2)): /// expands (mul v c0) -> (shl (mul v c1) c3) /// @@ -5936,6 +5895,23 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, "Existing shift must be valid as a rotate half"); ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask); + + // Value and Type of the shift. 
+ SDValue OppShiftLHS = OppShift.getOperand(0); + EVT ShiftedVT = OppShiftLHS.getValueType(); + + // Amount of the existing shift. + ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1)); + + // (add v v) -> (shl v 1) + if (OppShift.getOpcode() == ISD::SRL && OppShiftCst && + ExtractFrom.getOpcode() == ISD::ADD && + ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) && + ExtractFrom.getOperand(0) == OppShiftLHS && + OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1) + return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS, + DAG.getShiftAmountConstant(1, ShiftedVT, DL)); + // Preconditions: // (or (op0 v c0) (shiftl/r (op0 v c1) c2)) // @@ -5959,15 +5935,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, // op0 must be the same opcode on both sides, have the same LHS argument, // and produce the same value type. - SDValue OppShiftLHS = OppShift.getOperand(0); - EVT ShiftedVT = OppShiftLHS.getValueType(); if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() || OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) || ShiftedVT != ExtractFrom.getValueType()) return SDValue(); - // Amount of the existing shift. - ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1)); // Constant mul/udiv/shift amount from the RHS of the shift's LHS op. ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1)); // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op. @@ -6137,7 +6109,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the // former being preferred if supported. InnerPos and InnerNeg are Pos and // Neg with outer conversions stripped away. -SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, +SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL) { @@ -6152,32 +6124,33 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) { bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, - HasPos ? Pos : Neg).getNode(); + HasPos ? Pos : Neg); } - return nullptr; + return SDValue(); } // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. -SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { +SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. EVT VT = LHS.getValueType(); - if (!TLI.isTypeLegal(VT)) return nullptr; + if (!TLI.isTypeLegal(VT)) + return SDValue(); // The target must have at least one rotate flavor. bool HasROTL = hasOperation(ISD::ROTL, VT); bool HasROTR = hasOperation(ISD::ROTR, VT); - if (!HasROTL && !HasROTR) return nullptr; + if (!HasROTL && !HasROTR) + return SDValue(); // Check for truncated rotate. 
if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE && LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) { assert(LHS.getValueType() == RHS.getValueType()); - if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) { - return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), - SDValue(Rot, 0)).getNode(); + if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) { + return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot); } } @@ -6192,7 +6165,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // If neither side matched a rotate half, bail if (!LHSShift && !RHSShift) - return nullptr; + return SDValue(); // InstCombine may have combined a constant shl, srl, mul, or udiv with one // side of the rotate, so try to handle that here. In all cases we need to @@ -6215,15 +6188,15 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // If a side is still missing, nothing else we can do. if (!RHSShift || !LHSShift) - return nullptr; + return SDValue(); // At this point we've matched or extracted a shift op on each side. if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) - return nullptr; // Not shifting the same value. + return SDValue(); // Not shifting the same value. if (LHSShift.getOpcode() == RHSShift.getOpcode()) - return nullptr; // Shifts must disagree. + return SDValue(); // Shifts must disagree. // Canonicalize shl to left side in a shl/srl pair. if (RHSShift.getOpcode() == ISD::SHL) { @@ -6267,13 +6240,13 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask); } - return Rot.getNode(); + return Rot; } // If there is a mask here, and we have a variable shift, we can't be sure // that we're masking out the right stuff. if (LHSMask.getNode() || RHSMask.getNode()) - return nullptr; + return SDValue(); // If the shift amount is sign/zext/any-extended just peel it off. 
SDValue LExtOp0 = LHSShiftAmt; @@ -6290,17 +6263,17 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { RExtOp0 = RHSShiftAmt.getOperand(0); } - SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, + SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); if (TryL) return TryL; - SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, + SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); if (TryR) return TryR; - return nullptr; + return SDValue(); } namespace { @@ -6415,7 +6388,7 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, Depth + 1); case ISD::LOAD: { auto L = cast<LoadSDNode>(Op.getNode()); - if (L->isVolatile() || L->isIndexed()) + if (!L->isSimple() || L->isIndexed()) return None; unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits(); @@ -6504,8 +6477,9 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { SDValue Chain; SmallVector<StoreSDNode *, 8> Stores; for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) { + // TODO: Allow unordered atomics when wider type is legal (see D66309) if (Store->getMemoryVT() != MVT::i8 || - Store->isVolatile() || Store->isIndexed()) + !Store->isSimple() || Store->isIndexed()) return SDValue(); Stores.push_back(Store); Chain = Store->getChain(); @@ -6716,7 +6690,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return SDValue(); LoadSDNode *L = P->Load; - assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() && + assert(L->hasNUsesOfValue(1, 0) && L->isSimple() && + !L->isIndexed() && "Must be enforced by calculateByteProvider"); assert(L->getOffset().isUndef() && "Unindexed load must have undef offset"); @@ -6958,25 +6933,25 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() && (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) { - SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); - if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { + SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1); + if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) { unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND; - LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS - RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS - AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); - return DAG.getNode(NewOpcode, DL, VT, LHS, RHS); + N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00 + N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01 + AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode()); + return DAG.getNode(NewOpcode, DL, VT, N00, N01); } } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants if (isAllOnesConstant(N1) && N0.hasOneUse() && (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) { - SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); - if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { + SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1); + if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) { unsigned NewOpcode = N0Opcode == ISD::AND ? 
ISD::OR : ISD::AND; - LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS - RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS - AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); - return DAG.getNode(NewOpcode, DL, VT, LHS, RHS); + N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00 + N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01 + AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode()); + return DAG.getNode(NewOpcode, DL, VT, N00, N01); } } @@ -7079,26 +7054,103 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return SDValue(); } +/// If we have a shift-by-constant of a bitwise logic op that itself has a +/// shift-by-constant operand with identical opcode, we may be able to convert +/// that into 2 independent shifts followed by the logic op. This is a +/// throughput improvement. +static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) { + // Match a one-use bitwise logic op. + SDValue LogicOp = Shift->getOperand(0); + if (!LogicOp.hasOneUse()) + return SDValue(); + + unsigned LogicOpcode = LogicOp.getOpcode(); + if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR && + LogicOpcode != ISD::XOR) + return SDValue(); + + // Find a matching one-use shift by constant. + unsigned ShiftOpcode = Shift->getOpcode(); + SDValue C1 = Shift->getOperand(1); + ConstantSDNode *C1Node = isConstOrConstSplat(C1); + assert(C1Node && "Expected a shift with constant operand"); + const APInt &C1Val = C1Node->getAPIntValue(); + auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp, + const APInt *&ShiftAmtVal) { + if (V.getOpcode() != ShiftOpcode || !V.hasOneUse()) + return false; + + ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1)); + if (!ShiftCNode) + return false; + + // Capture the shifted operand and shift amount value. + ShiftOp = V.getOperand(0); + ShiftAmtVal = &ShiftCNode->getAPIntValue(); + + // Shift amount types do not have to match their operand type, so check that + // the constants are the same width. + if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth()) + return false; + + // The fold is not valid if the sum of the shift values exceeds bitwidth. + if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits())) + return false; + + return true; + }; + + // Logic ops are commutative, so check each operand for a match. + SDValue X, Y; + const APInt *C0Val; + if (matchFirstShift(LogicOp.getOperand(0), X, C0Val)) + Y = LogicOp.getOperand(1); + else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val)) + Y = LogicOp.getOperand(0); + else + return SDValue(); + + // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1) + SDLoc DL(Shift); + EVT VT = Shift->getValueType(0); + EVT ShiftAmtVT = Shift->getOperand(1).getValueType(); + SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT); + SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC); + SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1); + return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2); +} + /// Handle transforms common to the three shifts, when the shift amount is a /// constant. 
/// We are looking for: (shift being one of shl/sra/srl) /// shift (binop X, C0), C1 /// And want to transform into: /// binop (shift X, C1), (shift C0, C1) -SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { +SDValue DAGCombiner::visitShiftByConstant(SDNode *N) { + assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand"); + // Do not turn a 'not' into a regular xor. if (isBitwiseNot(N->getOperand(0))) return SDValue(); // The inner binop must be one-use, since we want to replace it. - SDNode *LHS = N->getOperand(0).getNode(); - if (!LHS->hasOneUse()) return SDValue(); + SDValue LHS = N->getOperand(0); + if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level)) + return SDValue(); + + // TODO: This is limited to early combining because it may reveal regressions + // otherwise. But since we just checked a target hook to see if this is + // desirable, that should have filtered out cases where this interferes + // with some other pattern matching. + if (!LegalTypes) + if (SDValue R = combineShiftOfShiftedLogic(N, DAG)) + return R; // We want to pull some binops through shifts, so that we have (and (shift)) // instead of (shift (and)), likewise for add, or, xor, etc. This sort of // thing happens with address calculations, so it's important to canonicalize // it. - switch (LHS->getOpcode()) { + switch (LHS.getOpcode()) { default: return SDValue(); case ISD::OR: @@ -7112,14 +7164,14 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { } // We require the RHS of the binop to be a constant and not opaque as well. - ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1)); + ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1)); if (!BinOpCst) return SDValue(); // FIXME: disable this unless the input to the binop is a shift by a constant // or is copy/select. Enable this in other cases when figure out it's exactly // profitable. - SDValue BinOpLHSVal = LHS->getOperand(0); + SDValue BinOpLHSVal = LHS.getOperand(0); bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL || BinOpLHSVal.getOpcode() == ISD::SRA || BinOpLHSVal.getOpcode() == ISD::SRL) && @@ -7133,24 +7185,16 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { if (IsCopyOrSelect && N->hasOneUse()) return SDValue(); - EVT VT = N->getValueType(0); - - if (!TLI.isDesirableToCommuteWithShift(N, Level)) - return SDValue(); - // Fold the constants, shifting the binop RHS by the shift amount. - SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), - N->getValueType(0), - LHS->getOperand(1), N->getOperand(1)); + SDLoc DL(N); + EVT VT = N->getValueType(0); + SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1), + N->getOperand(1)); assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!"); - // Create the new shift. - SDValue NewShift = DAG.getNode(N->getOpcode(), - SDLoc(LHS->getOperand(0)), - VT, LHS->getOperand(0), N->getOperand(1)); - - // Create the new binop. 
- return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); + SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0), + N->getOperand(1)); + return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS); } SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { @@ -7478,7 +7522,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } if (N1C && !N1C->isOpaque()) - if (SDValue NewSHL = visitShiftByConstant(N, N1C)) + if (SDValue NewSHL = visitShiftByConstant(N)) return NewSHL; return SDValue(); @@ -7597,6 +7641,37 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } } + // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper. + // sra (add (shl X, N1C), AddC), N1C --> + // sext (add (trunc X to (width - N1C)), AddC') + if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C && + N0.getOperand(0).getOpcode() == ISD::SHL && + N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) { + if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) { + SDValue Shl = N0.getOperand(0); + // Determine what the truncate's type would be and ask the target if that + // is a free operation. + LLVMContext &Ctx = *DAG.getContext(); + unsigned ShiftAmt = N1C->getZExtValue(); + EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt); + if (VT.isVector()) + TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements()); + + // TODO: The simple type check probably belongs in the default hook + // implementation and/or target-specific overrides (because + // non-simple types likely require masking when legalized), but that + // restriction may conflict with other transforms. + if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) { + SDLoc DL(N); + SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT); + SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt). + trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT); + SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC); + return DAG.getSExtOrTrunc(Add, DL, VT); + } + } + } + // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { @@ -7638,7 +7713,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); if (N1C && !N1C->isOpaque()) - if (SDValue NewSRA = visitShiftByConstant(N, N1C)) + if (SDValue NewSRA = visitShiftByConstant(N)) return NewSRA; return SDValue(); @@ -7819,7 +7894,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return SDValue(N, 0); if (N1C && !N1C->isOpaque()) - if (SDValue NewSRL = visitShiftByConstant(N, N1C)) + if (SDValue NewSRL = visitShiftByConstant(N)) return NewSRL; // Attempt to convert a srl of a load into a narrower zero-extending load. @@ -8100,6 +8175,43 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, } } +/// If a (v)select has a condition value that is a sign-bit test, try to smear +/// the condition operand sign-bit across the value width and use it as a mask. 
+static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) { + SDValue Cond = N->getOperand(0); + SDValue C1 = N->getOperand(1); + SDValue C2 = N->getOperand(2); + assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) && + "Expected select-of-constants"); + + EVT VT = N->getValueType(0); + if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() || + VT != Cond.getOperand(0).getValueType()) + return SDValue(); + + // The inverted-condition + commuted-select variants of these patterns are + // canonicalized to these forms in IR. + SDValue X = Cond.getOperand(0); + SDValue CondC = Cond.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); + if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) && + isAllOnesOrAllOnesSplat(C2)) { + // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1 + SDLoc DL(N); + SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC); + return DAG.getNode(ISD::OR, DL, VT, Sra, C1); + } + if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) { + // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1 + SDLoc DL(N); + SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC); + return DAG.getNode(ISD::AND, DL, VT, Sra, C1); + } + return SDValue(); +} + SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { SDValue Cond = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -8148,22 +8260,36 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { return Cond; } - // For any constants that differ by 1, we can transform the select into an - // extend and add. Use a target hook because some targets may prefer to - // transform in the other direction. + // Use a target hook because some targets may prefer to transform in the + // other direction. if (TLI.convertSelectOfConstantsToMath(VT)) { - if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) { + // For any constants that differ by 1, we can transform the select into an + // extend and add. + const APInt &C1Val = C1->getAPIntValue(); + const APInt &C2Val = C2->getAPIntValue(); + if (C1Val - 1 == C2Val) { // select Cond, C1, C1-1 --> add (zext Cond), C1-1 if (VT != MVT::i1) Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); } - if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) { + if (C1Val + 1 == C2Val) { // select Cond, C1, C1+1 --> add (sext Cond), C1+1 if (VT != MVT::i1) Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); } + + // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) + if (C1Val.isPowerOf2() && C2Val.isNullValue()) { + if (VT != MVT::i1) + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); + SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); + } + + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + return V; } return SDValue(); @@ -8381,23 +8507,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SDValue(); } -static -std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { - SDLoc DL(N); - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - - // Split the inputs. 
- SDValue Lo, Hi, LL, LH, RL, RH; - std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); - - Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); - Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); - - return std::make_pair(Lo, Hi); -} - // This function assumes all the vselect's arguments are CONCAT_VECTOR // nodes and that the condition is a BV of ConstantSDNodes (or undefs). static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { @@ -8456,7 +8565,6 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { SDValue DAGCombiner::visitMSCATTER(SDNode *N) { MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); SDValue Mask = MSC->getMask(); - SDValue Data = MSC->getValue(); SDValue Chain = MSC->getChain(); SDLoc DL(N); @@ -8464,123 +8572,19 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return Chain; - if (Level >= AfterLegalizeTypes) - return SDValue(); - - // If the MSCATTER data type requires splitting and the mask is provided by a - // SETCC, then split both nodes and its operands before legalization. This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. min/max pattern matching on X86). - if (Mask.getOpcode() != ISD::SETCC) - return SDValue(); - - // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != - TargetLowering::TypeSplitVector) - return SDValue(); - SDValue MaskLo, MaskHi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0)); - - EVT MemoryVT = MSC->getMemoryVT(); - unsigned Alignment = MSC->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - SDValue DataLo, DataHi; - std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); - - SDValue Scale = MSC->getScale(); - SDValue BasePtr = MSC->getBasePtr(); - SDValue IndexLo, IndexHi; - std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); - - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MSC->getPointerInfo(), - MachineMemOperand::MOStore, LoMemVT.getStoreSize(), - Alignment, MSC->getAAInfo(), MSC->getRanges()); - - SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale }; - SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), - DataLo.getValueType(), DL, OpsLo, MMO); - - // The order of the Scatter operation after split is well defined. The "Hi" - // part comes after the "Lo". So these two operations should be chained one - // after another. - SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale }; - return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), - DL, OpsHi, MMO); + return SDValue(); } SDValue DAGCombiner::visitMSTORE(SDNode *N) { MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); SDValue Mask = MST->getMask(); - SDValue Data = MST->getValue(); SDValue Chain = MST->getChain(); - EVT VT = Data.getValueType(); SDLoc DL(N); // Zap masked stores with a zero mask. if (ISD::isBuildVectorAllZeros(Mask.getNode())) return Chain; - if (Level >= AfterLegalizeTypes) - return SDValue(); - - // If the MSTORE data type requires splitting and the mask is provided by a - // SETCC, then split both nodes and its operands before legalization. 
This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. min/max pattern matching on X86). - if (Mask.getOpcode() == ISD::SETCC) { - // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), VT) != - TargetLowering::TypeSplitVector) - return SDValue(); - - SDValue MaskLo, MaskHi, Lo, Hi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - SDValue Ptr = MST->getBasePtr(); - - EVT MemoryVT = MST->getMemoryVT(); - unsigned Alignment = MST->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - SDValue DataLo, DataHi; - std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); - - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MST->getPointerInfo(), - MachineMemOperand::MOStore, LoMemVT.getStoreSize(), - Alignment, MST->getAAInfo(), MST->getRanges()); - - Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, - MST->isTruncatingStore(), - MST->isCompressingStore()); - - Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, - MST->isCompressingStore()); - unsigned HiOffset = LoMemVT.getStoreSize(); - - MMO = DAG.getMachineFunction().getMachineMemOperand( - MST->getPointerInfo().getWithOffset(HiOffset), - MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment, - MST->getAAInfo(), MST->getRanges()); - - Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, - MST->isTruncatingStore(), - MST->isCompressingStore()); - - AddToWorklist(Lo.getNode()); - AddToWorklist(Hi.getNode()); - - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); - } return SDValue(); } @@ -8593,76 +8597,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return CombineTo(N, MGT->getPassThru(), MGT->getChain()); - if (Level >= AfterLegalizeTypes) - return SDValue(); - - // If the MGATHER result requires splitting and the mask is provided by a - // SETCC, then split both nodes and its operands before legalization. This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. min/max pattern matching on X86). - - if (Mask.getOpcode() != ISD::SETCC) - return SDValue(); - - EVT VT = N->getValueType(0); - - // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), VT) != - TargetLowering::TypeSplitVector) - return SDValue(); - - SDValue MaskLo, MaskHi, Lo, Hi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - SDValue PassThru = MGT->getPassThru(); - SDValue PassThruLo, PassThruHi; - std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL); - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); - - SDValue Chain = MGT->getChain(); - EVT MemoryVT = MGT->getMemoryVT(); - unsigned Alignment = MGT->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - SDValue Scale = MGT->getScale(); - SDValue BasePtr = MGT->getBasePtr(); - SDValue Index = MGT->getIndex(); - SDValue IndexLo, IndexHi; - std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); - - MachineMemOperand *MMO = DAG.getMachineFunction(). 
- getMachineMemOperand(MGT->getPointerInfo(), - MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), - Alignment, MGT->getAAInfo(), MGT->getRanges()); - - SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale }; - Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, - MMO); - - SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale }; - Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, - MMO); - - AddToWorklist(Lo.getNode()); - AddToWorklist(Hi.getNode()); - - // Build a factor node to remember that this load is independent of the - // other one. - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Legalized the chain result - switch anything that used the old chain to - // use the new one. - DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain); - - SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); - - SDValue RetOps[] = { GatherRes, Chain }; - return DAG.getMergeValues(RetOps, DL); + return SDValue(); } SDValue DAGCombiner::visitMLOAD(SDNode *N) { @@ -8674,76 +8609,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return CombineTo(N, MLD->getPassThru(), MLD->getChain()); - if (Level >= AfterLegalizeTypes) - return SDValue(); - - // If the MLOAD result requires splitting and the mask is provided by a - // SETCC, then split both nodes and its operands before legalization. This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. min/max pattern matching on X86). - if (Mask.getOpcode() == ISD::SETCC) { - EVT VT = N->getValueType(0); - - // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), VT) != - TargetLowering::TypeSplitVector) - return SDValue(); - - SDValue MaskLo, MaskHi, Lo, Hi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - SDValue PassThru = MLD->getPassThru(); - SDValue PassThruLo, PassThruHi; - std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL); - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); - - SDValue Chain = MLD->getChain(); - SDValue Ptr = MLD->getBasePtr(); - EVT MemoryVT = MLD->getMemoryVT(); - unsigned Alignment = MLD->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MLD->getPointerInfo(), - MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), - Alignment, MLD->getAAInfo(), MLD->getRanges()); - - Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT, - MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad()); - - Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, - MLD->isExpandingLoad()); - unsigned HiOffset = LoMemVT.getStoreSize(); - - MMO = DAG.getMachineFunction().getMachineMemOperand( - MLD->getPointerInfo().getWithOffset(HiOffset), - MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment, - MLD->getAAInfo(), MLD->getRanges()); - - Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT, - MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad()); - - AddToWorklist(Lo.getNode()); - AddToWorklist(Hi.getNode()); - - // Build a factor node to remember that this load is independent of the - // other one. 
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Legalized the chain result - switch anything that used the old chain to - // use the new one. - DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); - - SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); - - SDValue RetOps[] = { LoadRes, Chain }; - return DAG.getMergeValues(RetOps, DL); - } return SDValue(); } @@ -8791,6 +8656,18 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2); } + // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C) + APInt Pow2C; + if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() && + isNullOrNullSplat(N2)) { + SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT); + SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC); + } + + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + return V; + // The general case for select-of-constants: // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2 // ...but that only makes sense if a vselect is slower than 2 logic ops, so @@ -8832,13 +8709,12 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); if (isAbs) { - EVT VT = LHS.getValueType(); if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) return DAG.getNode(ISD::ABS, DL, VT, LHS); - SDValue Shift = DAG.getNode( - ISD::SRA, DL, VT, LHS, - DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT)); + SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS, + DAG.getConstant(VT.getScalarSizeInBits() - 1, + DL, getShiftAmountTy(VT))); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -8851,10 +8727,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { // This is OK if we don't care about what happens if either operand is a // NaN. 
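The foldVSelectOfConstants hunk above introduces the fold select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C). A minimal scalar sketch of that identity, as hypothetical standalone C++ (selectPow2/zextShl are illustrative names, not LLVM APIs):

#include <cassert>
#include <cstdint>

// select cond, 8, 0  ==  (zext cond) << log2(8)
static uint32_t selectPow2(bool Cond) { return Cond ? 8u : 0u; }
static uint32_t zextShl(bool Cond) { return static_cast<uint32_t>(Cond) << 3; }

int main() {
  assert(selectPow2(true) == zextShl(true));   // both 8
  assert(selectPow2(false) == zextShl(false)); // both 0
  return 0;
}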
// - if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), - N0.getOperand(1), TLI)) { - if (SDValue FMinMax = combineMinNumMaxNum( - DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG)) + if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) { + if (SDValue FMinMax = + combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG)) return FMinMax; } @@ -9209,8 +9084,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) || - !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() || - !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + !N0.hasOneUse() || !LN0->isSimple() || + !DstVT.isVector() || !DstVT.isPow2VectorType() || + !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) return SDValue(); SmallVector<SDNode *, 4> SetCCs; @@ -9411,7 +9287,8 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); - if ((LegalOperations || LN0->isVolatile() || VT.isVector()) && + if ((LegalOperations || !LN0->isSimple() || + VT.isVector()) && !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT)) return SDValue(); @@ -9436,7 +9313,7 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()) || ((LegalOperations || VT.isVector() || - cast<LoadSDNode>(N0)->isVolatile()) && + !cast<LoadSDNode>(N0)->isSimple()) && !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))) return {}; @@ -9468,6 +9345,35 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, return SDValue(N, 0); // Return N so it doesn't get rechecked! } +static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, + const TargetLowering &TLI, EVT VT, + SDNode *N, SDValue N0, + ISD::LoadExtType ExtLoadType, + ISD::NodeType ExtOpc) { + if (!N0.hasOneUse()) + return SDValue(); + + MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0); + if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD) + return SDValue(); + + if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0))) + return SDValue(); + + if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + return SDValue(); + + SDLoc dl(Ld); + SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru()); + SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(), + Ld->getBasePtr(), Ld->getMask(), + PassThru, Ld->getMemoryVT(), + Ld->getMemOperand(), ExtLoadType, + Ld->isExpandingLoad()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1)); + return NewLoad; +} + static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations) { assert((N->getOpcode() == ISD::SIGN_EXTEND || @@ -9568,6 +9474,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { ISD::SEXTLOAD, ISD::SIGN_EXTEND)) return foldedExt; + if (SDValue foldedExt = + tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD, + ISD::SIGN_EXTEND)) + return foldedExt; + // fold (sext (load x)) to multiple smaller sextloads. // Only on illegal but splittable vectors. if (SDValue ExtLoad = CombineExtLoad(N)) @@ -9856,6 +9767,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { ISD::ZEXTLOAD, ISD::ZERO_EXTEND)) return foldedExt; + if (SDValue foldedExt = + tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD, + ISD::ZERO_EXTEND)) + return foldedExt; + // fold (zext (load x)) to multiple smaller zextloads. 
// Only on illegal but splittable vectors. if (SDValue ExtLoad = CombineExtLoad(N)) @@ -10340,7 +10256,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { return SDValue(); LoadSDNode *LN0 = cast<LoadSDNode>(N0); - if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) + // Reducing the width of a volatile load is illegal. For atomics, we may be + // able to reduce the width provided we never widen again. (see D66309) + if (!LN0->isSimple() || + !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) return SDValue(); auto AdjustBigEndianShift = [&](unsigned ShAmt) { @@ -10369,11 +10288,11 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue Load; if (ExtType == ISD::NON_EXTLOAD) - Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, + Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); else - Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr, + Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, NewAlign, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); @@ -10392,7 +10311,6 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // no larger than the source) then the useful bits of the result are // zero; we can't simply return the shortened shift, because the result // of that operation is undefined. - SDLoc DL(N0); if (ShLeftAmt >= VT.getSizeInBits()) Result = DAG.getConstant(0, DL, VT); else @@ -10513,7 +10431,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && EVT == cast<LoadSDNode>(N0)->getMemoryVT() && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() && + ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() && N0.hasOneUse()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); @@ -10530,7 +10448,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse() && EVT == cast<LoadSDNode>(N0)->getMemoryVT() && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) && TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, @@ -10757,7 +10675,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // after truncation. if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - if (!LN0->isVolatile() && + if (LN0->isSimple() && LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), @@ -11051,7 +10969,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // memory accesses. We don't care if the original type was legal or not // as we assume software couldn't rely on the number of accesses of an // illegal type. - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) || TLI.isOperationLegal(ISD::LOAD, VT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); @@ -11237,15 +11155,10 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { for (int i = 0; i != MaskScale; ++i) NewMask.push_back(M < 0 ? 
-1 : M * MaskScale + i); - bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); - if (!LegalMask) { - std::swap(SV0, SV1); - ShuffleVectorSDNode::commuteMask(NewMask); - LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); - } - - if (LegalMask) - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask); + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG); + if (LegalShuffle) + return LegalShuffle; } return SDValue(); @@ -11998,7 +11911,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math) ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true); if (N1C && N1C->isZero()) - if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros()) + if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -12006,17 +11919,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2) - return DAG.getNode(ISD::FSUB, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), Flags); + TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize) == 2) + return DAG.getNode( + ISD::FSUB, DL, VT, N0, + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2) - return DAG.getNode(ISD::FSUB, DL, VT, N1, - GetNegatedExpression(N0, DAG, LegalOperations, - ForCodeSize), Flags); + TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize) == 2) + return DAG.getNode( + ISD::FSUB, DL, VT, N1, + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags); auto isFMulNegTwo = [](SDValue FMul) { if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL) @@ -12056,7 +11969,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // If 'unsafe math' or reassoc and nsz, fold lots of things. 
// TODO: break out portions of the transformations below for which Unsafe is // considered and which do not require both nsz and reassoc - if ((Options.UnsafeFPMath || + if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && AllowNewConst) { // fadd (fadd x, c1), c2 -> fadd x, c1 + c2 @@ -12175,7 +12088,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // (fsub A, 0) -> A if (N1CFP && N1CFP->isZero()) { - if (!N1CFP->isNegative() || Options.UnsafeFPMath || + if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) { return N0; } @@ -12195,16 +12108,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N0CFP && N0CFP->isZero()) { if (N0CFP->isNegative() || (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { - if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize)) - return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) + return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); } } - if ((Options.UnsafeFPMath || - (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) - && N1.getOpcode() == ISD::FADD) { + if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || + (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && + N1.getOpcode() == ISD::FADD) { // X - (X + Y) -> -Y if (N0 == N1->getOperand(0)) return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags); @@ -12214,10 +12127,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize)) - return DAG.getNode(ISD::FADD, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), Flags); + if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) + return DAG.getNode( + ISD::FADD, DL, VT, N0, + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); // FSUB -> FMA combines: if (SDValue Fused = visitFSUBForFMACombine(N)) { @@ -12228,6 +12141,21 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { return SDValue(); } +/// Return true if both inputs are at least as cheap in negated form and at +/// least one input is strictly cheaper in negated form. +bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) { + if (char LHSNeg = + TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize)) + if (char RHSNeg = + TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize)) + // Both negated operands are at least as cheap as their counterparts. + // Check to see if at least one is cheaper negated. 
+ if (LHSNeg == 2 || RHSNeg == 2) + return true; + + return false; +} + SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -12254,10 +12182,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags); - // fold (fmul A, 1.0) -> A - if (N1CFP && N1CFP->isExactlyValue(1.0)) - return N0; - if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -12302,21 +12226,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N0); - // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options, - ForCodeSize)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options, - ForCodeSize)) { - // Both can be negated for free, check to see if at least one is cheaper - // negated. - if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FMUL, DL, VT, - GetNegatedExpression(N0, DAG, LegalOperations, - ForCodeSize), - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), - Flags); - } + // -N0 * -N1 --> N0 * N1 + if (isCheaperToUseNegatedFPOps(N0, N1)) { + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags); } // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) @@ -12395,6 +12311,15 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2); } + // (-N0 * -N1) + N2 --> (N0 * N1) + N2 + if (isCheaperToUseNegatedFPOps(N0, N1)) { + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags); + } + if (UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) return N2; @@ -12602,9 +12527,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); - } } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), @@ -12645,28 +12569,16 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } // Fold into a reciprocal estimate and multiply instead of a real divide. - if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) { - AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); - } + if (SDValue RV = BuildDivEstimate(N0, N1, Flags)) + return RV; } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options, - ForCodeSize)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options, - ForCodeSize)) { - // Both can be negated for free, check to see if at least one is cheaper - // negated. 
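The new isCheaperToUseNegatedFPOps helper above guards rewrites such as -N0 * -N1 --> N0 * N1 in visitFMUL and visitFMA. The underlying identity, sketched with plain C++ floats (illustrative only; finite inputs shown):

#include <cassert>

int main() {
  float A = 1.5f, B = -2.25f;
  // Negating both multiplicands flips the sign twice, so the product is
  // unchanged; the combine drops both FNEG nodes when negation is free.
  assert((-A) * (-B) == A * B);
  return 0;
}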
- if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FDIV, SDLoc(N), VT, - GetNegatedExpression(N0, DAG, LegalOperations, - ForCodeSize), - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), - Flags); - } - } + if (isCheaperToUseNegatedFPOps(N0, N1)) + return DAG.getNode( + ISD::FDIV, SDLoc(N), VT, + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); return SDValue(); } @@ -13112,22 +13024,6 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { - SDValue N0 = N->getOperand(0); - EVT VT = N->getValueType(0); - EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - - // fold (fp_round_inreg c1fp) -> c1fp - if (N0CFP && isTypeLegal(EVT)) { - SDLoc DL(N); - SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT); - return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round); - } - - return SDValue(); -} - SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -13236,9 +13132,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); - if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), - &DAG.getTarget().Options, ForCodeSize)) - return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize)) + return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading // constant pool values. @@ -14004,11 +13899,12 @@ bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) { } SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { - if (OptLevel == CodeGenOpt::None || LD->isVolatile()) + if (OptLevel == CodeGenOpt::None || !LD->isSimple()) return SDValue(); SDValue Chain = LD->getOperand(0); StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode()); - if (!ST || ST->isVolatile()) + // TODO: Relax this restriction for unordered atomics (see D66309) + if (!ST || !ST->isSimple()) return SDValue(); EVT LDType = LD->getValueType(0); @@ -14107,7 +14003,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // If load is not volatile and there are no uses of the loaded value (and // the updated indexed value in case of indexed loads), change uses of the // chain value into uses of the chain input (i.e. delete the dead load). - if (!LD->isVolatile()) { + // TODO: Allow this for unordered atomics (see D66309) + if (LD->isSimple()) { if (N->getValueType(1) == MVT::Other) { // Unindexed loads. if (!N->hasAnyUseOfValue(0)) { @@ -14241,7 +14138,7 @@ struct LoadedSlice { /// Helper structure used to compute the cost of a slice. struct Cost { /// Are we optimizing for code size. - bool ForCodeSize; + bool ForCodeSize = false; /// Various cost. unsigned Loads = 0; @@ -14250,10 +14147,10 @@ struct LoadedSlice { unsigned ZExts = 0; unsigned Shift = 0; - Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {} + explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {} /// Get the cost of one isolated slice. 
- Cost(const LoadedSlice &LS, bool ForCodeSize = false) + Cost(const LoadedSlice &LS, bool ForCodeSize) : ForCodeSize(ForCodeSize), Loads(1) { EVT TruncType = LS.Inst->getValueType(0); EVT LoadedType = LS.getLoadedType(); @@ -14678,7 +14575,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { return false; LoadSDNode *LD = cast<LoadSDNode>(N); - if (LD->isVolatile() || !ISD::isNormalLoad(LD) || + if (!LD->isSimple() || !ISD::isNormalLoad(LD) || !LD->getValueType(0).isInteger()) return false; @@ -14829,13 +14726,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { else if (Chain->getOpcode() == ISD::TokenFactor && SDValue(LD, 1).hasOneUse()) { // LD has only 1 chain use so they are no indirect dependencies. - bool isOk = false; - for (const SDValue &ChainOp : Chain->op_values()) - if (ChainOp.getNode() == LD) { - isOk = true; - break; - } - if (!isOk) + if (!LD->isOperandOf(Chain.getNode())) return Result; } else return Result; // Fail. @@ -14848,7 +14739,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { /// Check to see if IVal is something that provides a value as specified by /// MaskInfo. If so, replace the specified store with a narrower store of /// truncated IVal. -static SDNode * +static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC) { @@ -14860,14 +14751,19 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, // that uses this. If not, this is not a replacement. APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), ByteShift*8, (ByteShift+NumBytes)*8); - if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr; + if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue(); // Check that it is legal on the target to do this. It is legal if the new // VT we're shrinking to (i8/i16/i32) is legal or we're still before type - // legalization. - MVT VT = MVT::getIntegerVT(NumBytes*8); + // legalization (and the target doesn't explicitly think this is a bad idea). + MVT VT = MVT::getIntegerVT(NumBytes * 8); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!DC->isTypeLegal(VT)) - return nullptr; + return SDValue(); + if (St->getMemOperand() && + !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + *St->getMemOperand())) + return SDValue(); // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. @@ -14901,8 +14797,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, ++OpsNarrowed; return DAG .getStore(St->getChain(), SDLoc(St), IVal, Ptr, - St->getPointerInfo().getWithOffset(StOffset), NewAlign) - .getNode(); + St->getPointerInfo().getWithOffset(StOffset), NewAlign); } /// Look for sequence of load / op / store where op is one of 'or', 'xor', and @@ -14911,7 +14806,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, /// or code size. 
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { StoreSDNode *ST = cast<StoreSDNode>(N); - if (ST->isVolatile()) + if (!ST->isSimple()) return SDValue(); SDValue Chain = ST->getChain(); @@ -14933,16 +14828,16 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { std::pair<unsigned, unsigned> MaskedLoad; MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); if (MaskedLoad.first) - if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, Value.getOperand(1), ST,this)) - return SDValue(NewST, 0); + return NewST; // Or is commutative, so try swapping X and Y. MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); if (MaskedLoad.first) - if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, Value.getOperand(0), ST,this)) - return SDValue(NewST, 0); + return NewST; } if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || @@ -15367,14 +15262,16 @@ void DAGCombiner::getStoreMergeCandidates( // Loads must only have one use. if (!Ld->hasNUsesOfValue(1, 0)) return; - // The memory operands must not be volatile/indexed. - if (Ld->isVolatile() || Ld->isIndexed()) + // The memory operands must not be volatile/indexed/atomic. + // TODO: May be able to relax for unordered atomics (see D66309) + if (!Ld->isSimple() || Ld->isIndexed()) return; } auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr, int64_t &Offset) -> bool { - // The memory operands must not be volatile/indexed. - if (Other->isVolatile() || Other->isIndexed()) + // The memory operands must not be volatile/indexed/atomic. + // TODO: May be able to relax for unordered atomics (see D66309) + if (!Other->isSimple() || Other->isIndexed()) return false; // Don't mix temporal stores with non-temporal stores. if (St->isNonTemporal() != Other->isNonTemporal()) @@ -15394,8 +15291,10 @@ void DAGCombiner::getStoreMergeCandidates( // Loads must only have one use. if (!OtherLd->hasNUsesOfValue(1, 0)) return false; - // The memory operands must not be volatile/indexed. - if (OtherLd->isVolatile() || OtherLd->isIndexed()) + // The memory operands must not be volatile/indexed/atomic. + // TODO: May be able to relax for unordered atomics (see D66309) + if (!OtherLd->isSimple() || + OtherLd->isIndexed()) return false; // Don't mix temporal loads with non-temporal loads. if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal()) @@ -15425,6 +15324,18 @@ void DAGCombiner::getStoreMergeCandidates( return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); }; + // Check if the pair of StoreNode and the RootNode already bail out many + // times which is over the limit in dependence check. + auto OverLimitInDependenceCheck = [&](SDNode *StoreNode, + SDNode *RootNode) -> bool { + auto RootCount = StoreRootCountMap.find(StoreNode); + if (RootCount != StoreRootCountMap.end() && + RootCount->second.first == RootNode && + RootCount->second.second > StoreMergeDependenceLimit) + return true; + return false; + }; + // We looking for a root node which is an ancestor to all mergable // stores. We search up through a load, to our root and then down // through all children. 
For instance we will find Store{1,2,3} if @@ -15454,7 +15365,8 @@ void DAGCombiner::getStoreMergeCandidates( if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) { BaseIndexOffset Ptr; int64_t PtrDiff; - if (CandidateMatch(OtherST, Ptr, PtrDiff)) + if (CandidateMatch(OtherST, Ptr, PtrDiff) && + !OverLimitInDependenceCheck(OtherST, RootNode)) StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); } } else @@ -15464,7 +15376,8 @@ void DAGCombiner::getStoreMergeCandidates( if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) { BaseIndexOffset Ptr; int64_t PtrDiff; - if (CandidateMatch(OtherST, Ptr, PtrDiff)) + if (CandidateMatch(OtherST, Ptr, PtrDiff) && + !OverLimitInDependenceCheck(OtherST, RootNode)) StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); } } @@ -15522,13 +15435,24 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( // Search through DAG. We can stop early if we find a store node. for (unsigned i = 0; i < NumStores; ++i) if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist, - Max)) + Max)) { + // If the searching bail out, record the StoreNode and RootNode in the + // StoreRootCountMap. If we have seen the pair many times over a limit, + // we won't add the StoreNode into StoreNodes set again. + if (Visited.size() >= Max) { + auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode]; + if (RootCount.first == RootNode) + RootCount.second++; + else + RootCount = {RootNode, 1}; + } return false; + } return true; } bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { - if (OptLevel == CodeGenOpt::None) + if (OptLevel == CodeGenOpt::None || !EnableStoreMerging) return false; EVT MemVT = St->getMemoryVT(); @@ -15588,7 +15512,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { bool RV = false; while (StoreNodes.size() > 1) { - unsigned StartIdx = 0; + size_t StartIdx = 0; while ((StartIdx + 1 < StoreNodes.size()) && StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes != StoreNodes[StartIdx + 1].OffsetFromBase) @@ -16113,7 +16037,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { case MVT::ppcf128: return SDValue(); case MVT::f32: - if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || + if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { ; Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). @@ -16125,7 +16049,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { return SDValue(); case MVT::f64: if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && - !ST->isVolatile()) || + ST->isSimple()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { ; Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). @@ -16134,7 +16058,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { Ptr, ST->getMemOperand()); } - if (!ST->isVolatile() && + if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { // Many FP stores are not made apparent until after legalize, e.g. for // argument passing. Since this is so common, custom legalize the @@ -16181,7 +16105,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // memory accesses. We don't care if the original type was legal or not // as we assume software couldn't rely on the number of accesses of an // illegal type. 
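The store-merging changes above (the new combiner-store-merging option and the StoreRootCountMap bail-out limit) all serve MergeConsecutiveStores, which rewrites runs of adjacent narrow stores as one wide store. A byte-level sketch of that rewrite, assuming a little-endian layout (illustrative C++, not part of the patch):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  unsigned char A[4], B[4];

  // Four consecutive i8 stores of constants...
  A[0] = 0x11; A[1] = 0x22; A[2] = 0x33; A[3] = 0x44;

  // ...are equivalent to a single i32 store of the combined constant
  // (byte order shown for a little-endian target).
  uint32_t Merged = 0x44332211u;
  std::memcpy(B, &Merged, sizeof(Merged));

  assert(std::memcmp(A, B, sizeof(A)) == 0);
  return 0;
}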
- if (((!LegalOperations && !ST->isVolatile()) || + // TODO: May be able to relax for unordered atomics (see D66309) + if (((!LegalOperations && ST->isSimple()) || TLI.isOperationLegal(ISD::STORE, SVT)) && TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT, DAG, *ST->getMemOperand())) { @@ -16242,9 +16167,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // See if we can simplify the input to this truncstore with knowledge that // only the low bits are being used. For example: // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" - SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits); AddToWorklist(Value.getNode()); - if (Shorter) + if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits)) return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(), ST->getMemOperand()); @@ -16263,9 +16187,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // If this is a load followed by a store to the same location, then the store // is dead/noop. + // TODO: Can relax for unordered atomics (see D66309) if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) { if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && - ST->isUnindexed() && !ST->isVolatile() && + ST->isUnindexed() && ST->isSimple() && // There can't be any side effects between the load and store, such as // a call or store. Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { @@ -16274,9 +16199,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } } + // TODO: Can relax for unordered atomics (see D66309) if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) { - if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() && - !ST1->isVolatile()) { + if (ST->isUnindexed() && ST->isSimple() && + ST1->isUnindexed() && ST1->isSimple()) { if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT()) { // If this is a store followed by a store with the same value to the @@ -16405,7 +16331,8 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { break; case ISD::STORE: { StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain); - if (ST->isVolatile() || ST->isIndexed()) + // TODO: Can relax for unordered atomics (see D66309) + if (!ST->isSimple() || ST->isIndexed()) continue; const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG); // If we store purely within object bounds just before its lifetime ends, @@ -16456,6 +16383,11 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { if (OptLevel == CodeGenOpt::None) return SDValue(); + // Can't change the number of memory accesses for a volatile store or break + // atomicity for an atomic one. + if (!ST->isSimple()) + return SDValue(); + SDValue Val = ST->getValue(); SDLoc DL(ST); @@ -16531,12 +16463,52 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { } /// Convert a disguised subvector insertion into a shuffle: -/// insert_vector_elt V, (bitcast X from vector type), IdxC --> -/// bitcast(shuffle (bitcast V), (extended X), Mask) -/// Note: We do not use an insert_subvector node because that requires a legal -/// subvector type. 
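The GetDemandedBits change in visitSTORE above keeps the documented fold "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8". The scalar fact behind it, as a small hypothetical C++ check:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEFu, Y = 0x000000A5u;
  // An i8 truncating store keeps only the low 8 bits, and (X << 8) cannot
  // contribute to them, so the OR with the shifted value is dead.
  assert(static_cast<uint8_t>((X << 8) | Y) == static_cast<uint8_t>(Y));
  return 0;
}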
SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { SDValue InsertVal = N->getOperand(1); + SDValue Vec = N->getOperand(0); + + // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex) + // --> (vector_shuffle X, Y) + if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() && + InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + isa<ConstantSDNode>(InsertVal.getOperand(1))) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode()); + ArrayRef<int> Mask = SVN->getMask(); + + SDValue X = Vec.getOperand(0); + SDValue Y = Vec.getOperand(1); + + // Vec's operand 0 is using indices from 0 to N-1 and + // operand 1 from N to 2N - 1, where N is the number of + // elements in the vectors. + int XOffset = -1; + if (InsertVal.getOperand(0) == X) { + XOffset = 0; + } else if (InsertVal.getOperand(0) == Y) { + XOffset = X.getValueType().getVectorNumElements(); + } + + if (XOffset != -1) { + SmallVector<int, 16> NewMask(Mask.begin(), Mask.end()); + + auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1)); + NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue(); + assert(NewMask[InsIndex] < + (int)(2 * Vec.getValueType().getVectorNumElements()) && + NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound"); + + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X, + Y, NewMask, DAG); + if (LegalShuffle) + return LegalShuffle; + } + } + + // insert_vector_elt V, (bitcast X from vector type), IdxC --> + // bitcast(shuffle (bitcast V), (extended X), Mask) + // Note: We do not use an insert_subvector node because that requires a + // legal subvector type. if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() || !InsertVal.getOperand(0).getValueType().isVector()) return SDValue(); @@ -16674,7 +16646,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { - assert(!OriginalLoad->isVolatile()); + assert(OriginalLoad->isSimple()); EVT ResultVT = EVE->getValueType(0); EVT VecEltVT = InVecVT.getVectorElementType(); @@ -16747,12 +16719,12 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; SDValue To[] = { Load, Chain }; DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + // Make sure to revisit this node to clean it up; it will usually be dead. + AddToWorklist(EVE); // Since we're explicitly calling ReplaceAllUses, add the new node to the // worklist explicitly as well. - AddToWorklist(Load.getNode()); AddUsersToWorklist(Load.getNode()); // Add users too - // Make sure to revisit this node to clean it up; it will usually be dead. - AddToWorklist(EVE); + AddToWorklist(Load.getNode()); ++OpsNarrowed; return SDValue(EVE, 0); } @@ -16982,7 +16954,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { ISD::isNormalLoad(VecOp.getNode()) && !Index->hasPredecessor(VecOp.getNode())) { auto *VecLoad = dyn_cast<LoadSDNode>(VecOp); - if (VecLoad && !VecLoad->isVolatile()) + if (VecLoad && VecLoad->isSimple()) return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad); } @@ -17041,7 +17013,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Make sure we found a non-volatile load and the extractelement is // the only use. 
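The combineInsertEltToShuffle addition above folds insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex into a single shuffle by rewriting one mask entry. A scalar simulation of that mask update (hypothetical C++; four-element vectors, with mask entries 0..3 reading X and 4..7 reading Y, mirroring ISD::VECTOR_SHUFFLE):

#include <array>
#include <cassert>

using Vec = std::array<int, 4>;

// Model of ISD::VECTOR_SHUFFLE: mask entries [0,4) read X, [4,8) read Y.
static Vec shuffle(const Vec &X, const Vec &Y, const std::array<int, 4> &Mask) {
  Vec R{};
  for (int i = 0; i != 4; ++i)
    R[i] = Mask[i] < 4 ? X[Mask[i]] : Y[Mask[i] - 4];
  return R;
}

int main() {
  Vec X{10, 11, 12, 13}, Y{20, 21, 22, 23};
  std::array<int, 4> Mask{4, 1, 6, 3};

  // insert_vector_elt (shuffle X, Y, Mask), (extract_vector_elt X, 2), 0 ...
  Vec InsertForm = shuffle(X, Y, Mask);
  InsertForm[0] = X[2];

  // ...equals one shuffle whose mask entry 0 points at X[2] directly
  // (XOffset = 0 because the extracted element comes from X).
  std::array<int, 4> NewMask = Mask;
  NewMask[0] = 2;
  assert(InsertForm == shuffle(X, Y, NewMask));
  return 0;
}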
- if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) + if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple()) return SDValue(); // If Idx was -1 above, Elt is going to be -1, so just return undef. @@ -17344,17 +17316,16 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) { // the shuffle mask with -1. } - // Turn this into a shuffle with zero if that's legal. - EVT VecVT = Extract.getOperand(0).getValueType(); - if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT)) - return SDValue(); - // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... --> // bitcast (shuffle V, ZeroVec, VectorMask) SDLoc DL(BV); + EVT VecVT = Extract.getOperand(0).getValueType(); SDValue ZeroVec = DAG.getConstant(0, DL, VecVT); - SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec, - ShufMask); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0), + ZeroVec, ShufMask, DAG); + if (!Shuf) + return SDValue(); return DAG.getBitcast(VT, Shuf); } @@ -17656,6 +17627,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } } + // A splat of a single element is a SPLAT_VECTOR if supported on the target. + if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand) + if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) { + assert(!V.isUndef() && "Splat of undef should have been handled earlier"); + return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V); + } + // Check if we can express BUILD VECTOR via subvector extract. if (!LegalTypes && (N->getNumOperands() > 1)) { SDValue Op0 = N->getOperand(0); @@ -17829,11 +17807,9 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { } } - if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT)) - return SDValue(); - - return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), - DAG.getBitcast(VT, SV1), Mask); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), + DAG.getBitcast(VT, SV1), Mask, DAG); } SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { @@ -17853,6 +17829,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); + // If the input is a concat_vectors, just make a larger concat by padding + // with smaller undefs. + if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) { + unsigned NumOps = N->getNumOperands() * In.getNumOperands(); + SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end()); + Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType())); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); + } + SDValue Scalar = peekThroughOneUseBitcasts(In); // concat_vectors(scalar_to_vector(scalar), undef) -> @@ -18002,6 +17987,23 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return SDValue(); } +// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find +// if the subvector can be sourced for free. 
+static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) { + if (V.getOpcode() == ISD::INSERT_SUBVECTOR && + V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) { + return V.getOperand(1); + } + auto *IndexC = dyn_cast<ConstantSDNode>(Index); + if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS && + V.getOperand(0).getValueType() == SubVT && + (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) { + uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements(); + return V.getOperand(SubIdx); + } + return SDValue(); +} + static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -18010,39 +18012,29 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1) return SDValue(); + EVT VecVT = BinOp.getValueType(); SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1); - SDValue Index = Extract->getOperand(1); - EVT VT = Extract->getValueType(0); + if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType()) + return SDValue(); - // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find - // if the source subvector is the same type as the one being extracted. - auto GetSubVector = [VT, Index](SDValue V) -> SDValue { - if (V.getOpcode() == ISD::INSERT_SUBVECTOR && - V.getOperand(1).getValueType() == VT && V.getOperand(2) == Index) { - return V.getOperand(1); - } - auto *IndexC = dyn_cast<ConstantSDNode>(Index); - if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS && - V.getOperand(0).getValueType() == VT && - (IndexC->getZExtValue() % VT.getVectorNumElements()) == 0) { - uint64_t SubIdx = IndexC->getZExtValue() / VT.getVectorNumElements(); - return V.getOperand(SubIdx); - } + SDValue Index = Extract->getOperand(1); + EVT SubVT = Extract->getValueType(0); + if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT)) return SDValue(); - }; - SDValue Sub0 = GetSubVector(Bop0); - SDValue Sub1 = GetSubVector(Bop1); + + SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT); + SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT); // TODO: We could handle the case where only 1 operand is being inserted by // creating an extract of the other operand, but that requires checking // number of uses and/or costs. - if (!Sub0 || !Sub1 || !TLI.isOperationLegalOrCustom(BinOpcode, VT)) + if (!Sub0 || !Sub1) return SDValue(); // We are inserting both operands of the wide binop only to extract back // to the narrow vector size. Eliminate all of the insert/extract: // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y - return DAG.getNode(BinOpcode, SDLoc(Extract), VT, Sub0, Sub1, + return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1, BinOp->getFlags()); } @@ -18174,7 +18166,8 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0)); auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); - if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx) + if (!Ld || Ld->getExtensionType() || !Ld->isSimple() || + !ExtIdx) return SDValue(); // Allow targets to opt-out. @@ -18878,7 +18871,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // build_vector. 
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { int SplatIndex = SVN->getSplatIndex(); - if (TLI.isExtractVecEltCheap(VT, SplatIndex) && + if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) && TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) { // splat (vector_bo L, R), Index --> // splat (scalar_bo (extelt L, Index), (extelt R, Index)) @@ -19153,22 +19146,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SV1 = DAG.getUNDEF(VT); // Avoid introducing shuffles with illegal mask. - if (!TLI.isShuffleMaskLegal(Mask, VT)) { - ShuffleVectorSDNode::commuteMask(Mask); - - if (!TLI.isShuffleMaskLegal(Mask, VT)) - return SDValue(); - - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) - std::swap(SV0, SV1); - } - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask); + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) + return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG); } if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG)) @@ -19191,35 +19175,35 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1); int Elt = C0->getZExtValue(); NewMask[0] = Elt; - SDValue Val; // If we have an implict truncate do truncate here as long as it's legal. // if it's not legal, this should if (VT.getScalarType() != InVal.getValueType() && InVal.getValueType().isScalarInteger() && isTypeLegal(VT.getScalarType())) { - Val = + SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal); return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val); } if (VT.getScalarType() == InVecT.getScalarType() && - VT.getVectorNumElements() <= InVecT.getVectorNumElements() && - TLI.isShuffleMaskLegal(NewMask, VT)) { - Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec, - DAG.getUNDEF(InVecT), NewMask); - // If the initial vector is the correct size this shuffle is a - // valid result. - if (VT == InVecT) - return Val; - // If not we must truncate the vector. - if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { - MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy); - EVT SubVT = - EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(), - VT.getVectorNumElements()); - Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val, - ZeroIdx); - return Val; + VT.getVectorNumElements() <= InVecT.getVectorNumElements()) { + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec, + DAG.getUNDEF(InVecT), NewMask, DAG); + if (LegalShuffle) { + // If the initial vector is the correct size this shuffle is a + // valid result. + if (VT == InVecT) + return LegalShuffle; + // If not we must truncate the vector. 
+ if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { + MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy); + EVT SubVT = + EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(), + VT.getVectorNumElements()); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, + LegalShuffle, ZeroIdx); + } } } } @@ -19627,6 +19611,39 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } } + // Make sure all but the first op are undef or constant. + auto ConcatWithConstantOrUndef = [](SDValue Concat) { + return Concat.getOpcode() == ISD::CONCAT_VECTORS && + std::all_of(std::next(Concat->op_begin()), Concat->op_end(), + [](const SDValue &Op) { + return Op.isUndef() || + ISD::isBuildVectorOfConstantSDNodes(Op.getNode()); + }); + }; + + // The following pattern is likely to emerge with vector reduction ops. Moving + // the binary operation ahead of the concat may allow using a narrower vector + // instruction that has better performance than the wide version of the op: + // VBinOp (concat X, undef/constant), (concat Y, undef/constant) --> + // concat (VBinOp X, Y), VecC + if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) && + (LHS.hasOneUse() || RHS.hasOneUse())) { + EVT NarrowVT = LHS.getOperand(0).getValueType(); + if (NarrowVT == RHS.getOperand(0).getValueType() && + TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) { + SDLoc DL(N); + unsigned NumOperands = LHS.getNumOperands(); + SmallVector<SDValue, 4> ConcatOps; + for (unsigned i = 0; i != NumOperands; ++i) { + // This constant fold for operands 1 and up. + ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i), + RHS.getOperand(i))); + } + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); + } + } + if (SDValue V = scalarizeBinOpOfSplats(N, DAG)) return V; @@ -19723,7 +19740,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // Token chains must be identical. if (LHS.getOperand(0) != RHS.getOperand(0) || // Do not let this transformation reduce the number of volatile loads. - LLD->isVolatile() || RLD->isVolatile() || + // Be conservative for atomics for the moment + // TODO: This does appear to be legal for unordered atomics (see D66309) + !LLD->isSimple() || !RLD->isSimple() || // FIXME: If either is a pre/post inc/dec load, // we'd need to split out the address adjustment. LLD->isIndexed() || RLD->isIndexed() || @@ -19928,7 +19947,7 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset( const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC) { - if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint())) + if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType())) return SDValue(); // If we are before legalize types, we want the other legalization to happen @@ -20016,8 +20035,13 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, // when the condition can be materialized as an all-ones register. Any // single bit-test can be materialized as an all-ones register with // shift-left and shift-right-arith. + // TODO: The operation legality checks could be loosened to include "custom", + // but that may cause regressions for targets that do not have shift + // instructions. 
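The SimplifySelectCC hunk that follows now also checks that SHL and SRA are legal before materializing a single bit-test as an all-ones register. The shift trick the comment refers to, sketched in C++ (bitTestMask is an illustrative helper; assumes 32-bit two's-complement arithmetic right shift, as guaranteed from C++20):

#include <cassert>
#include <cstdint>

// Materialize "bit K of X is set" as an all-ones/all-zeros 32-bit mask:
// shift the tested bit into the sign position, then arithmetic-shift it back.
static int32_t bitTestMask(uint32_t X, unsigned K) {
  return static_cast<int32_t>(X << (31 - K)) >> 31;
}

int main() {
  assert(bitTestMask(0x00000010u, 4) == -1); // bit 4 set   -> all ones
  assert(bitTestMask(0x00000010u, 3) == 0);  // bit 3 clear -> all zeros
  return 0;
}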
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && - N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { + N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2) && + TLI.isOperationLegal(ISD::SHL, VT) && + TLI.isOperationLegal(ISD::SRA, VT)) { SDValue AndLHS = N0->getOperand(0); auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { @@ -20209,7 +20233,10 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { /// => /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form /// does not require additional intermediate precision] -SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) { +/// For the last iteration, put numerator N into it to gain more precision: +/// Result = N X_i + X_i (N - N A X_i) +SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, + SDNodeFlags Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -20230,25 +20257,39 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) { if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) { AddToWorklist(Est.getNode()); + SDLoc DL(Op); if (Iterations) { - SDLoc DL(Op); SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); - // Newton iterations: Est = Est + Est (1 - Arg * Est) + // Newton iterations: Est = Est + Est (N - Arg * Est) + // If this is the last iteration, also multiply by the numerator. for (int i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); + SDValue MulEst = Est; + + if (i == Iterations - 1) { + MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags); + AddToWorklist(MulEst.getNode()); + } + + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, + (i == Iterations - 1 ? N : FPOne), NewEst, Flags); AddToWorklist(NewEst.getNode()); NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags); + Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags); AddToWorklist(Est.getNode()); } + } else { + // If no iterations are available, multiply with N. + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags); + AddToWorklist(Est.getNode()); } + return Est; } @@ -20271,31 +20312,19 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); - AddToWorklist(HalfArg.getNode()); - HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); - AddToWorklist(HalfArg.getNode()); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); - AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); - AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); - AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); - AddToWorklist(Est.getNode()); } // If non-reciprocal square root is requested, multiply the result by Arg. 
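BuildDivEstimate above keeps the Newton-Raphson recurrence X_{i+1} = X_i * (2 - A * X_i) and folds the numerator N into the final step as N*X + X*(N - N*A*X), which is algebraically N*X*(2 - A*X). A hypothetical scalar version of the same scheme (plain C++ floats standing in for DAG nodes; the seed is an arbitrary rough estimate rather than a target-provided one):

#include <cassert>
#include <cmath>

// Approximate N / A by refining a reciprocal estimate of A, folding the
// numerator N into the last iteration as BuildDivEstimate does.
static float divEstimate(float N, float A, float Est, int Iterations) {
  for (int i = 0; i < Iterations; ++i) {
    if (i == Iterations - 1) {
      float MulEst = N * Est;                  // multiply by the numerator
      return MulEst + Est * (N - A * MulEst);  // N*X + X*(N - N*A*X)
    }
    Est = Est + Est * (1.0f - A * Est);        // X_{i+1} = X_i * (2 - A*X_i)
  }
  return Est * N;  // no iterations available: just multiply the estimate by N
}

int main() {
  float Q = divEstimate(7.0f, 3.0f, /*rough seed*/ 0.3f, /*Iterations*/ 3);
  assert(std::fabs(Q - 7.0f / 3.0f) < 1e-4f);
  return 0;
}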
- if (!Reciprocal) { + if (!Reciprocal) Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); - AddToWorklist(Est.getNode()); - } return Est; } @@ -20321,13 +20350,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, // E = (E * -0.5) * ((A * E) * E + -3.0) for (unsigned i = 0; i < Iterations; ++i) { SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); - AddToWorklist(AE.getNode()); - SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); - AddToWorklist(AEE.getNode()); - SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); - AddToWorklist(RHS.getNode()); // When calculating a square root at the last iteration build: // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) @@ -20340,10 +20364,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, // SQRT: LHS = (A * E) * -0.5 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); } - AddToWorklist(LHS.getNode()); Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); - AddToWorklist(Est.getNode()); } return Est; @@ -20400,16 +20422,11 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est); - AddToWorklist(Fabs.getNode()); - AddToWorklist(IsDenorm.getNode()); - AddToWorklist(Est.getNode()); } else { // X == 0.0 ? 0.0 : Est SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est); - AddToWorklist(IsZero.getNode()); - AddToWorklist(Est.getNode()); } } } @@ -20432,6 +20449,7 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { struct MemUseCharacteristics { bool IsVolatile; + bool IsAtomic; SDValue BasePtr; int64_t Offset; Optional<int64_t> NumBytes; @@ -20447,18 +20465,20 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { : (LSN->getAddressingMode() == ISD::PRE_DEC) ? -1 * C->getSExtValue() : 0; - return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/, + return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(), + Offset /*base offset*/, Optional<int64_t>(LSN->getMemoryVT().getStoreSize()), LSN->getMemOperand()}; } if (const auto *LN = cast<LifetimeSDNode>(N)) - return {false /*isVolatile*/, LN->getOperand(1), + return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1), (LN->hasOffset()) ? LN->getOffset() : 0, (LN->hasOffset()) ? Optional<int64_t>(LN->getSize()) : Optional<int64_t>(), (MachineMemOperand *)nullptr}; // Default. - return {false /*isvolatile*/, SDValue(), (int64_t)0 /*offset*/, + return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(), + (int64_t)0 /*offset*/, Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr}; }; @@ -20474,6 +20494,11 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { if (MUC0.IsVolatile && MUC1.IsVolatile) return true; + // Be conservative about atomics for the moment + // TODO: This is way overconservative for unordered atomics (see D66309) + if (MUC0.IsAtomic && MUC1.IsAtomic) + return true; + if (MUC0.MMO && MUC1.MMO) { if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) || (MUC1.MMO->isInvariant() && MUC0.MMO->isStore())) @@ -20555,7 +20580,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallPtrSet<SDNode *, 16> Visited; // Visited node set. // Get alias information for node. 
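The buildSqrtNROneConst cleanup above retains the one-constant refinement Est = Est * (1.5 - HalfArg * Est * Est) with HalfArg = 1.5*Arg - Arg. A standalone sketch of that reciprocal-square-root iteration (hypothetical C++; rsqrtEstimate and the seed value are illustrative):

#include <cassert>
#include <cmath>

// Newton-Raphson refinement for 1/sqrt(Arg) using a single FP constant,
// mirroring buildSqrtNROneConst.
static float rsqrtEstimate(float Arg, float Est, unsigned Iterations) {
  float HalfArg = 1.5f * Arg - Arg;  // 0.5 * Arg written with one constant
  for (unsigned i = 0; i < Iterations; ++i)
    Est = Est * (1.5f - HalfArg * Est * Est);
  return Est;
}

int main() {
  float Est = rsqrtEstimate(2.0f, /*rough seed*/ 0.7f, /*Iterations*/ 3);
  assert(std::fabs(Est - 1.0f / std::sqrt(2.0f)) < 1e-5f);
  return 0;
}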
- const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile(); + // TODO: relax aliasing for unordered atomics (see D66309) + const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple(); // Starting off. Chains.push_back(OriginalChain); @@ -20571,8 +20597,9 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, case ISD::LOAD: case ISD::STORE: { // Get alias information for C. + // TODO: Relax aliasing for unordered atomics (see D66309) bool IsOpLoad = isa<LoadSDNode>(C.getNode()) && - !cast<LSBaseSDNode>(C.getNode())->isVolatile(); + cast<LSBaseSDNode>(C.getNode())->isSimple(); if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) { // Look further up the chain. C = C.getOperand(0); @@ -20727,7 +20754,8 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { // If the chain has more than one use, then we can't reorder the mem ops. if (!SDValue(Chain, 0)->hasOneUse()) break; - if (Chain->isVolatile() || Chain->isIndexed()) + // TODO: Relax for unordered atomics (see D66309) + if (!Chain->isSimple() || Chain->isIndexed()) break; // Find the base pointer and offset for this memory node. @@ -20795,11 +20823,11 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps); CombineTo(St, TF); - AddToWorklist(STChain); - // Add TF operands worklist in reverse order. - for (auto I = TF->getNumOperands(); I;) - AddToWorklist(TF->getOperand(--I).getNode()); + // Add TF and its operands to the worklist. AddToWorklist(TF.getNode()); + for (const SDValue &Op : TF->ops()) + AddToWorklist(Op.getNode()); + AddToWorklist(STChain); return true; } |