Diffstat (limited to 'lib/Transforms')
24 files changed, 1016 insertions, 437 deletions
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 203594572618..ec06d5f9fb05 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -44,8 +44,12 @@ using namespace llvm;
 static cl::opt<bool>
-RunLoopVectorization("vectorize-loops", cl::Hidden,
-                     cl::desc("Run the Loop vectorization passes"));
+    RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
+                       cl::ZeroOrMore, cl::desc("Run Partial inlining pass"));
+
+static cl::opt<bool>
+    RunLoopVectorization("vectorize-loops", cl::Hidden,
+                         cl::desc("Run the Loop vectorization passes"));
 
 static cl::opt<bool>
 RunSLPVectorization("vectorize-slp", cl::Hidden,
@@ -516,6 +520,8 @@ void PassManagerBuilder::populateModulePassManager(
   // pass manager that we are specifically trying to avoid. To prevent this
   // we must insert a no-op module pass to reset the pass manager.
   MPM.add(createBarrierNoopPass());
+  if (RunPartialInlining)
+    MPM.add(createPartialInliningPass());
 
   if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO &&
       !PrepareForThinLTO)
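The new pass-manager hook is gated off by default by the cl::opt above. Assuming a stock opt build that registers these options, a plausible way to exercise it (an illustrative invocation, not taken from the patch) would be:

    opt -O2 -enable-partial-inlining -S input.ll -o output.ll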
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 0ca62b7ae40c..733eeb1767a3 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -852,8 +852,9 @@ Value *FAddCombine::createAddendVal(const FAddend &Opnd, bool &NeedNeg) {
 /// This basically requires proving that the add in the original type would not
 /// overflow to change the sign bit or have a carry out.
 /// TODO: Handle this for Vectors.
-bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
-                                            Instruction &CxtI) {
+bool InstCombiner::willNotOverflowSignedSub(const Value *LHS,
+                                            const Value *RHS,
+                                            const Instruction &CxtI) const {
   // If LHS and RHS each have at least two sign bits, the subtraction
   // cannot overflow.
   if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
@@ -869,8 +870,8 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
 
   // Subtraction of two 2's complement numbers having identical signs will
   // never overflow.
-  if ((LHSKnown.One[BitWidth - 1] && RHSKnown.One[BitWidth - 1]) ||
-      (LHSKnown.Zero[BitWidth - 1] && RHSKnown.Zero[BitWidth - 1]))
+  if ((LHSKnown.isNegative() && RHSKnown.isNegative()) ||
+      (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()))
     return true;
 
   // TODO: implement logic similar to checkRippleForAdd
@@ -879,8 +880,9 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
 
 /// \brief Return true if we can prove that:
 ///    (sub LHS, RHS) === (sub nuw LHS, RHS)
-bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS,
-                                              Instruction &CxtI) {
+bool InstCombiner::willNotOverflowUnsignedSub(const Value *LHS,
+                                              const Value *RHS,
+                                              const Instruction &CxtI) const {
   // If the LHS is negative and the RHS is non-negative, no unsigned wrap.
   KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, &CxtI);
   KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, &CxtI);
@@ -1180,7 +1182,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       Constant *CI =
           ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
       if (ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
-          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
+          willNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
         // Insert the new, smaller add.
         Value *NewAdd =
             Builder->CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv");
@@ -1197,7 +1199,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       if (LHSConv->getOperand(0)->getType() ==
               RHSConv->getOperand(0)->getType() &&
           (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
-          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+          willNotOverflowSignedAdd(LHSConv->getOperand(0),
                                    RHSConv->getOperand(0), I)) {
         // Insert the new integer add.
         Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
@@ -1275,10 +1277,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
     }
   }
 
-  // TODO(jingyue): Consider WillNotOverflowSignedAdd and
+  // TODO(jingyue): Consider willNotOverflowSignedAdd and
   // willNotOverflowUnsignedAdd to reduce the number of invocations of
   // computeKnownBits.
-  if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, I)) {
+  if (!I.hasNoSignedWrap() && willNotOverflowSignedAdd(LHS, RHS, I)) {
     Changed = true;
     I.setHasNoSignedWrap(true);
   }
@@ -1351,7 +1353,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
           ConstantExpr::getFPToSI(CFP, LHSIntVal->getType());
       if (LHSConv->hasOneUse() &&
           ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
-          WillNotOverflowSignedAdd(LHSIntVal, CI, I)) {
+          willNotOverflowSignedAdd(LHSIntVal, CI, I)) {
         // Insert the new integer add.
         Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, CI, "addconv");
@@ -1370,7 +1372,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
       // and if the integer add will not overflow.
       if (LHSIntVal->getType() == RHSIntVal->getType() &&
           (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
-          WillNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) {
+          willNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) {
         // Insert the new integer add.
         Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, RHSIntVal, "addconv");
@@ -1676,11 +1678,11 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
       return replaceInstUsesWith(I, Res);
 
   bool Changed = false;
-  if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, I)) {
+  if (!I.hasNoSignedWrap() && willNotOverflowSignedSub(Op0, Op1, I)) {
     Changed = true;
     I.setHasNoSignedWrap(true);
  }
-  if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, I)) {
+  if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedSub(Op0, Op1, I)) {
     Changed = true;
     I.setHasNoUnsignedWrap(true);
   }
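As a standalone illustration (not part of the patch) of the sign-bit argument behind willNotOverflowSignedSub, the claim that subtracting two values with identical known signs cannot overflow can be checked exhaustively at 8 bits:

    // Illustrative only: if both operands are known negative or both known
    // non-negative, the subtraction cannot overflow the original width.
    #include <cassert>

    int main() {
      for (int A = -128; A <= 127; ++A)
        for (int B = -128; B <= 127; ++B)
          if ((A < 0) == (B < 0)) {   // identical sign bits
            int Wide = A - B;         // computed without overflow in 'int'
            assert(Wide >= -128 && Wide <= 127 && "fits back in i8");
          }
      return 0;
    }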
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 82dc88f1b3ad..4227b2d01be8 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -764,7 +764,7 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS,
 }
 
 /// Fold (icmp)&(icmp) if possible.
-Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
   ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
 
   // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
@@ -943,7 +943,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
 
 /// Optimize (fcmp)&(fcmp).  NOTE: Unlike the rest of instcombine, this returns
 /// a Value which should already be inserted into the function.
-Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
+Value *InstCombiner::foldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
   Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
   Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
   FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
@@ -1126,8 +1126,8 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) {
     ICmpInst *ICmp0 = dyn_cast<ICmpInst>(Cast0Src);
     ICmpInst *ICmp1 = dyn_cast<ICmpInst>(Cast1Src);
     if (ICmp0 && ICmp1) {
-      Value *Res = LogicOpc == Instruction::And ? FoldAndOfICmps(ICmp0, ICmp1)
-                                                : FoldOrOfICmps(ICmp0, ICmp1, &I);
+      Value *Res = LogicOpc == Instruction::And ? foldAndOfICmps(ICmp0, ICmp1)
+                                                : foldOrOfICmps(ICmp0, ICmp1, &I);
       if (Res)
         return CastInst::Create(CastOpcode, Res, DestTy);
       return nullptr;
@@ -1138,8 +1138,8 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) {
     FCmpInst *FCmp0 = dyn_cast<FCmpInst>(Cast0Src);
     FCmpInst *FCmp1 = dyn_cast<FCmpInst>(Cast1Src);
     if (FCmp0 && FCmp1) {
-      Value *Res = LogicOpc == Instruction::And ? FoldAndOfFCmps(FCmp0, FCmp1)
-                                                : FoldOrOfFCmps(FCmp0, FCmp1);
+      Value *Res = LogicOpc == Instruction::And ? foldAndOfFCmps(FCmp0, FCmp1)
+                                                : foldOrOfFCmps(FCmp0, FCmp1);
       if (Res)
         return CastInst::Create(CastOpcode, Res, DestTy);
       return nullptr;
@@ -1425,7 +1425,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
     ICmpInst *LHS = dyn_cast<ICmpInst>(Op0);
     ICmpInst *RHS = dyn_cast<ICmpInst>(Op1);
     if (LHS && RHS)
-      if (Value *Res = FoldAndOfICmps(LHS, RHS))
+      if (Value *Res = foldAndOfICmps(LHS, RHS))
         return replaceInstUsesWith(I, Res);
 
     // TODO: Make this recursive; it's a little tricky because an arbitrary
@@ -1433,18 +1433,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
     Value *X, *Y;
     if (LHS && match(Op1, m_OneUse(m_And(m_Value(X), m_Value(Y))))) {
       if (auto *Cmp = dyn_cast<ICmpInst>(X))
-        if (Value *Res = FoldAndOfICmps(LHS, Cmp))
+        if (Value *Res = foldAndOfICmps(LHS, Cmp))
           return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y));
       if (auto *Cmp = dyn_cast<ICmpInst>(Y))
-        if (Value *Res = FoldAndOfICmps(LHS, Cmp))
+        if (Value *Res = foldAndOfICmps(LHS, Cmp))
           return replaceInstUsesWith(I, Builder->CreateAnd(Res, X));
     }
     if (RHS && match(Op0, m_OneUse(m_And(m_Value(X), m_Value(Y))))) {
       if (auto *Cmp = dyn_cast<ICmpInst>(X))
-        if (Value *Res = FoldAndOfICmps(Cmp, RHS))
+        if (Value *Res = foldAndOfICmps(Cmp, RHS))
           return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y));
       if (auto *Cmp = dyn_cast<ICmpInst>(Y))
-        if (Value *Res = FoldAndOfICmps(Cmp, RHS))
+        if (Value *Res = foldAndOfICmps(Cmp, RHS))
           return replaceInstUsesWith(I, Builder->CreateAnd(Res, X));
     }
   }
@@ -1452,7 +1452,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
   // If and'ing two fcmp, try combine them into one.
   if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
     if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
-      if (Value *Res = FoldAndOfFCmps(LHS, RHS))
+      if (Value *Res = foldAndOfFCmps(LHS, RHS))
         return replaceInstUsesWith(I, Res);
 
   if (Instruction *CastedAnd = foldCastedBitwiseLogic(I))
@@ -1589,7 +1589,7 @@ static Value *matchSelectFromAndOr(Value *A, Value *C, Value *B, Value *D,
 }
 
 /// Fold (icmp)|(icmp) if possible.
-Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
+Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
                                    Instruction *CxtI) {
   ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
@@ -1846,7 +1846,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
 
 /// Optimize (fcmp)|(fcmp).  NOTE: Unlike the rest of instcombine, this returns
 /// a Value which should already be inserted into the function.
-Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
+Value *InstCombiner::foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
   Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
   Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
   FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
@@ -2197,7 +2197,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
     ICmpInst *LHS = dyn_cast<ICmpInst>(Op0);
     ICmpInst *RHS = dyn_cast<ICmpInst>(Op1);
     if (LHS && RHS)
-      if (Value *Res = FoldOrOfICmps(LHS, RHS, &I))
+      if (Value *Res = foldOrOfICmps(LHS, RHS, &I))
         return replaceInstUsesWith(I, Res);
 
     // TODO: Make this recursive; it's a little tricky because an arbitrary
@@ -2205,18 +2205,18 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
     Value *X, *Y;
     if (LHS && match(Op1, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
       if (auto *Cmp = dyn_cast<ICmpInst>(X))
-        if (Value *Res = FoldOrOfICmps(LHS, Cmp, &I))
+        if (Value *Res = foldOrOfICmps(LHS, Cmp, &I))
          return replaceInstUsesWith(I, Builder->CreateOr(Res, Y));
       if (auto *Cmp = dyn_cast<ICmpInst>(Y))
-        if (Value *Res = FoldOrOfICmps(LHS, Cmp, &I))
+        if (Value *Res = foldOrOfICmps(LHS, Cmp, &I))
          return replaceInstUsesWith(I, Builder->CreateOr(Res, X));
     }
     if (RHS && match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
       if (auto *Cmp = dyn_cast<ICmpInst>(X))
-        if (Value *Res = FoldOrOfICmps(Cmp, RHS, &I))
+        if (Value *Res = foldOrOfICmps(Cmp, RHS, &I))
          return replaceInstUsesWith(I, Builder->CreateOr(Res, Y));
       if (auto *Cmp = dyn_cast<ICmpInst>(Y))
-        if (Value *Res = FoldOrOfICmps(Cmp, RHS, &I))
+        if (Value *Res = foldOrOfICmps(Cmp, RHS, &I))
          return replaceInstUsesWith(I, Builder->CreateOr(Res, X));
     }
   }
@@ -2224,7 +2224,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   // (fcmp uno x, c) | (fcmp uno y, c)  -> (fcmp uno x, y)
   if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
     if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
-      if (Value *Res = FoldOrOfFCmps(LHS, RHS))
+      if (Value *Res = foldOrOfFCmps(LHS, RHS))
        return replaceInstUsesWith(I, Res);
 
   if (Instruction *CastedOr = foldCastedBitwiseLogic(I))
@@ -2320,6 +2320,24 @@ static Instruction *foldXorToXor(BinaryOperator &I) {
   return nullptr;
 }
 
+Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+  if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
+    if (LHS->getOperand(0) == RHS->getOperand(1) &&
+        LHS->getOperand(1) == RHS->getOperand(0))
+      LHS->swapOperands();
+    if (LHS->getOperand(0) == RHS->getOperand(0) &&
+        LHS->getOperand(1) == RHS->getOperand(1)) {
+      // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
+      Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+      unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
+      bool isSigned = LHS->isSigned() || RHS->isSigned();
+      return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
+    }
+  }
+
+  return nullptr;
+}
+
 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
 // here. We should standardize that construct where it is needed or choose some
 // other way to ensure that commutated variants of patterns are not missed.
@@ -2579,23 +2597,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       match(Op1, m_Not(m_Specific(A))))
     return BinaryOperator::CreateNot(Builder->CreateAnd(A, B));
 
-  // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
-  if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
-    if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
-      if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
-        if (LHS->getOperand(0) == RHS->getOperand(1) &&
-            LHS->getOperand(1) == RHS->getOperand(0))
-          LHS->swapOperands();
-        if (LHS->getOperand(0) == RHS->getOperand(0) &&
-            LHS->getOperand(1) == RHS->getOperand(1)) {
-          Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
-          unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
-          bool isSigned = LHS->isSigned() || RHS->isSigned();
-          return replaceInstUsesWith(I,
-                                     getNewICmpValue(isSigned, Code, Op0, Op1,
-                                                     Builder));
-        }
-      }
+  if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
+    if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
+      if (Value *V = foldXorOfICmps(LHS, RHS))
+        return replaceInstUsesWith(I, V);
 
   if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
     return CastedXor;
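An illustrative check (not part of the patch) of the predicate-code trick behind foldXorOfICmps: each comparison of the same operands (A, B) can be encoded as a mask over the three disjoint outcomes {less, equal, greater}, and xor-ing two comparisons xors their masks. The mask values below are assumptions for the demo, not LLVM's actual getICmpCode encoding:

    #include <cassert>

    int main() {
      const unsigned LT = 1, EQ = 2, LE = LT | EQ;
      auto Holds = [](int A, int B, unsigned Mask) {
        unsigned Outcome = A < B ? 1u : A == B ? 2u : 4u;
        return (Mask & Outcome) != 0;
      };
      for (int A = -2; A <= 2; ++A)
        for (int B = -2; B <= 2; ++B)
          // (A < B) ^ (A == B) is exactly (A <= B): Code = LT ^ EQ == LE.
          assert((Holds(A, B, LT) ^ Holds(A, B, EQ)) == Holds(A, B, LE));
      return 0;
    }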
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 001a4bcf16f3..f4bf5221f6a2 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -585,6 +585,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
       return CastInst::CreateIntegerCast(Shift, DestTy, false);
   }
 
+  // FIXME: We should canonicalize to zext/trunc and remove this transform.
   // Transform trunc(lshr (sext A), Cst) to ashr A, Cst to eliminate type
   // conversion.
   // It works because bits coming from sign extension have the same value as
@@ -595,18 +596,24 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
     Value *SExt = cast<Instruction>(Src)->getOperand(0);
     const unsigned SExtSize = SExt->getType()->getPrimitiveSizeInBits();
     const unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+    const unsigned CISize = CI.getType()->getPrimitiveSizeInBits();
+    const unsigned MaxAmt = SExtSize - std::max(CISize, ASize);
     unsigned ShiftAmt = Cst->getZExtValue();
+
     // This optimization can be only performed when zero bits generated by
     // the original lshr aren't pulled into the value after truncation, so we
     // can only shift by values no larger than the number of extension bits.
     // FIXME: Instead of bailing when the shift is too large, use and to clear
     // the extra bits.
-    if (SExt->hasOneUse() && ShiftAmt <= SExtSize - ASize) {
-      // If shifting by the size of the original value in bits or more, it is
-      // being filled with the sign bit, so shift by ASize-1 to avoid ub.
-      Value *Shift = Builder->CreateAShr(A, std::min(ShiftAmt, ASize-1));
-      Shift->takeName(Src);
-      return CastInst::CreateIntegerCast(Shift, CI.getType(), true);
+    if (ShiftAmt <= MaxAmt) {
+      if (CISize == ASize)
+        return BinaryOperator::CreateAShr(A, ConstantInt::get(CI.getType(),
+                                          std::min(ShiftAmt, ASize - 1)));
+      if (SExt->hasOneUse()) {
+        Value *Shift = Builder->CreateAShr(A, std::min(ShiftAmt, ASize-1));
+        Shift->takeName(Src);
+        return CastInst::CreateIntegerCast(Shift, CI.getType(), true);
+      }
     }
   }
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 60ed4057cedd..6492eaedae9c 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3506,7 +3506,7 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
     // We can strength reduce this signed add into a regular add if we can
     // prove that it will never overflow.
     if (OCF == OCF_SIGNED_ADD)
-      if (WillNotOverflowSignedAdd(LHS, RHS, OrigI))
+      if (willNotOverflowSignedAdd(LHS, RHS, OrigI))
         return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(),
                          true);
     break;
@@ -3519,11 +3519,11 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
       return SetResult(LHS, Builder->getFalse(), false);
 
     if (OCF == OCF_SIGNED_SUB) {
-      if (WillNotOverflowSignedSub(LHS, RHS, OrigI))
+      if (willNotOverflowSignedSub(LHS, RHS, OrigI))
         return SetResult(Builder->CreateNSWSub(LHS, RHS), Builder->getFalse(),
                          true);
     } else {
-      if (WillNotOverflowUnsignedSub(LHS, RHS, OrigI))
+      if (willNotOverflowUnsignedSub(LHS, RHS, OrigI))
         return SetResult(Builder->CreateNUWSub(LHS, RHS), Builder->getFalse(),
                          true);
     }
@@ -3553,7 +3553,7 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
       return SetResult(LHS, Builder->getFalse(), false);
 
     if (OCF == OCF_SIGNED_MUL)
-      if (WillNotOverflowSignedMul(LHS, RHS, OrigI))
+      if (willNotOverflowSignedMul(LHS, RHS, OrigI))
         return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(),
                          true);
     break;
@@ -4260,6 +4260,80 @@ static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) {
   return new ICmpInst(NewPred, Op0, ConstantExpr::getAdd(Op1C, OneOrNegOne));
 }
 
+/// Integer compare with boolean values can always be turned into bitwise ops.
+static Instruction *canonicalizeICmpBool(ICmpInst &I,
+                                         InstCombiner::BuilderTy &Builder) {
+  Value *A = I.getOperand(0), *B = I.getOperand(1);
+  assert(A->getType()->getScalarType()->isIntegerTy(1) && "Bools only");
+
+  // A boolean compared to true/false can be simplified to Op0/true/false in
+  // 14 out of the 20 (10 predicates * 2 constants) possible combinations.
+  // Cases not handled by InstSimplify are always 'not' of Op0.
+  if (match(B, m_Zero())) {
+    switch (I.getPredicate()) {
+    case CmpInst::ICMP_EQ:  // A ==   0 -> !A
+    case CmpInst::ICMP_ULE: // A <=u  0 -> !A
+    case CmpInst::ICMP_SGE: // A >=s  0 -> !A
+      return BinaryOperator::CreateNot(A);
+    default:
+      llvm_unreachable("ICmp i1 X, C not simplified as expected.");
+    }
+  } else if (match(B, m_One())) {
+    switch (I.getPredicate()) {
+    case CmpInst::ICMP_NE:  // A !=  1 -> !A
+    case CmpInst::ICMP_ULT: // A <u  1 -> !A
+    case CmpInst::ICMP_SGT: // A >s -1 -> !A
+      return BinaryOperator::CreateNot(A);
+    default:
+      llvm_unreachable("ICmp i1 X, C not simplified as expected.");
+    }
+  }
+
+  switch (I.getPredicate()) {
+  default:
+    llvm_unreachable("Invalid icmp instruction!");
+  case ICmpInst::ICMP_EQ:
+    // icmp eq i1 A, B -> ~(A ^ B)
+    return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
+
+  case ICmpInst::ICMP_NE:
+    // icmp ne i1 A, B -> A ^ B
+    return BinaryOperator::CreateXor(A, B);
+
+  case ICmpInst::ICMP_UGT:
+    // icmp ugt -> icmp ult
+    std::swap(A, B);
+    LLVM_FALLTHROUGH;
+  case ICmpInst::ICMP_ULT:
+    // icmp ult i1 A, B -> ~A & B
+    return BinaryOperator::CreateAnd(Builder.CreateNot(A), B);
+
+  case ICmpInst::ICMP_SGT:
+    // icmp sgt -> icmp slt
+    std::swap(A, B);
+    LLVM_FALLTHROUGH;
+  case ICmpInst::ICMP_SLT:
+    // icmp slt i1 A, B -> A & ~B
+    return BinaryOperator::CreateAnd(Builder.CreateNot(B), A);
+
+  case ICmpInst::ICMP_UGE:
+    // icmp uge -> icmp ule
+    std::swap(A, B);
+    LLVM_FALLTHROUGH;
+  case ICmpInst::ICMP_ULE:
+    // icmp ule i1 A, B -> ~A | B
+    return BinaryOperator::CreateOr(Builder.CreateNot(A), B);
+
+  case ICmpInst::ICMP_SGE:
+    // icmp sge -> icmp sle
+    std::swap(A, B);
+    LLVM_FALLTHROUGH;
+  case ICmpInst::ICMP_SLE:
+    // icmp sle i1 A, B -> A | ~B
+    return BinaryOperator::CreateOr(Builder.CreateNot(B), A);
+  }
+}
+
 Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
   bool Changed = false;
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -4297,49 +4371,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
     }
   }
 
-  Type *Ty = Op0->getType();
-
-  // icmp's with boolean values can always be turned into bitwise operations
-  if (Ty->getScalarType()->isIntegerTy(1)) {
-    switch (I.getPredicate()) {
-    default: llvm_unreachable("Invalid icmp instruction!");
-    case ICmpInst::ICMP_EQ: {               // icmp eq i1 A, B -> ~(A^B)
-      Value *Xor = Builder->CreateXor(Op0, Op1, I.getName() + "tmp");
-      return BinaryOperator::CreateNot(Xor);
-    }
-    case ICmpInst::ICMP_NE:                 // icmp ne i1 A, B -> A^B
-      return BinaryOperator::CreateXor(Op0, Op1);
-
-    case ICmpInst::ICMP_UGT:
-      std::swap(Op0, Op1);                  // Change icmp ugt -> icmp ult
-      LLVM_FALLTHROUGH;
-    case ICmpInst::ICMP_ULT:{               // icmp ult i1 A, B -> ~A & B
-      Value *Not = Builder->CreateNot(Op0, I.getName() + "tmp");
-      return BinaryOperator::CreateAnd(Not, Op1);
-    }
-    case ICmpInst::ICMP_SGT:
-      std::swap(Op0, Op1);                  // Change icmp sgt -> icmp slt
-      LLVM_FALLTHROUGH;
-    case ICmpInst::ICMP_SLT: {              // icmp slt i1 A, B -> A & ~B
-      Value *Not = Builder->CreateNot(Op1, I.getName() + "tmp");
-      return BinaryOperator::CreateAnd(Not, Op0);
-    }
-    case ICmpInst::ICMP_UGE:
-      std::swap(Op0, Op1);                  // Change icmp uge -> icmp ule
-      LLVM_FALLTHROUGH;
-    case ICmpInst::ICMP_ULE: {              // icmp ule i1 A, B -> ~A | B
-      Value *Not = Builder->CreateNot(Op0, I.getName() + "tmp");
-      return BinaryOperator::CreateOr(Not, Op1);
-    }
-    case ICmpInst::ICMP_SGE:
-      std::swap(Op0, Op1);                  // Change icmp sge -> icmp sle
-      LLVM_FALLTHROUGH;
-    case ICmpInst::ICMP_SLE: {              // icmp sle i1 A, B -> A | ~B
-      Value *Not = Builder->CreateNot(Op1, I.getName() + "tmp");
-      return BinaryOperator::CreateOr(Not, Op0);
-    }
-    }
-  }
+  if (Op0->getType()->getScalarType()->isIntegerTy(1))
+    if (Instruction *Res = canonicalizeICmpBool(I, *Builder))
+      return Res;
 
   if (ICmpInst *NewICmp = canonicalizeCmpWithConstant(I))
     return NewICmp;
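The i1 icmp-to-bitwise mappings used by canonicalizeICmpBool can be sanity-checked exhaustively; a standalone sketch (illustrative, not part of the patch) for the unsigned predicates follows. The signed forms differ because i1 true sign-extends to -1, which is why slt maps to A & ~B rather than ~A & B:

    #include <cassert>

    int main() {
      for (unsigned A = 0; A <= 1; ++A)
        for (unsigned B = 0; B <= 1; ++B) {
          assert((A == B) == static_cast<bool>(~(A ^ B) & 1)); // eq  -> ~(A ^ B)
          assert((A != B) == static_cast<bool>(A ^ B));        // ne  -> A ^ B
          assert((A <  B) == static_cast<bool>(~A & B & 1));   // ult -> ~A & B
          assert((A >  B) == static_cast<bool>(A & ~B & 1));   // ugt -> A & ~B
          assert((A <= B) == static_cast<bool>((~A | B) & 1)); // ule -> ~A | B
          assert((A >= B) == static_cast<bool>((A | ~B) & 1)); // uge -> A | ~B
        }
      return 0;
    }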
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index f88a2c6acc3f..6829be86885b 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -275,11 +275,7 @@ public:
   Instruction *visitSDiv(BinaryOperator &I);
   Instruction *visitFDiv(BinaryOperator &I);
   Value *simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, bool Inverted);
-  Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS);
-  Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
   Instruction *visitAnd(BinaryOperator &I);
-  Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction *CxtI);
-  Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
   Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A,
                                    Value *B, Value *C);
   Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op, Value *A,
@@ -410,18 +406,24 @@ private:
                            bool DoTransform = true);
   Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
-  bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction &CxtI) {
+  bool willNotOverflowSignedAdd(const Value *LHS, const Value *RHS,
+                                const Instruction &CxtI) const {
     return computeOverflowForSignedAdd(LHS, RHS, &CxtI) ==
            OverflowResult::NeverOverflows;
   };
-  bool willNotOverflowUnsignedAdd(Value *LHS, Value *RHS, Instruction &CxtI) {
+  bool willNotOverflowUnsignedAdd(const Value *LHS, const Value *RHS,
+                                  const Instruction &CxtI) const {
    return computeOverflowForUnsignedAdd(LHS, RHS, &CxtI) ==
           OverflowResult::NeverOverflows;
   };
-  bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
-  bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
-  bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction &CxtI);
-  bool willNotOverflowUnsignedMul(Value *LHS, Value *RHS, Instruction &CxtI) {
+  bool willNotOverflowSignedSub(const Value *LHS, const Value *RHS,
+                                const Instruction &CxtI) const;
+  bool willNotOverflowUnsignedSub(const Value *LHS, const Value *RHS,
+                                  const Instruction &CxtI) const;
+  bool willNotOverflowSignedMul(const Value *LHS, const Value *RHS,
+                                const Instruction &CxtI) const;
+  bool willNotOverflowUnsignedMul(const Value *LHS, const Value *RHS,
+                                  const Instruction &CxtI) const {
     return computeOverflowForUnsignedMul(LHS, RHS, &CxtI) ==
           OverflowResult::NeverOverflows;
   };
@@ -445,6 +447,12 @@ private:
   Instruction::CastOps isEliminableCastPair(const CastInst *CI1,
                                             const CastInst *CI2);
 
+  Value *foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+  Value *foldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
+  Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction *CxtI);
+  Value *foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
+  Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+
 public:
   /// \brief Inserts an instruction \p New before instruction \p Old
   ///
@@ -523,29 +531,31 @@ public:
     return nullptr; // Don't do anything with FI
   }
 
-  void computeKnownBits(Value *V, KnownBits &Known,
-                        unsigned Depth, Instruction *CxtI) const {
+  void computeKnownBits(const Value *V, KnownBits &Known,
+                        unsigned Depth, const Instruction *CxtI) const {
     llvm::computeKnownBits(V, Known, DL, Depth, &AC, CxtI, &DT);
   }
-  KnownBits computeKnownBits(Value *V, unsigned Depth,
-                             Instruction *CxtI) const {
+  KnownBits computeKnownBits(const Value *V, unsigned Depth,
+                             const Instruction *CxtI) const {
     return llvm::computeKnownBits(V, DL, Depth, &AC, CxtI, &DT);
   }
-  bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth = 0,
-                         Instruction *CxtI = nullptr) const {
+  bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth = 0,
+                         const Instruction *CxtI = nullptr) const {
     return llvm::MaskedValueIsZero(V, Mask, DL, Depth, &AC, CxtI, &DT);
   }
-  unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0,
-                              Instruction *CxtI = nullptr) const {
+  unsigned ComputeNumSignBits(const Value *Op, unsigned Depth = 0,
+                              const Instruction *CxtI = nullptr) const {
     return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT);
   }
-  OverflowResult computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
-                                               const Instruction *CxtI) {
+  OverflowResult computeOverflowForUnsignedMul(const Value *LHS,
+                                               const Value *RHS,
+                                               const Instruction *CxtI) const {
     return llvm::computeOverflowForUnsignedMul(LHS, RHS, DL, &AC, CxtI, &DT);
   }
-  OverflowResult computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
-                                               const Instruction *CxtI) {
+  OverflowResult computeOverflowForUnsignedAdd(const Value *LHS,
+                                               const Value *RHS,
+                                               const Instruction *CxtI) const {
     return llvm::computeOverflowForUnsignedAdd(LHS, RHS, DL, &AC, CxtI, &DT);
   }
   OverflowResult computeOverflowForSignedAdd(const Value *LHS,
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 2a35259f2103..fc13854f8fe7 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -132,8 +132,9 @@ static Constant *getLogBase2Vector(ConstantDataVector *CV) {
 
 /// \brief Return true if we can prove that:
 ///    (mul LHS, RHS) === (mul nsw LHS, RHS)
-bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
-                                            Instruction &CxtI) {
+bool InstCombiner::willNotOverflowSignedMul(const Value *LHS,
+                                            const Value *RHS,
+                                            const Instruction &CxtI) const {
   // Multiplying n * m significant bits yields a result of n + m significant
   // bits. If the total number of significant bits does not exceed the
   // result bit width (minus 1), there is no overflow.
@@ -384,7 +385,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       Constant *CI =
           ConstantExpr::getTrunc(Op1C, Op0Conv->getOperand(0)->getType());
       if (ConstantExpr::getSExt(CI, I.getType()) == Op1C &&
-          WillNotOverflowSignedMul(Op0Conv->getOperand(0), CI, I)) {
+          willNotOverflowSignedMul(Op0Conv->getOperand(0), CI, I)) {
         // Insert the new, smaller mul.
         Value *NewMul =
             Builder->CreateNSWMul(Op0Conv->getOperand(0), CI, "mulconv");
@@ -401,7 +402,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       if (Op0Conv->getOperand(0)->getType() ==
              Op1Conv->getOperand(0)->getType() &&
          (Op0Conv->hasOneUse() || Op1Conv->hasOneUse()) &&
-          WillNotOverflowSignedMul(Op0Conv->getOperand(0),
+          willNotOverflowSignedMul(Op0Conv->getOperand(0),
                                    Op1Conv->getOperand(0), I)) {
         // Insert the new integer mul.
         Value *NewMul = Builder->CreateNSWMul(
             Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv");
@@ -447,7 +448,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
     }
   }
 
-  if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, I)) {
+  if (!I.hasNoSignedWrap() && willNotOverflowSignedMul(Op0, Op1, I)) {
     Changed = true;
     I.setHasNoSignedWrap(true);
   }
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index d8f8a58a5fdf..c4f450949e6d 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -606,7 +606,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
         if (unsigned Count = replaceDominatedUsesWith(
                 CondInst, TorF, DT, BasicBlockEdge(Pred, BB))) {
           Changed = true;
-          NumCSECVP = NumCSECVP + Count;
+          NumCSECVP += Count;
         }
       }
     }
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index c04646eed49a..0490d93f6455 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -2057,7 +2057,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
     if (!performScalarPREInsertion(PREInstr, PREPred, ValNo)) {
       // If we failed insertion, make sure we remove the instruction.
       DEBUG(verifyRemoved(PREInstr));
-      delete PREInstr;
+      PREInstr->deleteValue();
       return false;
     }
   }
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index ae353ea44595..ada22ae38eb8 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -833,15 +833,13 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
         CondBr->eraseFromParent();
       if (CondCmp->use_empty())
         CondCmp->eraseFromParent();
-      else if (CondCmp->getParent() == BB) {
-        // If the fact we just learned is true for all uses of the
-        // condition, replace it with a constant value
-        auto *CI = Ret == LazyValueInfo::True ?
-          ConstantInt::getTrue(CondCmp->getType()) :
-          ConstantInt::getFalse(CondCmp->getType());
-        CondCmp->replaceAllUsesWith(CI);
-        CondCmp->eraseFromParent();
-      }
+      // TODO: We can safely replace *some* uses of the CondInst if it has
+      // exactly one value as returned by LVI. RAUW is incorrect in the
+      // presence of guards and assumes, that have the `Cond` as the use. This
+      // is because we use the guards/assume to reason about the `Cond` value
+      // at the end of block, but RAUW unconditionally replaces all uses
+      // including the guards/assumes themselves and the uses before the
+      // guard/assume.
       return true;
     }
@@ -1327,14 +1325,13 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
     if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
       if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
         CondInst->eraseFromParent();
-      else if (OnlyVal && OnlyVal != MultipleVal &&
-               CondInst->getParent() == BB) {
-        // If we just learned Cond is the same value for all uses of the
-        // condition, replace it with a constant value
-        CondInst->replaceAllUsesWith(OnlyVal);
-        if (!CondInst->mayHaveSideEffects())
-          CondInst->eraseFromParent();
-      }
+      // TODO: We can safely replace *some* uses of the CondInst if it has
+      // exactly one value as returned by LVI. RAUW is incorrect in the
+      // presence of guards and assumes, that have the `Cond` as the use. This
+      // is because we use the guards/assume to reason about the `Cond` value
+      // at the end of block, but RAUW unconditionally replaces all uses
+      // including the guards/assumes themselves and the uses before the
+      // guard/assume.
     }
     return true;
   }
@@ -1909,7 +1906,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
             {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
       ValueMapping[&*BI] = IV;
       if (!New->mayHaveSideEffects()) {
-        delete New;
+        New->deleteValue();
         New = nullptr;
       }
     } else {
diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp
index 02215d3450c2..494cbc61bc9c 100644
--- a/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/lib/Transforms/Scalar/LoadCombine.cpp
@@ -228,7 +228,7 @@ bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
     L.Load->replaceAllUsesWith(V);
   }
 
-  NumLoadsCombined = NumLoadsCombined + Loads.size();
+  NumLoadsCombined += Loads.size();
   return true;
 }
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index cb6223b070a6..97337ea5ba62 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -110,6 +110,15 @@ private:
   bool HasMemset;
   bool HasMemsetPattern;
   bool HasMemcpy;
+  /// Return code for isLegalStore()
+  enum LegalStoreKind {
+    None = 0,
+    Memset,
+    MemsetPattern,
+    Memcpy,
+    DontUse // Dummy retval never to be used. Allows catching errors in retval
+            // handling.
+  };
 
   /// \name Countable Loop Idiom Handling
   /// @{
@@ -119,8 +128,7 @@ private:
                                SmallVectorImpl<BasicBlock *> &ExitBlocks);
 
   void collectStores(BasicBlock *BB);
-  bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemsetPattern,
-                    bool &ForMemcpy);
+  LegalStoreKind isLegalStore(StoreInst *SI);
   bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
                          bool ForMemset);
   bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
@@ -343,20 +351,20 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
   return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
 }
 
-bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
-                                      bool &ForMemsetPattern, bool &ForMemcpy) {
+LoopIdiomRecognize::LegalStoreKind
+LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
   // Don't touch volatile stores.
   if (!SI->isSimple())
-    return false;
+    return LegalStoreKind::None;
 
   // Don't convert stores of non-integral pointer types to memsets (which stores
   // integers).
   if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType()))
-    return false;
+    return LegalStoreKind::None;
 
   // Avoid merging nontemporal stores.
   if (SI->getMetadata(LLVMContext::MD_nontemporal))
-    return false;
+    return LegalStoreKind::None;
 
   Value *StoredVal = SI->getValueOperand();
   Value *StorePtr = SI->getPointerOperand();
@@ -364,7 +372,7 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
   // Reject stores that are so large that they overflow an unsigned.
   uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
   if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
-    return false;
+    return LegalStoreKind::None;
 
   // See if the pointer expression is an AddRec like {base,+,1} on the current
   // loop, which indicates a strided store. If we have something else, it's a
@@ -372,11 +380,11 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
   const SCEVAddRecExpr *StoreEv =
       dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
   if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
-    return false;
+    return LegalStoreKind::None;
 
   // Check to see if we have a constant stride.
   if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
-    return false;
+    return LegalStoreKind::None;
 
   // See if the store can be turned into a memset.
@@ -394,15 +402,13 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
       // promote the memset.
       CurLoop->isLoopInvariant(SplatValue)) {
     // It looks like we can use SplatValue.
-    ForMemset = true;
-    return true;
+    return LegalStoreKind::Memset;
   } else if (HasMemsetPattern &&
              // Don't create memset_pattern16s with address spaces.
             StorePtr->getType()->getPointerAddressSpace() == 0 &&
             (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
     // It looks like we can use PatternValue!
-    ForMemsetPattern = true;
-    return true;
+    return LegalStoreKind::MemsetPattern;
   }
 
   // Otherwise, see if the store can be turned into a memcpy.
@@ -412,12 +418,12 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
     APInt Stride = getStoreStride(StoreEv);
     unsigned StoreSize = getStoreSizeInBytes(SI, DL);
     if (StoreSize != Stride && StoreSize != -Stride)
-      return false;
+      return LegalStoreKind::None;
 
     // The store must be feeding a non-volatile load.
     LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
     if (!LI || !LI->isSimple())
-      return false;
+      return LegalStoreKind::None;
 
     // See if the pointer expression is an AddRec like {base,+,1} on the current
     // loop, which indicates a strided load. If we have something else, it's a
@@ -425,18 +431,17 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
     const SCEVAddRecExpr *LoadEv =
         dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
     if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
-      return false;
+      return LegalStoreKind::None;
 
     // The store and load must share the same stride.
     if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
-      return false;
+      return LegalStoreKind::None;
 
     // Success. This store can be converted into a memcpy.
-    ForMemcpy = true;
-    return true;
+    return LegalStoreKind::Memcpy;
   }
 
   // This store can't be transformed into a memset/memcpy.
-  return false;
+  return LegalStoreKind::None;
 }
 
 void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
@@ -448,24 +453,28 @@ void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
     if (!SI)
       continue;
 
-    bool ForMemset = false;
-    bool ForMemsetPattern = false;
-    bool ForMemcpy = false;
     // Make sure this is a strided store with a constant stride.
-    if (!isLegalStore(SI, ForMemset, ForMemsetPattern, ForMemcpy))
-      continue;
-
-    // Save the store locations.
-    if (ForMemset) {
+    switch (isLegalStore(SI)) {
+    case LegalStoreKind::None:
+      // Nothing to do
+      break;
+    case LegalStoreKind::Memset: {
       // Find the base pointer.
       Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
       StoreRefsForMemset[Ptr].push_back(SI);
-    } else if (ForMemsetPattern) {
+    } break;
+    case LegalStoreKind::MemsetPattern: {
       // Find the base pointer.
       Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
       StoreRefsForMemsetPattern[Ptr].push_back(SI);
-    } else if (ForMemcpy)
+    } break;
+    case LegalStoreKind::Memcpy:
       StoreRefsForMemcpy.push_back(SI);
+      break;
+    default:
+      assert(false && "unhandled return value");
+      break;
+    }
   }
 }
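For context, a minimal sketch (illustrative, not patch content) of the loop shape that isLegalStore now classifies as LegalStoreKind::Memset: a store of a splat value through a pointer whose SCEV is an affine AddRec striding by the store size each iteration.

    // A strided store of a splat value that LoopIdiomRecognize can turn
    // into a single memset call.
    void ZeroFill(char *P, unsigned N) {
      for (unsigned I = 0; I != N; ++I)
        P[I] = 0; // pointer AddRec {P,+,1}, splat 0 => memset(P, 0, N)
    }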
diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp
index 0ce604429326..32fd3da465fe 100644
--- a/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/lib/Transforms/Scalar/LoopPredication.cpp
@@ -58,12 +58,30 @@ using namespace llvm;
 namespace {
 class LoopPredication {
+  /// Represents an induction variable check:
+  ///   icmp Pred, <induction variable>, <loop invariant limit>
+  struct LoopICmp {
+    ICmpInst::Predicate Pred;
+    const SCEVAddRecExpr *IV;
+    const SCEV *Limit;
+    LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,
+             const SCEV *Limit)
+        : Pred(Pred), IV(IV), Limit(Limit) {}
+    LoopICmp() {}
+  };
+
   ScalarEvolution *SE;
 
   Loop *L;
   const DataLayout *DL;
   BasicBlock *Preheader;
 
+  Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI);
+
+  Value *expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder,
+                     ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
+                     Instruction *InsertAt);
+
   Optional<Value *> widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander,
                                         IRBuilder<> &Builder);
   bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
@@ -116,16 +134,10 @@ PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
   return getLoopPassPreservedAnalyses();
 }
 
-/// If ICI can be widened to a loop invariant condition emits the loop
-/// invariant condition in the loop preheader and return it, otherwise
-/// returns None.
-Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
-                                                       SCEVExpander &Expander,
-                                                       IRBuilder<> &Builder) {
-  DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");
-  DEBUG(ICI->dump());
-
+Optional<LoopPredication::LoopICmp>
+LoopPredication::parseLoopICmp(ICmpInst *ICI) {
   ICmpInst::Predicate Pred = ICI->getPredicate();
+
   Value *LHS = ICI->getOperand(0);
   Value *RHS = ICI->getOperand(1);
   const SCEV *LHSS = SE->getSCEV(LHS);
@@ -135,17 +147,54 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
   if (isa<SCEVCouldNotCompute>(RHSS))
     return None;
 
-  // Canonicalize RHS to be loop invariant bound, LHS - a loop computable index
+  // Canonicalize RHS to be loop invariant bound, LHS - a loop computable IV
   if (SE->isLoopInvariant(LHSS, L)) {
     std::swap(LHS, RHS);
     std::swap(LHSS, RHSS);
     Pred = ICmpInst::getSwappedPredicate(Pred);
   }
-  if (!SE->isLoopInvariant(RHSS, L) || !isSafeToExpand(RHSS, *SE))
+
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHSS);
+  if (!AR || AR->getLoop() != L)
     return None;
 
-  const SCEVAddRecExpr *IndexAR = dyn_cast<SCEVAddRecExpr>(LHSS);
-  if (!IndexAR || IndexAR->getLoop() != L)
+  return LoopICmp(Pred, AR, RHSS);
+}
+
+Value *LoopPredication::expandCheck(SCEVExpander &Expander,
+                                    IRBuilder<> &Builder,
+                                    ICmpInst::Predicate Pred, const SCEV *LHS,
+                                    const SCEV *RHS, Instruction *InsertAt) {
+  Type *Ty = LHS->getType();
+  assert(Ty == RHS->getType() && "expandCheck operands have different types?");
+  Value *LHSV = Expander.expandCodeFor(LHS, Ty, InsertAt);
+  Value *RHSV = Expander.expandCodeFor(RHS, Ty, InsertAt);
+  return Builder.CreateICmp(Pred, LHSV, RHSV);
+}
+
+/// If ICI can be widened to a loop invariant condition emits the loop
+/// invariant condition in the loop preheader and return it, otherwise
+/// returns None.
+Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
+                                                       SCEVExpander &Expander,
+                                                       IRBuilder<> &Builder) {
+  DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");
+  DEBUG(ICI->dump());
+
+  auto RangeCheck = parseLoopICmp(ICI);
+  if (!RangeCheck) {
+    DEBUG(dbgs() << "Failed to parse the loop latch condition!\n");
+    return None;
+  }
+
+  ICmpInst::Predicate Pred = RangeCheck->Pred;
+  const SCEVAddRecExpr *IndexAR = RangeCheck->IV;
+  const SCEV *RHSS = RangeCheck->Limit;
+
+  auto CanExpand = [this](const SCEV *S) {
+    return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE);
+  };
+  if (!CanExpand(RHSS))
     return None;
 
   DEBUG(dbgs() << "IndexAR: ");
@@ -170,17 +219,13 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
   DEBUG(dbgs() << "NewLHSS: ");
   DEBUG(NewLHSS->dump());
 
-  if (!SE->isLoopInvariant(NewLHSS, L) || !isSafeToExpand(NewLHSS, *SE))
+  if (!CanExpand(NewLHSS))
     return None;
 
   DEBUG(dbgs() << "NewLHSS is loop invariant and safe to expand. Expand!\n");
 
-  Type *Ty = LHS->getType();
   Instruction *InsertAt = Preheader->getTerminator();
-  assert(Ty == RHS->getType() && "icmp operands have different types?");
-  Value *NewLHS = Expander.expandCodeFor(NewLHSS, Ty, InsertAt);
-  Value *NewRHS = Expander.expandCodeFor(RHSS, Ty, InsertAt);
-  return Builder.CreateICmp(Pred, NewLHS, NewRHS);
+  return expandCheck(Expander, Builder, Pred, NewLHSS, RHSS, InsertAt);
 }
 
 bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
@@ -272,6 +317,9 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
       if (II->getIntrinsicID() == Intrinsic::experimental_guard)
         Guards.push_back(II);
 
+  if (Guards.empty())
+    return false;
+
   SCEVExpander Expander(*SE, *DL, "loop-predication");
 
   bool Changed = false;
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 2ba9265566a8..7312d97f8efe 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -347,7 +347,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
       // in the map.
       ValueMap[Inst] = V;
       if (!C->mayHaveSideEffects()) {
-        delete C;
+        C->deleteValue();
         C = nullptr;
       }
     } else {
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index bd1f21c69eba..28d94497a3ef 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -3805,6 +3805,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
     if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
       continue;
 
+    F.canonicalize(*L);
     (void)InsertFormula(LU, LUIdx, F);
   }
 }
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 0e7572f8d2e5..5cfbf6baeaa9 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -30,9 +30,19 @@
 /// tracks what operations have a given value number (IE it also tracks the
 /// reverse mapping from value number -> operations with that value number), so
 /// that it only needs to reprocess the instructions that are affected when
-/// something's value number changes. The rest of the algorithm is devoted to
-/// performing symbolic evaluation, forward propagation, and simplification of
-/// operations based on the value numbers deduced so far.
+/// something's value number changes. The vast majority of complexity and code
+/// in this file is devoted to tracking what value numbers could change for what
+/// instructions when various things happen. The rest of the algorithm is
+/// devoted to performing symbolic evaluation, forward propagation, and
+/// simplification of operations based on the value numbers deduced so far
+///
+/// In order to make the GVN mostly-complete, we use a technique derived from
+/// "Detection of Redundant Expressions: A Complete and Polynomial-time
+/// Algorithm in SSA" by R.R. Pai. The source of incompleteness in most SSA
+/// based GVN algorithms is related to their inability to detect equivalence
+/// between phi of ops (IE phi(a+b, c+d)) and op of phis (phi(a,c) + phi(b, d)).
+/// We resolve this issue by generating the equivalent "phi of ops" form for
+/// each op of phis we see, in a way that only takes polynomial time to resolve.
 ///
 /// We also do not perform elimination by using any published algorithm. All
 /// published algorithms are O(Instructions). Instead, we use a technique that
@@ -51,7 +61,6 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SparseBitVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -104,12 +113,14 @@
 STATISTIC(NumGVNLeaderChanges, "Number of leader changes");
 STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes");
 STATISTIC(NumGVNAvoidedSortedLeaderChanges,
           "Number of avoided sorted leader changes");
-STATISTIC(NumGVNNotMostDominatingLeader,
-          "Number of times a member dominated it's new classes' leader");
 STATISTIC(NumGVNDeadStores, "Number of redundant/dead stores eliminated");
+STATISTIC(NumGVNPHIOfOpsCreated, "Number of PHI of ops created");
+STATISTIC(NumGVNPHIOfOpsEliminations,
+          "Number of things eliminated using PHI of ops");
 DEBUG_COUNTER(VNCounter, "newgvn-vn",
               "Controls which instructions are value numbered")
-
+DEBUG_COUNTER(PHIOfOpsCounter, "newgvn-phi",
+              "Controls which instructions we create phi of ops for")
 // Currently store defining access refinement is too slow due to basicaa being
 // egregiously slow. This flag lets us keep it working while we work on this
 // issue.
@@ -172,10 +183,9 @@ private:
     }
   }
   // See if we really were the root of a component, by seeing if we still have
-  // our DFSNumber.
-  // If we do, we are the root of the component, and we have completed a
-  // component. If we do not,
-  // we are not the root of a component, and belong on the component stack.
+  // our DFSNumber. If we do, we are the root of the component, and we have
+  // completed a component. If we do not, we are not the root of a component,
+  // and belong on the component stack.
   if (Root.lookup(I) == OurDFS) {
     unsigned ComponentID = Components.size();
     Components.resize(Components.size() + 1);
@@ -367,6 +377,7 @@ private:
   int StoreCount = 0;
 };
 
+struct HashedExpression;
 namespace llvm {
 template <> struct DenseMapInfo<const Expression *> {
   static const Expression *getEmptyKey() {
@@ -379,9 +390,11 @@ template <> struct DenseMapInfo<const Expression *> {
     Val <<= PointerLikeTypeTraits<const Expression *>::NumLowBitsAvailable;
     return reinterpret_cast<const Expression *>(Val);
   }
-  static unsigned getHashValue(const Expression *V) {
-    return static_cast<unsigned>(V->getHashValue());
+  static unsigned getHashValue(const Expression *E) {
+    return static_cast<unsigned>(E->getHashValue());
   }
+  static unsigned getHashValue(const HashedExpression &HE);
+  static bool isEqual(const HashedExpression &LHS, const Expression *RHS);
   static bool isEqual(const Expression *LHS, const Expression *RHS) {
     if (LHS == RHS)
       return true;
@@ -393,6 +406,26 @@ template <> struct DenseMapInfo<const Expression *> {
 };
 } // end namespace llvm
 
+// This is just a wrapper around Expression that computes the hash value once at
+// creation time. Hash values for an Expression can't change once they are
+// inserted into the DenseMap (it breaks DenseMap), so they must be immutable at
+// that point anyway.
+struct HashedExpression {
+  const Expression *E;
+  unsigned HashVal;
+  HashedExpression(const Expression *E)
+      : E(E), HashVal(DenseMapInfo<const Expression *>::getHashValue(E)) {}
+};
+
+unsigned
+DenseMapInfo<const Expression *>::getHashValue(const HashedExpression &HE) {
+  return HE.HashVal;
+}
+bool DenseMapInfo<const Expression *>::isEqual(const HashedExpression &LHS,
+                                               const Expression *RHS) {
+  return isEqual(LHS.E, RHS);
+}
+
 namespace {
 class NewGVN {
   Function &F;
@@ -430,6 +463,33 @@ class NewGVN {
   // Value Mappings.
   DenseMap<Value *, CongruenceClass *> ValueToClass;
   DenseMap<Value *, const Expression *> ValueToExpression;
+  // Value PHI handling, used to make equivalence between phi(op, op) and
+  // op(phi, phi).
+  // These mappings just store various data that would normally be part of the
+  // IR.
+  DenseSet<const Instruction *> PHINodeUses;
+  // Map a temporary instruction we created to a parent block.
+  DenseMap<const Value *, BasicBlock *> TempToBlock;
+  // Map between the temporary phis we created and the real instructions they
+  // are known equivalent to.
+  DenseMap<const Value *, PHINode *> RealToTemp;
+  // In order to know when we should re-process instructions that have
+  // phi-of-ops, we track the set of expressions that they needed as
+  // leaders. When we discover new leaders for those expressions, we process the
+  // associated phi-of-op instructions again in case they have changed. The
+  // other way they may change is if they had leaders, and those leaders
+  // disappear. However, at the point they have leaders, there are uses of the
+  // relevant operands in the created phi node, and so they will get reprocessed
+  // through the normal user marking we perform.
+  mutable DenseMap<const Value *, SmallPtrSet<Value *, 2>> AdditionalUsers;
+  DenseMap<const Expression *, SmallPtrSet<Instruction *, 2>>
+      ExpressionToPhiOfOps;
+  // Map from basic block to the temporary operations we created
+  DenseMap<const BasicBlock *, SmallVector<PHINode *, 8>> PHIOfOpsPHIs;
+  // Map from temporary operation to MemoryAccess.
+  DenseMap<const Instruction *, MemoryUseOrDef *> TempToMemory;
+  // Set of all temporary instructions we created.
+  DenseSet<Instruction *> AllTempInstructions;
 
   // Mapping from predicate info we used to the instructions we used it with.
   // In order to correctly ensure propagation, we must keep track of what
@@ -462,12 +522,19 @@ class NewGVN {
   enum MemoryPhiState { MPS_Invalid, MPS_TOP, MPS_Equivalent, MPS_Unique };
   DenseMap<const MemoryPhi *, MemoryPhiState> MemoryPhiState;
 
-  enum PhiCycleState { PCS_Unknown, PCS_CycleFree, PCS_Cycle };
-  mutable DenseMap<const PHINode *, PhiCycleState> PhiCycleState;
+  enum InstCycleState { ICS_Unknown, ICS_CycleFree, ICS_Cycle };
+  mutable DenseMap<const Instruction *, InstCycleState> InstCycleState;
+
   // Expression to class mapping.
   using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>;
   ExpressionClassMap ExpressionToClass;
 
+  // We have a single expression that represents currently DeadExpressions.
+  // For dead expressions we can prove will stay dead, we mark them with
+  // DFS number zero. However, it's possible in the case of phi nodes
+  // for us to assume/prove all arguments are dead during fixpointing.
+  // We use DeadExpression for that case.
+  DeadExpression *SingletonDeadExpression = nullptr;
+
   // Which values have changed as a result of leader changes.
   SmallPtrSet<Value *, 8> LeaderChanges;
 
@@ -521,7 +588,8 @@ private:
   const Expression *createBinaryExpression(unsigned, Type *, Value *,
                                            Value *) const;
   PHIExpression *createPHIExpression(Instruction *, bool &HasBackEdge,
-                                     bool &AllConstant) const;
+                                     bool &OriginalOpsConstant) const;
+  const DeadExpression *createDeadExpression() const;
   const VariableExpression *createVariableExpression(Value *) const;
   const ConstantExpression *createConstantExpression(Constant *) const;
   const Expression *createVariableOrConstant(Value *V) const;
@@ -562,6 +630,9 @@ private:
     return CClass;
   }
   void initializeCongruenceClasses(Function &F);
+  const Expression *makePossiblePhiOfOps(Instruction *, bool,
+                                         SmallPtrSetImpl<Value *> &);
+  void addPhiOfOps(PHINode *Op, BasicBlock *BB, Instruction *ExistingValue);
 
   // Value number an Instruction or MemoryPhi.
   void valueNumberMemoryPhi(MemoryPhi *);
@@ -570,7 +641,8 @@ private:
   // Symbolic evaluation.
   const Expression *checkSimplificationResults(Expression *, Instruction *,
                                                Value *) const;
-  const Expression *performSymbolicEvaluation(Value *) const;
+  const Expression *performSymbolicEvaluation(Value *,
+                                              SmallPtrSetImpl<Value *> &) const;
   const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *,
                                                 Instruction *,
                                                 MemoryAccess *) const;
@@ -595,7 +667,7 @@ private:
   bool setMemoryClass(const MemoryAccess *From, CongruenceClass *To);
   CongruenceClass *getMemoryClass(const MemoryAccess *MA) const;
   const MemoryAccess *lookupMemoryLeader(const MemoryAccess *) const;
-  bool isMemoryAccessTop(const MemoryAccess *) const;
+  bool isMemoryAccessTOP(const MemoryAccess *) const;
 
   // Ranking
   unsigned int getRank(const Value *) const;
@@ -619,19 +691,26 @@ private:
   void replaceInstruction(Instruction *, Value *);
   void markInstructionForDeletion(Instruction *);
   void deleteInstructionsInBlock(BasicBlock *);
+  Value *findPhiOfOpsLeader(const Expression *E, const BasicBlock *BB) const;
 
   // New instruction creation.
   void handleNewInstruction(Instruction *){};
 
   // Various instruction touch utilities
+  template <typename Map, typename KeyType, typename Func>
+  void for_each_found(Map &, const KeyType &, Func);
+  template <typename Map, typename KeyType>
+  void touchAndErase(Map &, const KeyType &);
   void markUsersTouched(Value *);
   void markMemoryUsersTouched(const MemoryAccess *);
   void markMemoryDefTouched(const MemoryAccess *);
   void markPredicateUsersTouched(Instruction *);
   void markValueLeaderChangeTouched(CongruenceClass *CC);
   void markMemoryLeaderChangeTouched(CongruenceClass *CC);
+  void markPhiOfOpsChanged(const HashedExpression &HE);
   void addPredicateUsers(const PredicateBase *, Instruction *) const;
   void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const;
+  void addAdditionalUsers(Value *To, Value *User) const;
 
   // Main loop of value numbering
   void iterateTouchedInstructions();
@@ -639,13 +718,18 @@ private:
   // Utilities.
   void cleanupTables();
   std::pair<unsigned, unsigned> assignDFSNumbers(BasicBlock *, unsigned);
-  void updateProcessedCount(Value *V);
+  void updateProcessedCount(const Value *V);
   void verifyMemoryCongruency() const;
   void verifyIterationSettled(Function &F);
   void verifyStoreExpressions() const;
-  bool singleReachablePHIPath(const MemoryAccess *, const MemoryAccess *) const;
+  bool singleReachablePHIPath(SmallPtrSet<const MemoryAccess *, 8> &,
+                              const MemoryAccess *, const MemoryAccess *) const;
   BasicBlock *getBlockForValue(Value *V) const;
   void deleteExpression(const Expression *E) const;
+  MemoryUseOrDef *getMemoryAccess(const Instruction *) const;
+  MemoryAccess *getDefiningAccess(const MemoryAccess *) const;
+  MemoryPhi *getMemoryAccess(const BasicBlock *) const;
+  template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
   unsigned InstrToDFSNum(const Value *V) const {
     assert(isa<Instruction>(V) && "This should not be used for MemoryAccesses");
     return InstrDFS.lookup(V);
@@ -665,8 +749,8 @@ private:
                ? InstrToDFSNum(cast<MemoryUseOrDef>(MA)->getMemoryInst())
                : InstrDFS.lookup(MA);
   }
-  bool isCycleFree(const PHINode *PN) const;
-  template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
+  bool isCycleFree(const Instruction *) const;
+  bool isBackedge(BasicBlock *From, BasicBlock *To) const;
   // Debug counter info.  When verifying, we have to reset the value numbering
   // debug counter to the same state it started in to get the same results.
   std::pair<int, int> StartingVNCounter;
@@ -694,20 +778,46 @@ bool StoreExpression::equals(const Expression &Other) const {
   return true;
 }
 
+// Determine if the edge From->To is a backedge
+bool NewGVN::isBackedge(BasicBlock *From, BasicBlock *To) const {
+  if (From == To)
+    return true;
+  auto *FromDTN = DT->getNode(From);
+  auto *ToDTN = DT->getNode(To);
+  return RPOOrdering.lookup(FromDTN) >= RPOOrdering.lookup(ToDTN);
+}
+
 #ifndef NDEBUG
 static std::string getBlockName(const BasicBlock *B) {
   return DOTGraphTraits<const Function *>::getSimpleNodeLabel(B, nullptr);
 }
 #endif
 
+// Get a MemoryAccess for an instruction, fake or real.
+MemoryUseOrDef *NewGVN::getMemoryAccess(const Instruction *I) const {
+  auto *Result = MSSA->getMemoryAccess(I);
+  return Result ? Result : TempToMemory.lookup(I);
+}
+
+// Get a MemoryPhi for a basic block. These are all real.
+MemoryPhi *NewGVN::getMemoryAccess(const BasicBlock *BB) const {
+  return MSSA->getMemoryAccess(BB);
+}
+
 // Get the basic block from an instruction/memory value.
BasicBlock *NewGVN::getBlockForValue(Value *V) const { - if (auto *I = dyn_cast<Instruction>(V)) - return I->getParent(); - else if (auto *MP = dyn_cast<MemoryPhi>(V)) - return MP->getBlock(); - llvm_unreachable("Should have been able to figure out a block for our value"); - return nullptr; + if (auto *I = dyn_cast<Instruction>(V)) { + auto *Parent = I->getParent(); + if (Parent) + return Parent; + Parent = TempToBlock.lookup(V); + assert(Parent && "Every fake instruction should have a block"); + return Parent; + } + + auto *MP = dyn_cast<MemoryPhi>(V); + assert(MP && "Should have been an instruction or a MemoryPhi"); + return MP->getBlock(); } // Delete a definitely dead expression, so it can be reused by the expression @@ -719,10 +829,9 @@ void NewGVN::deleteExpression(const Expression *E) const { const_cast<BasicExpression *>(BE)->deallocateOperands(ArgRecycler); ExpressionAllocator.Deallocate(E); } - PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, - bool &AllConstant) const { - BasicBlock *PHIBlock = I->getParent(); + bool &OriginalOpsConstant) const { + BasicBlock *PHIBlock = getBlockForValue(I); auto *PN = cast<PHINode>(I); auto *E = new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock); @@ -731,8 +840,6 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, E->setType(I->getType()); E->setOpcode(I->getOpcode()); - unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock)); - // NewGVN assumes the operands of a PHI node are in a consistent order across // PHIs. LLVM doesn't seem to always guarantee this. While we need to fix // this in LLVM at some point we don't want GVN to find wrong congruences. @@ -753,18 +860,20 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, auto Filtered = make_filter_range(PHIOperands, [&](const Use *U) { return ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock}); }); - std::transform(Filtered.begin(), Filtered.end(), op_inserter(E), [&](const Use *U) -> Value * { auto *BB = PN->getIncomingBlock(*U); - auto *DTN = DT->getNode(BB); - if (RPOOrdering.lookup(DTN) >= PHIRPO) - HasBackedge = true; - AllConstant &= isa<UndefValue>(*U) || isa<Constant>(*U); - - // Don't try to transform self-defined phis. + HasBackedge = HasBackedge || isBackedge(BB, PHIBlock); + OriginalOpsConstant = + OriginalOpsConstant && isa<Constant>(*U); + // Use nullptr to distinguish between things that were + // originally self-defined and those that have an operand + // leader that is self-defined. if (*U == PN) - return PN; + return nullptr; + // Things in TOPClass are equivalent to everything. + if (ValueToClass.lookup(*U) == TOPClass) + return nullptr; return lookupOperandLeader(*U); }); return E; @@ -785,7 +894,7 @@ bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) const { // whether all members are constant. 
std::transform(I->op_begin(), I->op_end(), op_inserter(E), [&](Value *O) { auto Operand = lookupOperandLeader(O); - AllConstant &= isa<Constant>(Operand); + AllConstant = AllConstant && isa<Constant>(Operand); return Operand; }); @@ -848,7 +957,7 @@ const Expression *NewGVN::checkSimplificationResults(Expression *E, if (CC && CC->getDefiningExpr()) { if (I) DEBUG(dbgs() << "Simplified " << *I << " to " - << " expression " << *V << "\n"); + << " expression " << *CC->getDefiningExpr() << "\n"); NumGVNOpsSimplified++; deleteExpression(E); return CC->getDefiningExpr(); @@ -961,6 +1070,12 @@ NewGVN::createAggregateValueExpression(Instruction *I) const { llvm_unreachable("Unhandled type of aggregate value operation"); } +const DeadExpression *NewGVN::createDeadExpression() const { + // DeadExpression has no arguments and all DeadExpression's are the same, + // so we only need one of them. + return SingletonDeadExpression; +} + const VariableExpression *NewGVN::createVariableExpression(Value *V) const { auto *E = new (ExpressionAllocator) VariableExpression(V); E->setOpcode(V->getValueID()); @@ -1032,7 +1147,7 @@ bool NewGVN::someEquivalentDominates(const Instruction *Inst, Value *NewGVN::lookupOperandLeader(Value *V) const { CongruenceClass *CC = ValueToClass.lookup(V); if (CC) { - // Everything in TOP is represneted by undef, as it can be any value. + // Everything in TOP is represented by undef, as it can be any value. // We do have to make sure we get the type right though, so we can't set the // RepLeader to undef. if (CC == TOPClass) @@ -1054,7 +1169,7 @@ const MemoryAccess *NewGVN::lookupMemoryLeader(const MemoryAccess *MA) const { // Return true if the MemoryAccess is really equivalent to everything. This is // equivalent to the lattice value "TOP" in most lattices. This is the initial // state of all MemoryAccesses. -bool NewGVN::isMemoryAccessTop(const MemoryAccess *MA) const { +bool NewGVN::isMemoryAccessTOP(const MemoryAccess *MA) const { return getMemoryClass(MA) == TOPClass; } @@ -1100,7 +1215,7 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const { // Unlike loads, we never try to eliminate stores, so we do not check if they // are simple and avoid value numbering them. auto *SI = cast<StoreInst>(I); - auto *StoreAccess = MSSA->getMemoryAccess(SI); + auto *StoreAccess = getMemoryAccess(SI); // Get the expression, if any, for the RHS of the MemoryDef. const MemoryAccess *StoreRHS = StoreAccess->getDefiningAccess(); if (EnableStoreRefinement) @@ -1108,7 +1223,6 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const { // If we bypassed the use-def chains, make sure we add a use. if (StoreRHS != StoreAccess->getDefiningAccess()) addMemoryUsers(StoreRHS, StoreAccess); - StoreRHS = lookupMemoryLeader(StoreRHS); // If we are defined by ourselves, use the live on entry def. if (StoreRHS == StoreAccess) @@ -1137,9 +1251,9 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const { dyn_cast<LoadInst>(lookupOperandLeader(SI->getValueOperand()))) { if ((lookupOperandLeader(LI->getPointerOperand()) == lookupOperandLeader(SI->getPointerOperand())) && - (lookupMemoryLeader(MSSA->getMemoryAccess(LI)->getDefiningAccess()) == + (lookupMemoryLeader(getMemoryAccess(LI)->getDefiningAccess()) == StoreRHS)) - return createVariableExpression(LI); + return createStoreExpression(SI, StoreRHS); } } @@ -1241,8 +1355,9 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const { // Load of undef is undef. 
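// For example (illustrative IR): given
//   %v = load i32, i32* undef
// the address leader is undef, so the load is value numbered straight to the
// constant undef without consulting MemorySSA.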
if (isa<UndefValue>(LoadAddressLeader)) return createConstantExpression(UndefValue::get(LI->getType()));
- - MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(I);
+ MemoryAccess *OriginalAccess = getMemoryAccess(I);
+ MemoryAccess *DefiningAccess =
+ MSSAWalker->getClobberingMemoryAccess(OriginalAccess);
if (!MSSA->isLiveOnEntryDef(DefiningAccess)) { if (auto *MD = dyn_cast<MemoryDef>(DefiningAccess)) {
@@ -1331,6 +1446,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const {
// operands are equal, because assumes must always be true.
if (CmpInst::isTrueWhenEqual(Predicate)) { addPredicateUsers(PI, I);
+ addAdditionalUsers(Cmp->getOperand(0), I);
return createVariableOrConstant(FirstOp); } }
@@ -1343,6 +1459,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const {
if ((PBranch->TrueEdge && Predicate == CmpInst::ICMP_EQ) || (!PBranch->TrueEdge && Predicate == CmpInst::ICMP_NE)) { addPredicateUsers(PI, I);
+ addAdditionalUsers(Cmp->getOperand(0), I);
return createVariableOrConstant(FirstOp); }
// Handle the special case of floating point.
@@ -1350,6 +1467,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const {
(!PBranch->TrueEdge && Predicate == CmpInst::FCMP_UNE)) && isa<ConstantFP>(FirstOp) && !cast<ConstantFP>(FirstOp)->isZero()) { addPredicateUsers(PI, I);
+ addAdditionalUsers(Cmp->getOperand(0), I);
return createConstantExpression(cast<Constant>(FirstOp)); } }
@@ -1430,34 +1548,33 @@ bool NewGVN::setMemoryClass(const MemoryAccess *From, return Changed; }
-// Determine if a phi is cycle-free. That means the values in the phi don't
-// depend on any expressions that can change value as a result of the phi.
-// For example, a non-cycle free phi would be v = phi(0, v+1).
-bool NewGVN::isCycleFree(const PHINode *PN) const {
- // In order to compute cycle-freeness, we do SCC finding on the phi, and see
- // what kind of SCC it ends up in. If it is a singleton, it is cycle-free.
- // If it is not in a singleton, it is only cycle free if the other members are
- // all phi nodes (as they do not compute anything, they are copies). TODO:
- // There are likely a few other intrinsics or expressions that could be
- // included here, but this happens so infrequently already that it is not
- // likely to be worth it.
- auto PCS = PhiCycleState.lookup(PN);
- if (PCS == PCS_Unknown) {
- SCCFinder.Start(PN);
- auto &SCC = SCCFinder.getComponentFor(PN);
+// Determine if an instruction is cycle-free. That means the values in the
+// instruction don't depend on any expressions that can change value as a result
+// of the instruction. For example, a non-cycle free instruction would be v =
+// phi(0, v+1).
+bool NewGVN::isCycleFree(const Instruction *I) const {
+ // In order to compute cycle-freeness, we do SCC finding on the instruction,
+ // and see what kind of SCC it ends up in. If it is a singleton, it is
+ // cycle-free. If it is not in a singleton, it is only cycle free if the
+ // other members are all phi nodes (as they do not compute anything, they are
+ // copies).
+ auto ICS = InstCycleState.lookup(I);
+ if (ICS == ICS_Unknown) {
+ SCCFinder.Start(I);
+ auto &SCC = SCCFinder.getComponentFor(I);
// It's cycle-free if its size is 1 or the SCC is *only* phi nodes.
if (SCC.size() == 1)
- PhiCycleState.insert({PN, PCS_CycleFree});
+ InstCycleState.insert({I, ICS_CycleFree});
else { bool AllPhis = llvm::all_of(SCC, [](const Value *V) { return isa<PHINode>(V); });
- PCS = AllPhis ? PCS_CycleFree : PCS_Cycle;
+ ICS = AllPhis ?
ICS_CycleFree : ICS_Cycle; for (auto *Member : SCC) if (auto *MemberPhi = dyn_cast<PHINode>(Member)) - PhiCycleState.insert({MemberPhi, PCS}); + InstCycleState.insert({MemberPhi, ICS}); } } - if (PCS == PCS_Cycle) + if (ICS == ICS_Cycle) return false; return true; } @@ -1467,10 +1584,9 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { // True if one of the incoming phi edges is a backedge. bool HasBackedge = false; // All constant tracks the state of whether all the *original* phi operands - // were constant. This is really shorthand for "this phi cannot cycle due - // to forward propagation", as any change in value of the phi is guaranteed - // not to later change the value of the phi. - // IE it can't be v = phi(undef, v+1) + // This is really shorthand for "this phi cannot cycle due to forward + // change in value of the phi is guaranteed not to later change the value of + // the phi. IE it can't be v = phi(undef, v+1) bool AllConstant = true; auto *E = cast<PHIExpression>(createPHIExpression(I, HasBackedge, AllConstant)); @@ -1478,8 +1594,16 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { // See if all arguments are the same. // We track if any were undef because they need special handling. bool HasUndef = false; - auto Filtered = make_filter_range(E->operands(), [&](const Value *Arg) { - if (Arg == I) + bool CycleFree = isCycleFree(I); + auto Filtered = make_filter_range(E->operands(), [&](Value *Arg) { + if (Arg == nullptr) + return false; + // Original self-operands are already eliminated during expression creation. + // We can only eliminate value-wise self-operands if it's cycle + // free. Otherwise, eliminating the operand can cause our value to change, + // which can cause us to not eliminate the operand, which changes the value + // back to what it was before, cycling forever. + if (CycleFree && Arg == I) return false; if (isa<UndefValue>(Arg)) { HasUndef = true; @@ -1487,20 +1611,19 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { } return true; }); - // If we are left with no operands, it's undef + // If we are left with no operands, it's dead. if (Filtered.begin() == Filtered.end()) { - DEBUG(dbgs() << "Simplified PHI node " << *I << " to undef" - << "\n"); + DEBUG(dbgs() << "No arguments of PHI node " << *I << " are live\n"); deleteExpression(E); - return createConstantExpression(UndefValue::get(I->getType())); + return createDeadExpression(); } unsigned NumOps = 0; Value *AllSameValue = *(Filtered.begin()); ++Filtered.begin(); // Can't use std::equal here, sadly, because filter.begin moves. - if (llvm::all_of(Filtered, [AllSameValue, &NumOps](const Value *V) { + if (llvm::all_of(Filtered, [&](Value *Arg) { ++NumOps; - return V == AllSameValue; + return Arg == AllSameValue; })) { // In LLVM's non-standard representation of phi nodes, it's possible to have // phi nodes with cycles (IE dependent on other phis that are .... dependent @@ -1519,7 +1642,7 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { // constants, or all operands are ignored but the undef, it also must be // cycle free. 
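// A concrete case (illustrative IR):
//   loop:
//     %v = phi i32 [ undef, %entry ], [ %v.next, %loop ]
//     %v.next = add i32 %v, 1
// The undef operand is ignored, the remaining operand arrives over the
// backedge, and %v is not cycle-free, so the check below keeps the phi
// expression instead of declaring %v equal to %v.next.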
if (!AllConstant && HasBackedge && NumOps > 0 && - !isa<UndefValue>(AllSameValue) && !isCycleFree(cast<PHINode>(I))) + !isa<UndefValue>(AllSameValue) && !CycleFree) return E; // Only have to check for instructions @@ -1690,8 +1813,18 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) const { return createExpression(I); } +// Return true if V is a value that will always be available (IE can +// be placed anywhere) in the function. We don't do globals here +// because they are often worse to put in place. +// TODO: Separate cost from availability +static bool alwaysAvailable(Value *V) { + return isa<Constant>(V) || isa<Argument>(V); +} + // Substitute and symbolize the value before value numbering. -const Expression *NewGVN::performSymbolicEvaluation(Value *V) const { +const Expression * +NewGVN::performSymbolicEvaluation(Value *V, + SmallPtrSetImpl<Value *> &Visited) const { const Expression *E = nullptr; if (auto *C = dyn_cast<Constant>(V)) E = createConstantExpression(C); @@ -1769,12 +1902,39 @@ const Expression *NewGVN::performSymbolicEvaluation(Value *V) const { return E; } +// Look up a container in a map, and then call a function for each thing in the +// found container. +template <typename Map, typename KeyType, typename Func> +void NewGVN::for_each_found(Map &M, const KeyType &Key, Func F) { + const auto Result = M.find_as(Key); + if (Result != M.end()) + for (typename Map::mapped_type::value_type Mapped : Result->second) + F(Mapped); +} + +// Look up a container of values/instructions in a map, and touch all the +// instructions in the container. Then erase value from the map. +template <typename Map, typename KeyType> +void NewGVN::touchAndErase(Map &M, const KeyType &Key) { + const auto Result = M.find_as(Key); + if (Result != M.end()) { + for (const typename Map::mapped_type::value_type Mapped : Result->second) + TouchedInstructions.set(InstrToDFSNum(Mapped)); + M.erase(Result); + } +} + +void NewGVN::addAdditionalUsers(Value *To, Value *User) const { + AdditionalUsers[To].insert(User); +} + void NewGVN::markUsersTouched(Value *V) { // Now mark the users as touched. for (auto *User : V->users()) { assert(isa<Instruction>(User) && "Use of value not within an instruction?"); TouchedInstructions.set(InstrToDFSNum(User)); } + touchAndErase(AdditionalUsers, V); } void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const { @@ -1791,16 +1951,15 @@ void NewGVN::markMemoryUsersTouched(const MemoryAccess *MA) { return; for (auto U : MA->users()) TouchedInstructions.set(MemoryToDFSNum(U)); - const auto Result = MemoryToUsers.find(MA); - if (Result != MemoryToUsers.end()) { - for (auto *User : Result->second) - TouchedInstructions.set(MemoryToDFSNum(User)); - MemoryToUsers.erase(Result); - } + touchAndErase(MemoryToUsers, MA); } // Add I to the set of users of a given predicate. void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const { + // Don't add temporary instructions to the user lists. + if (AllTempInstructions.count(I)) + return; + if (auto *PBranch = dyn_cast<PredicateBranch>(PB)) PredicateToUsers[PBranch->Condition].insert(I); else if (auto *PAssume = dyn_cast<PredicateBranch>(PB)) @@ -1809,12 +1968,7 @@ void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const { // Touch all the predicates that depend on this instruction. 
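// (Note that markPredicateUsersTouched below, like markMemoryUsersTouched
// above, is now just touchAndErase: set the TouchedInstructions bit for each
// recorded user so the main loop revisits it, then drop the map entry so the
// users get re-registered the next time the instruction is evaluated.)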
void NewGVN::markPredicateUsersTouched(Instruction *I) { - const auto Result = PredicateToUsers.find(I); - if (Result != PredicateToUsers.end()) { - for (auto *User : Result->second) - TouchedInstructions.set(InstrToDFSNum(User)); - PredicateToUsers.erase(Result); - } + touchAndErase(PredicateToUsers, I); } // Mark users affected by a memory leader change. @@ -1856,11 +2010,11 @@ const MemoryAccess *NewGVN::getNextMemoryLeader(CongruenceClass *CC) const { assert(!CC->definesNoMemory() && "Can't get next leader if there is none"); if (CC->getStoreCount() > 0) { if (auto *NL = dyn_cast_or_null<StoreInst>(CC->getNextLeader().first)) - return MSSA->getMemoryAccess(NL); + return getMemoryAccess(NL); // Find the store with the minimum DFS number. auto *V = getMinDFSOfRange<Value>(make_filter_range( *CC, [&](const Value *V) { return isa<StoreInst>(V); })); - return MSSA->getMemoryAccess(cast<StoreInst>(V)); + return getMemoryAccess(cast<StoreInst>(V)); } assert(CC->getStoreCount() == 0); @@ -1945,31 +2099,11 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, if (I == OldClass->getNextLeader().first) OldClass->resetNextLeader(); - // It's possible, though unlikely, for us to discover equivalences such - // that the current leader does not dominate the old one. - // This statistic tracks how often this happens. - // We assert on phi nodes when this happens, currently, for debugging, because - // we want to make sure we name phi node cycles properly. - if (isa<Instruction>(NewClass->getLeader()) && NewClass->getLeader() && - I != NewClass->getLeader()) { - auto *IBB = I->getParent(); - auto *NCBB = cast<Instruction>(NewClass->getLeader())->getParent(); - bool Dominated = - IBB == NCBB && InstrToDFSNum(I) < InstrToDFSNum(NewClass->getLeader()); - Dominated = Dominated || DT->properlyDominates(IBB, NCBB); - if (Dominated) { - ++NumGVNNotMostDominatingLeader; - assert( - !isa<PHINode>(I) && - "New class for instruction should not be dominated by instruction"); - } - } + OldClass->erase(I); + NewClass->insert(I); if (NewClass->getLeader() != I) NewClass->addPossibleNextLeader({I, InstrToDFSNum(I)}); - - OldClass->erase(I); - NewClass->insert(I); // Handle our special casing of stores. if (auto *SI = dyn_cast<StoreInst>(I)) { OldClass->decStoreCount(); @@ -1984,7 +2118,6 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, // If it's a store expression we are using, it means we are not equivalent // to something earlier. if (auto *SE = dyn_cast<StoreExpression>(E)) { - assert(SE->getStoredValue() != NewClass->getLeader()); NewClass->setStoredValue(SE->getStoredValue()); markValueLeaderChangeTouched(NewClass); // Shift the new class leader to be the store @@ -2003,7 +2136,7 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, // instructions before. // If it's not a memory use, set the MemoryAccess equivalence - auto *InstMA = dyn_cast_or_null<MemoryDef>(MSSA->getMemoryAccess(I)); + auto *InstMA = dyn_cast_or_null<MemoryDef>(getMemoryAccess(I)); if (InstMA) moveMemoryToNewCongruenceClass(I, InstMA, OldClass, NewClass); ValueToClass[I] = NewClass; @@ -2035,21 +2168,31 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, } } +// For a given expression, mark the phi of ops instructions that could have +// changed as a result. 
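// (ExpressionToPhiOfOps records, per expression, the instructions whose
// phi-of-ops translation stalled because no leader was available for that
// expression; touching them here re-queues those instructions for another
// attempt.)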
+void NewGVN::markPhiOfOpsChanged(const HashedExpression &HE) { + touchAndErase(ExpressionToPhiOfOps, HE); +} + // Perform congruence finding on a given value numbering expression. void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { // This is guaranteed to return something, since it will at least find // TOP. - CongruenceClass *IClass = ValueToClass[I]; + CongruenceClass *IClass = ValueToClass.lookup(I); assert(IClass && "Should have found a IClass"); // Dead classes should have been eliminated from the mapping. assert(!IClass->isDead() && "Found a dead class"); - CongruenceClass *EClass; + CongruenceClass *EClass = nullptr; + HashedExpression HE(E); if (const auto *VE = dyn_cast<VariableExpression>(E)) { - EClass = ValueToClass[VE->getVariableValue()]; - } else { - auto lookupResult = ExpressionToClass.insert({E, nullptr}); + EClass = ValueToClass.lookup(VE->getVariableValue()); + } else if (isa<DeadExpression>(E)) { + EClass = TOPClass; + } + if (!EClass) { + auto lookupResult = ExpressionToClass.insert_as({E, nullptr}, HE); // If it's not in the value table, create a new congruence class. if (lookupResult.second) { @@ -2098,10 +2241,13 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { if (ClassChanged || LeaderChanged) { DEBUG(dbgs() << "New class " << EClass->getID() << " for expression " << *E << "\n"); - if (ClassChanged) + if (ClassChanged) { moveValueToNewCongruenceClass(I, E, IClass, EClass); + markPhiOfOpsChanged(HE); + } + markUsersTouched(I); - if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) + if (MemoryAccess *MA = getMemoryAccess(I)) markMemoryUsersTouched(MA); if (auto *CI = dyn_cast<CmpInst>(I)) markPredicateUsersTouched(CI); @@ -2110,11 +2256,11 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { // old store expression. In particular, loads do not compare against stored // value, so they will find old store expressions (and associated class // mappings) if we leave them in the table. - if (ClassChanged && isa<StoreExpression>(E)) { + if (ClassChanged && isa<StoreInst>(I)) { auto *OldE = ValueToExpression.lookup(I); // It could just be that the old class died. We don't want to erase it if we // just moved classes. - if (OldE && isa<StoreExpression>(OldE) && !OldE->equals(*E)) + if (OldE && isa<StoreExpression>(OldE) && *E != *OldE) ExpressionToClass.erase(OldE); } ValueToExpression[I] = E; @@ -2139,7 +2285,7 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) { // impact predicates. Otherwise, only mark the phi nodes as touched, as // they are the only thing that depend on new edges. Anything using their // values will get propagated to if necessary. - if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(To)) + if (MemoryAccess *MemPhi = getMemoryAccess(To)) TouchedInstructions.set(InstrToDFSNum(MemPhi)); auto BI = To->begin(); @@ -2147,6 +2293,9 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) { TouchedInstructions.set(InstrToDFSNum(&*BI)); ++BI; } + for_each_found(PHIOfOpsPHIs, To, [&](const PHINode *I) { + TouchedInstructions.set(InstrToDFSNum(I)); + }); } } } @@ -2236,7 +2385,7 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) { // This also may be a memory defining terminator, in which case, set it // equivalent only to itself. 
//
- auto *MA = MSSA->getMemoryAccess(TI);
+ auto *MA = getMemoryAccess(TI);
if (MA && !isa<MemoryUse>(MA)) { auto *CC = ensureLeaderOfMemoryClass(MA); if (setMemoryClass(MA, CC))
@@ -2245,6 +2394,158 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) { } }
+void NewGVN::addPhiOfOps(PHINode *Op, BasicBlock *BB,
+ Instruction *ExistingValue) {
+ InstrDFS[Op] = InstrToDFSNum(ExistingValue);
+ AllTempInstructions.insert(Op);
+ PHIOfOpsPHIs[BB].push_back(Op);
+ TempToBlock[Op] = BB;
+ if (ExistingValue)
+ RealToTemp[ExistingValue] = Op;
+}
+
+static bool okayForPHIOfOps(const Instruction *I) {
+ return isa<BinaryOperator>(I) || isa<SelectInst>(I) || isa<CmpInst>(I) ||
+ isa<LoadInst>(I);
+}
+
+// When we see an instruction that is an op of phis, generate the equivalent phi
+// of ops form.
+const Expression *
+NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge,
+ SmallPtrSetImpl<Value *> &Visited) {
+ if (!okayForPHIOfOps(I))
+ return nullptr;
+
+ if (!Visited.insert(I).second)
+ return nullptr;
+ // For now, we require the instruction be cycle free because we don't
+ // *always* create a phi of ops for instructions that could be done as phi
+ // of ops, we only do it if we think it is useful. If we did do it all the
+ // time, we could remove the cycle free check.
+ if (!isCycleFree(I))
+ return nullptr;
+
+ unsigned IDFSNum = InstrToDFSNum(I);
+ // Pretty much all of the instructions we can convert to phi of ops over a
+ // backedge that are adds, are really induction variables, and those are
+ // pretty much pointless to convert. This is very coarse-grained for a
+ // test, so if we do find some value, we can change it later.
+ // But otherwise, what can happen is we convert the induction variable from
+ //
+ // i = phi (0, tmp)
+ // tmp = i + 1
+ //
+ // to
+ // i = phi (0, tmpphi)
+ // tmpphi = phi(1, tmpphi+1)
+ //
+ // Which we don't want to happen. We could just avoid this for all non-cycle
+ // free phis, and we may go that route.
+ if (HasBackedge && I->getOpcode() == Instruction::Add)
+ return nullptr;
+
+ SmallPtrSet<const Value *, 8> ProcessedPHIs;
+ // TODO: We don't do phi translation on memory accesses because it's
+ // complicated. For a load, we'd need to be able to simulate a new memoryuse,
+ // which we don't have a good way of doing ATM.
+ auto *MemAccess = getMemoryAccess(I);
+ // If the memory operation is defined by a memory operation in this block
+ // that isn't a MemoryPhi, transforming the pointer backwards through a
+ // scalar phi can't help, as it would still be killed by that memory
+ // operation.
+ if (MemAccess && !isa<MemoryPhi>(MemAccess->getDefiningAccess()) &&
+ MemAccess->getDefiningAccess()->getBlock() == I->getParent())
+ return nullptr;
+
+ // Convert op of phis to phi of ops
+ for (auto &Op : I->operands()) {
+ if (!isa<PHINode>(Op))
+ continue;
+ auto *OpPHI = cast<PHINode>(Op);
+ // No point in doing this for one-operand phis.
+ if (OpPHI->getNumOperands() == 1)
+ continue;
+ if (!DebugCounter::shouldExecute(PHIOfOpsCounter))
+ return nullptr;
+ SmallVector<std::pair<Value *, BasicBlock *>, 4> Ops;
+ auto *PHIBlock = getBlockForValue(OpPHI);
+ for (auto PredBB : OpPHI->blocks()) {
+ Value *FoundVal = nullptr;
+ // We could just skip unreachable edges entirely but it's tricky to do
+ // with rewriting existing phi nodes.
+ if (ReachableEdges.count({PredBB, PHIBlock})) {
+ // Clone the instruction, create an expression from it, and see if we
+ // have a leader.
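// For instance (illustrative IR), given
//   %p = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]
//   %r = add i32 %p, 1
// the clone of %r phi-translated into %bb1 is add i32 %a, 1. If every
// reachable predecessor produces a leader, %r can be rewritten as
//   %r.phi = phi i32 [ <leader of %a+1>, %bb1 ], [ <leader of %b+1>, %bb2 ]
// where the operand leaders are whatever values already compute those sums.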
+ Instruction *ValueOp = I->clone(); + auto Iter = TempToMemory.end(); + if (MemAccess) + Iter = TempToMemory.insert({ValueOp, MemAccess}).first; + + for (auto &Op : ValueOp->operands()) { + Op = Op->DoPHITranslation(PHIBlock, PredBB); + // When this operand changes, it could change whether there is a + // leader for us or not. + addAdditionalUsers(Op, I); + } + // Make sure it's marked as a temporary instruction. + AllTempInstructions.insert(ValueOp); + // and make sure anything that tries to add it's DFS number is + // redirected to the instruction we are making a phi of ops + // for. + InstrDFS.insert({ValueOp, IDFSNum}); + const Expression *E = performSymbolicEvaluation(ValueOp, Visited); + InstrDFS.erase(ValueOp); + AllTempInstructions.erase(ValueOp); + ValueOp->deleteValue(); + if (MemAccess) + TempToMemory.erase(Iter); + if (!E) + return nullptr; + FoundVal = findPhiOfOpsLeader(E, PredBB); + if (!FoundVal) { + ExpressionToPhiOfOps[E].insert(I); + return nullptr; + } + if (auto *SI = dyn_cast<StoreInst>(FoundVal)) + FoundVal = SI->getValueOperand(); + } else { + DEBUG(dbgs() << "Skipping phi of ops operand for incoming block " + << getBlockName(PredBB) + << " because the block is unreachable\n"); + FoundVal = UndefValue::get(I->getType()); + } + + Ops.push_back({FoundVal, PredBB}); + DEBUG(dbgs() << "Found phi of ops operand " << *FoundVal << " in " + << getBlockName(PredBB) << "\n"); + } + auto *ValuePHI = RealToTemp.lookup(I); + bool NewPHI = false; + if (!ValuePHI) { + ValuePHI = PHINode::Create(I->getType(), OpPHI->getNumOperands()); + addPhiOfOps(ValuePHI, PHIBlock, I); + NewPHI = true; + NumGVNPHIOfOpsCreated++; + } + if (NewPHI) { + for (auto PHIOp : Ops) + ValuePHI->addIncoming(PHIOp.first, PHIOp.second); + } else { + unsigned int i = 0; + for (auto PHIOp : Ops) { + ValuePHI->setIncomingValue(i, PHIOp.first); + ValuePHI->setIncomingBlock(i, PHIOp.second); + ++i; + } + } + + DEBUG(dbgs() << "Created phi of ops " << *ValuePHI << " for " << *I + << "\n"); + return performSymbolicEvaluation(ValuePHI, Visited); + } + return nullptr; +} + // The algorithm initially places the values of the routine in the TOP // congruence class. The leader of TOP is the undetermined value `undef`. // When the algorithm has finished, values still in TOP are unreachable. @@ -2287,6 +2588,12 @@ void NewGVN::initializeCongruenceClasses(Function &F) { TOPClass->incStoreCount(); } for (auto &I : *BB) { + // TODO: Move to helper + if (isa<PHINode>(&I)) + for (auto *U : I.users()) + if (auto *UInst = dyn_cast<Instruction>(U)) + if (InstrToDFSNum(UInst) != 0 && okayForPHIOfOps(UInst)) + PHINodeUses.insert(UInst); // Don't insert void terminators into the class. We don't value number // them, and they just end up sitting in TOP. if (isa<TerminatorInst>(I) && I.getType()->isVoidTy()) @@ -2311,12 +2618,35 @@ void NewGVN::cleanupTables() { CongruenceClasses[i] = nullptr; } + // Destroy the value expressions + SmallVector<Instruction *, 8> TempInst(AllTempInstructions.begin(), + AllTempInstructions.end()); + AllTempInstructions.clear(); + + // We have to drop all references for everything first, so there are no uses + // left as we delete them. 
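// (The two passes matter: the temporary instructions may use one another,
// so every dropAllReferences() has to happen before the first deleteValue(),
// otherwise we could delete a value that still has uses.)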
+ for (auto *I : TempInst) { + I->dropAllReferences(); + } + + while (!TempInst.empty()) { + auto *I = TempInst.back(); + TempInst.pop_back(); + I->deleteValue(); + } + ValueToClass.clear(); ArgRecycler.clear(ExpressionAllocator); ExpressionAllocator.Reset(); CongruenceClasses.clear(); ExpressionToClass.clear(); ValueToExpression.clear(); + RealToTemp.clear(); + AdditionalUsers.clear(); + ExpressionToPhiOfOps.clear(); + TempToBlock.clear(); + TempToMemory.clear(); + PHIOfOpsPHIs.clear(); ReachableBlocks.clear(); ReachableEdges.clear(); #ifndef NDEBUG @@ -2332,14 +2662,17 @@ void NewGVN::cleanupTables() { MemoryToUsers.clear(); } +// Assign local DFS number mapping to instructions, and leave space for Value +// PHI's. std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B, unsigned Start) { unsigned End = Start; - if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(B)) { + if (MemoryAccess *MemPhi = getMemoryAccess(B)) { InstrDFS[MemPhi] = End++; DFSToInstr.emplace_back(MemPhi); } + // Then the real block goes next. for (auto &I : *B) { // There's no need to call isInstructionTriviallyDead more than once on // an instruction. Therefore, once we know that an instruction is dead @@ -2350,7 +2683,6 @@ std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B, markInstructionForDeletion(&I); continue; } - InstrDFS[&I] = End++; DFSToInstr.emplace_back(&I); } @@ -2361,7 +2693,7 @@ std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B, return std::make_pair(Start, End); } -void NewGVN::updateProcessedCount(Value *V) { +void NewGVN::updateProcessedCount(const Value *V) { #ifndef NDEBUG if (ProcessedCount.count(V) == 0) { ProcessedCount.insert({V, 1}); @@ -2375,12 +2707,13 @@ void NewGVN::updateProcessedCount(Value *V) { // Evaluate MemoryPhi nodes symbolically, just like PHI nodes void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) { // If all the arguments are the same, the MemoryPhi has the same value as the - // argument. - // Filter out unreachable blocks and self phis from our operands. + // argument. Filter out unreachable blocks and self phis from our operands. + // TODO: We could do cycle-checking on the memory phis to allow valueizing for + // self-phi checking. const BasicBlock *PHIBlock = MP->getBlock(); auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) { - return lookupMemoryLeader(cast<MemoryAccess>(U)) != MP && - !isMemoryAccessTop(cast<MemoryAccess>(U)) && + return cast<MemoryAccess>(U) != MP && + !isMemoryAccessTOP(cast<MemoryAccess>(U)) && ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock}); }); // If all that is left is nothing, our memoryphi is undef. We keep it as @@ -2433,18 +2766,26 @@ void NewGVN::valueNumberInstruction(Instruction *I) { DEBUG(dbgs() << "Processing instruction " << *I << "\n"); if (!I->isTerminator()) { const Expression *Symbolized = nullptr; + SmallPtrSet<Value *, 2> Visited; if (DebugCounter::shouldExecute(VNCounter)) { - Symbolized = performSymbolicEvaluation(I); + Symbolized = performSymbolicEvaluation(I, Visited); + // Make a phi of ops if necessary + if (Symbolized && !isa<ConstantExpression>(Symbolized) && + !isa<VariableExpression>(Symbolized) && PHINodeUses.count(I)) { + // FIXME: Backedge argument + auto *PHIE = makePossiblePhiOfOps(I, false, Visited); + if (PHIE) + Symbolized = PHIE; + } + } else { // Mark the instruction as unused so we don't value number it again. 
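// (DFS number 0 serves as the "unused" marker here; the phi-of-ops
// collection in initializeCongruenceClasses likewise skips users with
// InstrToDFSNum(UInst) == 0.)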
InstrDFS[I] = 0; } // If we couldn't come up with a symbolic expression, use the unknown // expression - if (Symbolized == nullptr) { + if (Symbolized == nullptr) Symbolized = createUnknownExpression(I); - } - performCongruenceFinding(I, Symbolized); } else { // Handle terminators that return values. All of them produce values we @@ -2460,13 +2801,23 @@ void NewGVN::valueNumberInstruction(Instruction *I) { // Check if there is a path, using single or equal argument phi nodes, from // First to Second. -bool NewGVN::singleReachablePHIPath(const MemoryAccess *First, - const MemoryAccess *Second) const { +bool NewGVN::singleReachablePHIPath( + SmallPtrSet<const MemoryAccess *, 8> &Visited, const MemoryAccess *First, + const MemoryAccess *Second) const { if (First == Second) return true; if (MSSA->isLiveOnEntryDef(First)) return false; + // This is not perfect, but as we're just verifying here, we can live with + // the loss of precision. The real solution would be that of doing strongly + // connected component finding in this routine, and it's probably not worth + // the complexity for the time being. So, we just keep a set of visited + // MemoryAccess and return true when we hit a cycle. + if (Visited.count(First)) + return true; + Visited.insert(First); + const auto *EndDef = First; for (auto *ChainDef : optimized_def_chain(First)) { if (ChainDef == Second) @@ -2489,7 +2840,8 @@ bool NewGVN::singleReachablePHIPath(const MemoryAccess *First, Okay = std::equal(OperandList.begin(), OperandList.end(), OperandList.begin()); if (Okay) - return singleReachablePHIPath(cast<MemoryAccess>(OperandList[0]), Second); + return singleReachablePHIPath(Visited, cast<MemoryAccess>(OperandList[0]), + Second); return false; } @@ -2552,17 +2904,17 @@ void NewGVN::verifyMemoryCongruency() const { auto Filtered = make_filter_range(MemoryAccessToClass, ReachableAccessPred); for (auto KV : Filtered) { - assert(KV.second != TOPClass && - "Memory not unreachable but ended up in TOP"); if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) { auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second->getMemoryLeader()); - if (FirstMUD && SecondMUD) - assert((singleReachablePHIPath(FirstMUD, SecondMUD) || + if (FirstMUD && SecondMUD) { + SmallPtrSet<const MemoryAccess *, 8> VisitedMAS; + assert((singleReachablePHIPath(VisitedMAS, FirstMUD, SecondMUD) || ValueToClass.lookup(FirstMUD->getMemoryInst()) == ValueToClass.lookup(SecondMUD->getMemoryInst())) && "The instructions for these memory operations should have " "been in the same congruence class or reachable through" "a single argument phi"); + } } else if (auto *FirstMP = dyn_cast<MemoryPhi>(KV.first)) { // We can only sanely verify that MemoryDefs in the operand list all have // the same class. @@ -2671,7 +3023,7 @@ void NewGVN::iterateTouchedInstructions() { // Nothing set, nothing to iterate, just return. if (FirstInstr == -1) return; - BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr)); + const BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr)); while (TouchedInstructions.any()) { ++Iterations; // Walk through all the instructions in all the blocks in RPO. @@ -2688,7 +3040,7 @@ void NewGVN::iterateTouchedInstructions() { } Value *V = InstrFromDFSNum(InstrNum); - BasicBlock *CurrBlock = getBlockForValue(V); + const BasicBlock *CurrBlock = getBlockForValue(V); // If we hit a new block, do reachability processing. 
if (CurrBlock != LastBlock) { @@ -2731,6 +3083,7 @@ bool NewGVN::runGVN() { bool Changed = false; NumFuncArgs = F.arg_size(); MSSAWalker = MSSA->getWalker(); + SingletonDeadExpression = new (ExpressionAllocator) DeadExpression(); // Count number of instructions for sizing of hash tables, and come // up with a global dfs numbering for instructions. @@ -2769,6 +3122,7 @@ bool NewGVN::runGVN() { BlockInstRange.insert({B, BlockRange}); ICount += BlockRange.second - BlockRange.first; } + initializeCongruenceClasses(F); TouchedInstructions.resize(ICount); // Ensure we don't end up resizing the expressionToClass map, as @@ -2779,9 +3133,10 @@ bool NewGVN::runGVN() { // Initialize the touched instructions to include the entry block. const auto &InstRange = BlockInstRange.lookup(&F.getEntryBlock()); TouchedInstructions.set(InstRange.first, InstRange.second); + DEBUG(dbgs() << "Block " << getBlockName(&F.getEntryBlock()) + << " marked reachable\n"); ReachableBlocks.insert(&F.getEntryBlock()); - initializeCongruenceClasses(F); iterateTouchedInstructions(); verifyMemoryCongruency(); verifyIterationSettled(F); @@ -2794,7 +3149,8 @@ bool NewGVN::runGVN() { if (!ToErase->use_empty()) ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType())); - ToErase->eraseFromParent(); + if (ToErase->getParent()) + ToErase->eraseFromParent(); } // Delete all unreachable blocks. @@ -2813,14 +3169,6 @@ bool NewGVN::runGVN() { return Changed; } -// Return true if V is a value that will always be available (IE can -// be placed anywhere) in the function. We don't do globals here -// because they are often worse to put in place. -// TODO: Separate cost from availability -static bool alwaysAvailable(Value *V) { - return isa<Constant>(V) || isa<Argument>(V); -} - struct NewGVN::ValueDFS { int DFSIn = 0; int DFSOut = 0; @@ -2910,9 +3258,21 @@ void NewGVN::convertClassToDFSOrdered( } assert(isa<Instruction>(D) && "The dense set member should always be an instruction"); - VDDef.LocalNum = InstrToDFSNum(D); - DFSOrderedSet.emplace_back(VDDef); Instruction *Def = cast<Instruction>(D); + VDDef.LocalNum = InstrToDFSNum(D); + DFSOrderedSet.push_back(VDDef); + // If there is a phi node equivalent, add it + if (auto *PN = RealToTemp.lookup(Def)) { + auto *PHIE = + dyn_cast_or_null<PHIExpression>(ValueToExpression.lookup(Def)); + if (PHIE) { + VDDef.Def.setInt(false); + VDDef.Def.setPointer(PN); + VDDef.LocalNum = 0; + DFSOrderedSet.push_back(VDDef); + } + } + unsigned int UseCount = 0; // Now add the uses. for (auto &U : Def->uses()) { @@ -2929,7 +3289,7 @@ void NewGVN::convertClassToDFSOrdered( // they are from. VDUse.LocalNum = InstrDFS.size() + 1; } else { - IBlock = I->getParent(); + IBlock = getBlockForValue(I); VDUse.LocalNum = InstrToDFSNum(I); } @@ -3099,6 +3459,37 @@ private: }; } +// Given a value and a basic block we are trying to see if it is available in, +// see if the value has a leader available in that block. +Value *NewGVN::findPhiOfOpsLeader(const Expression *E, + const BasicBlock *BB) const { + // It would already be constant if we could make it constant + if (auto *CE = dyn_cast<ConstantExpression>(E)) + return CE->getConstantValue(); + if (auto *VE = dyn_cast<VariableExpression>(E)) + return VE->getVariableValue(); + + auto *CC = ExpressionToClass.lookup(E); + if (!CC) + return nullptr; + if (alwaysAvailable(CC->getLeader())) + return CC->getLeader(); + + for (auto Member : *CC) { + auto *MemberInst = dyn_cast<Instruction>(Member); + // Anything that isn't an instruction is always available. 
+ if (!MemberInst) + return Member; + // If we are looking for something in the same block as the member, it must + // be a leader because this function is looking for operands for a phi node. + if (MemberInst->getParent() == BB || + DT->dominates(MemberInst->getParent(), BB)) { + return Member; + } + } + return nullptr; +} + bool NewGVN::eliminateInstructions(Function &F) { // This is a non-standard eliminator. The normal way to eliminate is // to walk the dominator tree in order, keeping track of available @@ -3129,25 +3520,42 @@ bool NewGVN::eliminateInstructions(Function &F) { // DFS numbers are updated, we compute some ourselves. DT->updateDFSNumbers(); - for (auto &B : F) { - if (!ReachableBlocks.count(&B)) { - for (const auto S : successors(&B)) { - for (auto II = S->begin(); isa<PHINode>(II); ++II) { - auto &Phi = cast<PHINode>(*II); - DEBUG(dbgs() << "Replacing incoming value of " << *II << " for block " - << getBlockName(&B) - << " with undef due to it being unreachable\n"); - for (auto &Operand : Phi.incoming_values()) - if (Phi.getIncomingBlock(Operand) == &B) - Operand.set(UndefValue::get(Phi.getType())); - } + // Go through all of our phi nodes, and kill the arguments associated with + // unreachable edges. + auto ReplaceUnreachablePHIArgs = [&](PHINode &PHI, BasicBlock *BB) { + for (auto &Operand : PHI.incoming_values()) + if (!ReachableEdges.count({PHI.getIncomingBlock(Operand), BB})) { + DEBUG(dbgs() << "Replacing incoming value of " << PHI << " for block " + << getBlockName(PHI.getIncomingBlock(Operand)) + << " with undef due to it being unreachable\n"); + Operand.set(UndefValue::get(PHI.getType())); } + }; + SmallPtrSet<BasicBlock *, 8> BlocksWithPhis; + for (auto &B : F) + if ((!B.empty() && isa<PHINode>(*B.begin())) || + (PHIOfOpsPHIs.find(&B) != PHIOfOpsPHIs.end())) + BlocksWithPhis.insert(&B); + DenseMap<const BasicBlock *, unsigned> ReachablePredCount; + for (auto KV : ReachableEdges) + ReachablePredCount[KV.getEnd()]++; + for (auto *BB : BlocksWithPhis) + // TODO: It would be faster to use getNumIncomingBlocks() on a phi node in + // the block and subtract the pred count, but it's more complicated. + if (ReachablePredCount.lookup(BB) != + std::distance(pred_begin(BB), pred_end(BB))) { + for (auto II = BB->begin(); isa<PHINode>(II); ++II) { + auto &PHI = cast<PHINode>(*II); + ReplaceUnreachablePHIArgs(PHI, BB); + } + for_each_found(PHIOfOpsPHIs, BB, [&](PHINode *PHI) { + ReplaceUnreachablePHIArgs(*PHI, BB); + }); } - } // Map to store the use counts DenseMap<const Value *, unsigned int> UseCounts; - for (CongruenceClass *CC : reverse(CongruenceClasses)) { + for (auto *CC : reverse(CongruenceClasses)) { // Track the equivalent store info so we can decide whether to try // dead store elimination. SmallVector<ValueDFS, 8> PossibleDeadStores; @@ -3156,13 +3564,15 @@ bool NewGVN::eliminateInstructions(Function &F) { continue; // Everything still in the TOP class is unreachable or dead. 
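// (Members whose expression is the DeadExpression were only proven dead
// while fixpointing, e.g. phis all of whose live arguments disappeared; they
// are queued for deletion below before the assert checks the rest.)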
if (CC == TOPClass) {
-#ifndef NDEBUG
- for (auto M : *CC)
+ for (auto M : *CC) {
+ auto *VTE = ValueToExpression.lookup(M);
+ if (VTE && isa<DeadExpression>(VTE))
+ markInstructionForDeletion(cast<Instruction>(M));
assert((!ReachableBlocks.count(cast<Instruction>(M)->getParent()) || InstructionsToErase.count(cast<Instruction>(M))) && "Everything in TOP should be unreachable or dead at this " "point");
-#endif
+ }
continue; }
@@ -3195,7 +3605,7 @@ bool NewGVN::eliminateInstructions(Function &F) {
DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID() << "\n");
// If this is a singleton, we can skip it.
- if (CC->size() != 1) {
+ if (CC->size() != 1 || RealToTemp.lookup(Leader)) {
// This is a stack because equality replacement/etc may place
// constants in the middle of the member list, and we want to use
// those constant values in preference to the current leader, over
@@ -3217,6 +3627,22 @@
// We ignore void things because we can't get a value from them.
if (Def && Def->getType()->isVoidTy()) continue;
+ auto *DefInst = dyn_cast_or_null<Instruction>(Def);
+ if (DefInst && AllTempInstructions.count(DefInst)) {
+ auto *PN = cast<PHINode>(DefInst);
+
+ // If this is a value phi and that's the expression we used, insert it
+ // into the program and remove it from the temp instruction list.
+ AllTempInstructions.erase(PN);
+ auto *DefBlock = getBlockForValue(Def);
+ DEBUG(dbgs() << "Inserting fully real phi of ops " << *Def
+ << " into block "
+ << getBlockName(getBlockForValue(Def)) << "\n");
+ PN->insertBefore(&DefBlock->front());
+ Def = PN;
+ NumGVNPHIOfOpsEliminations++;
+ }
if (EliminationStack.empty()) { DEBUG(dbgs() << "Elimination Stack is empty\n");
@@ -3301,6 +3727,10 @@ bool NewGVN::eliminateInstructions(Function &F) {
Value *DominatingLeader = EliminationStack.back();
+ auto *II = dyn_cast<IntrinsicInst>(DominatingLeader);
+ if (II && II->getIntrinsicID() == Intrinsic::ssa_copy)
+ DominatingLeader = II->getOperand(0);
+
// Don't replace our existing users with ourselves.
if (U->get() == DominatingLeader) continue;
@@ -3321,6 +3751,8 @@ bool NewGVN::eliminateInstructions(Function &F) {
// It's about to be alive again.
if (LeaderUseCount == 0 && isa<Instruction>(DominatingLeader)) ProbablyDead.erase(cast<Instruction>(DominatingLeader));
+ if (LeaderUseCount == 0 && II)
+ ProbablyDead.insert(II);
++LeaderUseCount; AnythingReplaced = true; }
@@ -3375,7 +3807,6 @@ bool NewGVN::eliminateInstructions(Function &F) { } } }
- return AnythingReplaced; }
@@ -3385,19 +3816,23 @@ bool NewGVN::eliminateInstructions(Function &F) {
// we will simplify an operation with all constants so that it doesn't matter
// what order they appear in.
unsigned int NewGVN::getRank(const Value *V) const {
- // Prefer undef to anything else
+ // Prefer constants to undef to anything else
+ // Undef is a constant, have to check it first.
+ // Prefer smaller constants to constantexprs
+ if (isa<ConstantExpr>(V))
+ return 2;
if (isa<UndefValue>(V))
- return 0;
- if (isa<Constant>(V))
return 1;
+ if (isa<Constant>(V))
+ return 0;
else if (auto *A = dyn_cast<Argument>(V))
- return 2 + A->getArgNo();
+ return 3 + A->getArgNo();
// Need to shift the instruction DFS by number of arguments + 4 to account for
// the constant and argument ranking above.
unsigned Result = InstrToDFSNum(V);
if (Result > 0)
- return 3 + NumFuncArgs + Result;
+ return 4 + NumFuncArgs + Result;
// Unreachable or something else, just return a really large number.
return ~0; } diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 53320bff0883..a20890b22603 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -1582,7 +1582,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I, } // No need for extra uses anymore. - delete DummyInst; + DummyInst->deleteValue(); unsigned NumAddedValues = NewMulOps.size(); Value *V = EmitAddTreeOfValues(I, NewMulOps); diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 1d9beffaf06b..24bd0a2b7bdf 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -2443,7 +2443,7 @@ private: "insert"); LI.replaceAllUsesWith(V); Placeholder->replaceAllUsesWith(&LI); - delete Placeholder; + Placeholder->deleteValue(); } else { LI.replaceAllUsesWith(V); } @@ -3898,8 +3898,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, } NumAllocaPartitionUses += NumUses; - MaxUsesPerAllocaPartition = - std::max<unsigned>(NumUses, MaxUsesPerAllocaPartition); + MaxUsesPerAllocaPartition.updateMax(NumUses); // Now that we've processed all the slices in the new partition, check if any // PHIs or Selects would block promotion. @@ -4016,8 +4015,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { } NumAllocaPartitions += NumPartitions; - MaxPartitionsPerAlloca = - std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca); + MaxPartitionsPerAlloca.updateMax(NumPartitions); // Migrate debug information from the old alloca to the new alloca(s) // and the individual partitions. diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp index 2be3f5c533b9..8b8d6590aa6a 100644 --- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -693,7 +693,7 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) { UnlinkedInst->setOperand(I, nullptr); RecursivelyDeleteTriviallyDeadInstructions(Op); } - delete UnlinkedInst; + UnlinkedInst->deleteValue(); } bool Ret = !UnlinkedInstructions.empty(); UnlinkedInstructions.clear(); diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 4aa26fd14fee..bf2ab7c55be2 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -317,7 +317,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (!NewInst->mayHaveSideEffects()) { VMap[&*II] = V; - delete NewInst; + NewInst->deleteValue(); continue; } } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index b44bc74d6551..27f72fcd8bda 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2235,7 +2235,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, if (!BBI->use_empty()) TranslateMap[&*BBI] = V; if (!N->mayHaveSideEffects()) { - delete N; // Instruction folded away, don't need actual inst + N->deleteValue(); // Instruction folded away, don't need actual inst N = nullptr; } } else { @@ -4380,7 +4380,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, // Gather dead cases. 
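// (A case is dead when its value conflicts with bits already known in the
// condition: if, say, the condition is known to be a multiple of 4, then
// Known.Zero covers the two low bits and a case value of 3 can never match.)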
SmallVector<ConstantInt *, 8> DeadCases; for (auto &Case : SI->cases()) { - APInt CaseVal = Case.getCaseValue()->getValue(); + const APInt &CaseVal = Case.getCaseValue()->getValue(); if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) || (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) { DeadCases.push_back(Case.getCaseValue()); @@ -4946,7 +4946,7 @@ SwitchLookupTable::SwitchLookupTable( LinearMappingPossible = false; break; } - APInt Val = ConstVal->getValue(); + const APInt &Val = ConstVal->getValue(); if (I != 0) { APInt Dist = Val - PrevVal; if (I == 1) { diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 1de579ed41b0..85c9464b5569 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -426,57 +426,70 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { return Dst; } -Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { +Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B, + unsigned CharSize) { Value *Src = CI->getArgOperand(0); // Constant folding: strlen("xyz") -> 3 - if (uint64_t Len = GetStringLength(Src)) + if (uint64_t Len = GetStringLength(Src, CharSize)) return ConstantInt::get(CI->getType(), Len - 1); // If s is a constant pointer pointing to a string literal, we can fold - // strlen(s + x) to strlen(s) - x, when x is known to be in the range + // strlen(s + x) to strlen(s) - x, when x is known to be in the range // [0, strlen(s)] or the string has a single null terminator '\0' at the end. - // We only try to simplify strlen when the pointer s points to an array + // We only try to simplify strlen when the pointer s points to an array // of i8. Otherwise, we would need to scale the offset x before doing the - // subtraction. This will make the optimization more complex, and it's not - // very useful because calling strlen for a pointer of other types is + // subtraction. This will make the optimization more complex, and it's not + // very useful because calling strlen for a pointer of other types is // very uncommon. if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) { - if (!isGEPBasedOnPointerToString(GEP)) + if (!isGEPBasedOnPointerToString(GEP, CharSize)) return nullptr; - StringRef Str; - if (getConstantStringInfo(GEP->getOperand(0), Str, 0, false)) { - size_t NullTermIdx = Str.find('\0'); - - // If the string does not have '\0', leave it to strlen to compute - // its length. - if (NullTermIdx == StringRef::npos) - return nullptr; - + ConstantDataArraySlice Slice; + if (getConstantDataArrayInfo(GEP->getOperand(0), Slice, CharSize)) { + uint64_t NullTermIdx; + if (Slice.Array == nullptr) { + NullTermIdx = 0; + } else { + NullTermIdx = ~((uint64_t)0); + for (uint64_t I = 0, E = Slice.Length; I < E; ++I) { + if (Slice.Array->getElementAsInteger(I + Slice.Offset) == 0) { + NullTermIdx = I; + break; + } + } + // If the string does not have '\0', leave it to strlen to compute + // its length. 
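// (The sentinel ~(uint64_t)0 assigned above survives the scan only when no
// element of the constant slice is zero, so hitting it below means the
// constant data carries no terminator and the call is left for the runtime.)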
+ if (NullTermIdx == ~((uint64_t)0)) + return nullptr; + } + Value *Offset = GEP->getOperand(2); unsigned BitWidth = Offset->getType()->getIntegerBitWidth(); KnownBits Known(BitWidth); computeKnownBits(Offset, Known, DL, 0, nullptr, CI, nullptr); Known.Zero.flipAllBits(); - size_t ArrSize = + uint64_t ArrSize = cast<ArrayType>(GEP->getSourceElementType())->getNumElements(); - // KnownZero's bits are flipped, so zeros in KnownZero now represent - // bits known to be zeros in Offset, and ones in KnowZero represent + // KnownZero's bits are flipped, so zeros in KnownZero now represent + // bits known to be zeros in Offset, and ones in KnowZero represent // bits unknown in Offset. Therefore, Offset is known to be in range - // [0, NullTermIdx] when the flipped KnownZero is non-negative and + // [0, NullTermIdx] when the flipped KnownZero is non-negative and // unsigned-less-than NullTermIdx. // - // If Offset is not provably in the range [0, NullTermIdx], we can still - // optimize if we can prove that the program has undefined behavior when - // Offset is outside that range. That is the case when GEP->getOperand(0) + // If Offset is not provably in the range [0, NullTermIdx], we can still + // optimize if we can prove that the program has undefined behavior when + // Offset is outside that range. That is the case when GEP->getOperand(0) // is a pointer to an object whose memory extent is NullTermIdx+1. - if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) || + if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) || (GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) && - NullTermIdx == ArrSize - 1)) - return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx), + NullTermIdx == ArrSize - 1)) { + Offset = B.CreateSExtOrTrunc(Offset, CI->getType()); + return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx), Offset); + } } return nullptr; @@ -484,8 +497,8 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { // strlen(x?"foo":"bars") --> x ? 
3 : 4
if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
- uint64_t LenTrue = GetStringLength(SI->getTrueValue());
- uint64_t LenFalse = GetStringLength(SI->getFalseValue());
+ uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize);
+ uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize);
if (LenTrue && LenFalse) { Function *Caller = CI->getParent()->getParent(); emitOptimizationRemark(CI->getContext(), "simplify-libcalls", *Caller,
@@ -505,6 +518,17 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { return nullptr; }
+Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
+ return optimizeStringLength(CI, B, 8);
+}
+
+Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) {
+ Module &M = *CI->getParent()->getParent()->getParent();
+ unsigned WCharSize = TLI->getWCharSize(M) * 8;
+
+ return optimizeStringLength(CI, B, WCharSize);
+}
+
Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) { StringRef S1, S2; bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
@@ -2026,6 +2050,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return optimizeMemMove(CI, Builder);
case LibFunc_memset: return optimizeMemSet(CI, Builder);
+ case LibFunc_wcslen:
+ return optimizeWcslen(CI, Builder);
default: break; }
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 516ab7d03a88..1dc554bede7e 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3047,7 +3047,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
if (CreateGatherScatter) { Value *MaskPart = Legal->isMaskRequired(LI) ? Mask[Part] : nullptr; NewLI = Builder.CreateMaskedGather(VectorGep[Part], Alignment, MaskPart,
- 0, "wide.masked.gather");
+ nullptr, "wide.masked.gather");
Entry[Part] = NewLI; } else {
// Calculate the pointer for the specific unroll-part.
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 64013d6d687d..e6f78e6b94a3 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -578,12 +578,12 @@ private:
void eraseInstruction(Instruction *I) { I->removeFromParent(); I->dropAllReferences();
- DeletedInstructions.push_back(std::unique_ptr<Instruction>(I));
+ DeletedInstructions.emplace_back(I);
}
/// Temporary store for deleted instructions. Instructions will be deleted
/// eventually when the BoUpSLP is destructed.
- SmallVector<std::unique_ptr<Instruction>, 8> DeletedInstructions;
+ SmallVector<unique_value, 8> DeletedInstructions;
/// A list of values that need to be extracted out of the tree.
/// This list holds pairs of (Internal Scalar : External User). External User |