diff options
Diffstat (limited to 'llvm/lib/Transforms/Utils/SimplifyCFG.cpp')
-rw-r--r-- | llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 1874 |
1 files changed, 1110 insertions, 764 deletions
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index b450d71c996c..7cfe17618cde 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -13,8 +13,11 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -58,6 +61,7 @@ #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -67,6 +71,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> #include <cassert> @@ -85,6 +90,12 @@ using namespace PatternMatch; #define DEBUG_TYPE "simplifycfg" +cl::opt<bool> llvm::RequireAndPreserveDomTree( + "simplifycfg-require-and-preserve-domtree", cl::Hidden, cl::ZeroOrMore, + cl::init(false), + cl::desc("Temorary development switch used to gradually uplift SimplifyCFG " + "into preserving DomTree,")); + // Chosen as 2 so as to be cheap, but still to have enough power to fold // a select, so the "clamp" idiom (of a min followed by a max) will be caught. // To catch this, we need to fold a compare and a select, hence '2' being the @@ -105,6 +116,10 @@ static cl::opt<bool> DupRet( cl::desc("Duplicate return instructions into unconditional branches")); static cl::opt<bool> + HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), + cl::desc("Hoist common instructions up to the parent block")); + +static cl::opt<bool> SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); @@ -138,6 +153,13 @@ MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through")); +// Two is chosen to allow one negation and a logical combine. +static cl::opt<unsigned> + BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, + cl::init(2), + cl::desc("Maximum cost of combining conditions when " + "folding branches")); + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); @@ -147,9 +169,22 @@ STATISTIC( NumLookupTablesHoles, "Number of switch instructions turned into lookup tables (holes checked)"); STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares"); -STATISTIC(NumSinkCommons, +STATISTIC(NumFoldValueComparisonIntoPredecessors, + "Number of value comparisons folded into predecessor basic blocks"); +STATISTIC(NumFoldBranchToCommonDest, + "Number of branches folded into predecessor basic block"); +STATISTIC( + NumHoistCommonCode, + "Number of common instruction 'blocks' hoisted up to the begin block"); +STATISTIC(NumHoistCommonInstrs, + "Number of common instructions hoisted up to the begin block"); +STATISTIC(NumSinkCommonCode, + "Number of common instruction 'blocks' sunk down to the end block"); +STATISTIC(NumSinkCommonInstrs, "Number of common instructions sunk down to the end block"); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); +STATISTIC(NumInvokes, + "Number of invokes with empty resume blocks simplified into calls"); namespace { @@ -182,8 +217,9 @@ struct ValueEqualityComparisonCase { class SimplifyCFGOpt { const TargetTransformInfo &TTI; + DomTreeUpdater *DTU; const DataLayout &DL; - SmallPtrSetImpl<BasicBlock *> *LoopHeaders; + ArrayRef<WeakVH> LoopHeaders; const SimplifyCFGOptions &Options; bool Resimplify; @@ -193,6 +229,9 @@ class SimplifyCFGOpt { bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder); + bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV, + Instruction *PTI, + IRBuilder<> &Builder); bool FoldValueComparisonIntoPredecessors(Instruction *TI, IRBuilder<> &Builder); @@ -225,13 +264,18 @@ class SimplifyCFGOpt { bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder); public: - SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL, - SmallPtrSetImpl<BasicBlock *> *LoopHeaders, + SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU, + const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders, const SimplifyCFGOptions &Opts) - : TTI(TTI), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {} + : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) { + assert((!DTU || !DTU->hasPostDomTree()) && + "SimplifyCFG is not yet capable of maintaining validity of a " + "PostDomTree, so don't ask for it."); + } - bool run(BasicBlock *BB); bool simplifyOnce(BasicBlock *BB); + bool simplifyOnceImpl(BasicBlock *BB); + bool run(BasicBlock *BB); // Helper to set Resimplify and return change indication. bool requestResimplify() { @@ -273,46 +317,6 @@ SafeToMergeTerminators(Instruction *SI1, Instruction *SI2, return !Fail; } -/// Return true if it is safe and profitable to merge these two terminator -/// instructions together, where SI1 is an unconditional branch. PhiNodes will -/// store all PHI nodes in common successors. -static bool -isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2, - Instruction *Cond, - SmallVectorImpl<PHINode *> &PhiNodes) { - if (SI1 == SI2) - return false; // Can't merge with self! - assert(SI1->isUnconditional() && SI2->isConditional()); - - // We fold the unconditional branch if we can easily update all PHI nodes in - // common successors: - // 1> We have a constant incoming value for the conditional branch; - // 2> We have "Cond" as the incoming value for the unconditional branch; - // 3> SI2->getCondition() and Cond have same operands. - CmpInst *Ci2 = dyn_cast<CmpInst>(SI2->getCondition()); - if (!Ci2) - return false; - if (!(Cond->getOperand(0) == Ci2->getOperand(0) && - Cond->getOperand(1) == Ci2->getOperand(1)) && - !(Cond->getOperand(0) == Ci2->getOperand(1) && - Cond->getOperand(1) == Ci2->getOperand(0))) - return false; - - BasicBlock *SI1BB = SI1->getParent(); - BasicBlock *SI2BB = SI2->getParent(); - SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); - for (BasicBlock *Succ : successors(SI2BB)) - if (SI1Succs.count(Succ)) - for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) { - PHINode *PN = cast<PHINode>(BBI); - if (PN->getIncomingValueForBlock(SI1BB) != Cond || - !isa<ConstantInt>(PN->getIncomingValueForBlock(SI2BB))) - return false; - PhiNodes.push_back(PN); - } - return true; -} - /// Update PHI nodes in Succ to indicate that there will now be entries in it /// from the 'NewPred' block. The values that will be flowing into the PHI nodes /// will be the same as those coming in from ExistPred, an existing predecessor @@ -651,7 +655,7 @@ private: /// vector. /// One "Extra" case is allowed to differ from the other. void gather(Value *V) { - bool isEQ = (cast<Instruction>(V)->getOpcode() == Instruction::Or); + bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value())); // Keep a stack (SmallVector for efficiency) for depth-first traversal SmallVector<Value *, 8> DFT; @@ -666,11 +670,14 @@ private: if (Instruction *I = dyn_cast<Instruction>(V)) { // If it is a || (or && depending on isEQ), process the operands. - if (I->getOpcode() == (isEQ ? Instruction::Or : Instruction::And)) { - if (Visited.insert(I->getOperand(1)).second) - DFT.push_back(I->getOperand(1)); - if (Visited.insert(I->getOperand(0)).second) - DFT.push_back(I->getOperand(0)); + Value *Op0, *Op1; + if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1))) + : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) { + if (Visited.insert(Op1).second) + DFT.push_back(Op1); + if (Visited.insert(Op0).second) + DFT.push_back(Op0); + continue; } @@ -765,7 +772,7 @@ BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases( static void EliminateBlockCases(BasicBlock *BB, std::vector<ValueEqualityComparisonCase> &Cases) { - Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end()); + llvm::erase_value(Cases, BB); } /// Return true if there are any keys in C1 that exist in C2 as well. @@ -875,13 +882,18 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( (void)NI; // Remove PHI node entries for the dead edge. - ThisCases[0].Dest->removePredecessor(TI->getParent()); + ThisCases[0].Dest->removePredecessor(PredDef); LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); EraseTerminatorAndDCECond(TI); + + if (DTU) + DTU->applyUpdates( + {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}}); + return true; } @@ -894,13 +906,25 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI); + SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases; for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) { --i; + auto *Successor = i->getCaseSuccessor(); + ++NumPerSuccessorCases[Successor]; if (DeadCases.count(i->getCaseValue())) { - i->getCaseSuccessor()->removePredecessor(TI->getParent()); + Successor->removePredecessor(PredDef); SI.removeCase(i); + --NumPerSuccessorCases[Successor]; } } + + std::vector<DominatorTree::UpdateType> Updates; + for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) + if (I.second == 0) + Updates.push_back({DominatorTree::Delete, PredDef, I.first}); + if (DTU) + DTU->applyUpdates(Updates); + LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; } @@ -930,12 +954,16 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( if (!TheRealDest) TheRealDest = ThisDef; + SmallSetVector<BasicBlock *, 2> RemovedSuccs; + // Remove PHI node entries for dead edges. BasicBlock *CheckEdge = TheRealDest; for (BasicBlock *Succ : successors(TIBB)) - if (Succ != CheckEdge) + if (Succ != CheckEdge) { + if (Succ != TheRealDest) + RemovedSuccs.insert(Succ); Succ->removePredecessor(TIBB); - else + } else CheckEdge = nullptr; // Insert the new branch. @@ -947,6 +975,13 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( << "\n"); EraseTerminatorAndDCECond(TI); + if (DTU) { + SmallVector<DominatorTree::UpdateType, 2> Updates; + Updates.reserve(RemovedSuccs.size()); + for (auto *RemovedSucc : RemovedSuccs) + Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc}); + DTU->applyUpdates(Updates); + } return true; } @@ -1014,219 +1049,300 @@ static void FitWeights(MutableArrayRef<uint64_t> Weights) { } } -/// The specified terminator is a value equality comparison instruction -/// (either a switch or a branch on "X == c"). -/// See if any of the predecessors of the terminator block are value comparisons -/// on the same value. If so, and if safe to do so, fold them together. -bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI, - IRBuilder<> &Builder) { - BasicBlock *BB = TI->getParent(); - Value *CV = isValueEqualityComparison(TI); // CondVal - assert(CV && "Not a comparison?"); - bool Changed = false; +static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( + BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) { + Instruction *PTI = PredBlock->getTerminator(); - SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); - while (!Preds.empty()) { - BasicBlock *Pred = Preds.pop_back_val(); + // If we have bonus instructions, clone them into the predecessor block. + // Note that there may be multiple predecessor blocks, so we cannot move + // bonus instructions to a predecessor block. + for (Instruction &BonusInst : *BB) { + if (isa<DbgInfoIntrinsic>(BonusInst) || BonusInst.isTerminator()) + continue; - // See if the predecessor is a comparison with the same value. - Instruction *PTI = Pred->getTerminator(); - Value *PCV = isValueEqualityComparison(PTI); // PredCondVal + Instruction *NewBonusInst = BonusInst.clone(); - if (PCV == CV && TI != PTI) { - SmallSetVector<BasicBlock*, 4> FailBlocks; - if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) { - for (auto *Succ : FailBlocks) { - if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split")) - return false; - } - } + if (PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) { + // Unless the instruction has the same !dbg location as the original + // branch, drop it. When we fold the bonus instructions we want to make + // sure we reset their debug locations in order to avoid stepping on + // dead code caused by folding dead branches. + NewBonusInst->setDebugLoc(DebugLoc()); + } - // Figure out which 'cases' to copy from SI to PSI. - std::vector<ValueEqualityComparisonCase> BBCases; - BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases); - - std::vector<ValueEqualityComparisonCase> PredCases; - BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases); - - // Based on whether the default edge from PTI goes to BB or not, fill in - // PredCases and PredDefault with the new switch cases we would like to - // build. - SmallVector<BasicBlock *, 8> NewSuccessors; - - // Update the branch weight metadata along the way - SmallVector<uint64_t, 8> Weights; - bool PredHasWeights = HasBranchWeights(PTI); - bool SuccHasWeights = HasBranchWeights(TI); - - if (PredHasWeights) { - GetBranchWeights(PTI, Weights); - // branch-weight metadata is inconsistent here. - if (Weights.size() != 1 + PredCases.size()) - PredHasWeights = SuccHasWeights = false; - } else if (SuccHasWeights) - // If there are no predecessor weights but there are successor weights, - // populate Weights with 1, which will later be scaled to the sum of - // successor's weights - Weights.assign(1 + PredCases.size(), 1); - - SmallVector<uint64_t, 8> SuccWeights; - if (SuccHasWeights) { - GetBranchWeights(TI, SuccWeights); - // branch-weight metadata is inconsistent here. - if (SuccWeights.size() != 1 + BBCases.size()) - PredHasWeights = SuccHasWeights = false; - } else if (PredHasWeights) - SuccWeights.assign(1 + BBCases.size(), 1); - - if (PredDefault == BB) { - // If this is the default destination from PTI, only the edges in TI - // that don't occur in PTI, or that branch to BB will be activated. - std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; - for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - if (PredCases[i].Dest != BB) - PTIHandled.insert(PredCases[i].Value); - else { - // The default destination is BB, we don't need explicit targets. - std::swap(PredCases[i], PredCases.back()); - - if (PredHasWeights || SuccHasWeights) { - // Increase weight for the default case. - Weights[0] += Weights[i + 1]; - std::swap(Weights[i + 1], Weights.back()); - Weights.pop_back(); - } + RemapInstruction(NewBonusInst, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + VMap[&BonusInst] = NewBonusInst; + + // If we moved a load, we cannot any longer claim any knowledge about + // its potential value. The previous information might have been valid + // only given the branch precondition. + // For an analogous reason, we must also drop all the metadata whose + // semantics we don't understand. We *can* preserve !annotation, because + // it is tied to the instruction itself, not the value or position. + NewBonusInst->dropUnknownNonDebugMetadata(LLVMContext::MD_annotation); + + PredBlock->getInstList().insert(PTI->getIterator(), NewBonusInst); + NewBonusInst->takeName(&BonusInst); + BonusInst.setName(NewBonusInst->getName() + ".old"); + + // Update (liveout) uses of bonus instructions, + // now that the bonus instruction has been cloned into predecessor. + SSAUpdater SSAUpdate; + SSAUpdate.Initialize(BonusInst.getType(), + (NewBonusInst->getName() + ".merge").str()); + SSAUpdate.AddAvailableValue(BB, &BonusInst); + SSAUpdate.AddAvailableValue(PredBlock, NewBonusInst); + for (Use &U : make_early_inc_range(BonusInst.uses())) + SSAUpdate.RewriteUseAfterInsertions(U); + } +} - PredCases.pop_back(); - --i; - --e; - } +bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding( + Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) { + BasicBlock *BB = TI->getParent(); + BasicBlock *Pred = PTI->getParent(); + + std::vector<DominatorTree::UpdateType> Updates; + + // Figure out which 'cases' to copy from SI to PSI. + std::vector<ValueEqualityComparisonCase> BBCases; + BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases); - // Reconstruct the new switch statement we will be building. - if (PredDefault != BBDefault) { - PredDefault->removePredecessor(Pred); - PredDefault = BBDefault; - NewSuccessors.push_back(BBDefault); + std::vector<ValueEqualityComparisonCase> PredCases; + BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases); + + // Based on whether the default edge from PTI goes to BB or not, fill in + // PredCases and PredDefault with the new switch cases we would like to + // build. + SmallMapVector<BasicBlock *, int, 8> NewSuccessors; + + // Update the branch weight metadata along the way + SmallVector<uint64_t, 8> Weights; + bool PredHasWeights = HasBranchWeights(PTI); + bool SuccHasWeights = HasBranchWeights(TI); + + if (PredHasWeights) { + GetBranchWeights(PTI, Weights); + // branch-weight metadata is inconsistent here. + if (Weights.size() != 1 + PredCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (SuccHasWeights) + // If there are no predecessor weights but there are successor weights, + // populate Weights with 1, which will later be scaled to the sum of + // successor's weights + Weights.assign(1 + PredCases.size(), 1); + + SmallVector<uint64_t, 8> SuccWeights; + if (SuccHasWeights) { + GetBranchWeights(TI, SuccWeights); + // branch-weight metadata is inconsistent here. + if (SuccWeights.size() != 1 + BBCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (PredHasWeights) + SuccWeights.assign(1 + BBCases.size(), 1); + + if (PredDefault == BB) { + // If this is the default destination from PTI, only the edges in TI + // that don't occur in PTI, or that branch to BB will be activated. + std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].Dest != BB) + PTIHandled.insert(PredCases[i].Value); + else { + // The default destination is BB, we don't need explicit targets. + std::swap(PredCases[i], PredCases.back()); + + if (PredHasWeights || SuccHasWeights) { + // Increase weight for the default case. + Weights[0] += Weights[i + 1]; + std::swap(Weights[i + 1], Weights.back()); + Weights.pop_back(); } - unsigned CasesFromPred = Weights.size(); - uint64_t ValidTotalSuccWeight = 0; - for (unsigned i = 0, e = BBCases.size(); i != e; ++i) - if (!PTIHandled.count(BBCases[i].Value) && - BBCases[i].Dest != BBDefault) { - PredCases.push_back(BBCases[i]); - NewSuccessors.push_back(BBCases[i].Dest); - if (SuccHasWeights || PredHasWeights) { - // The default weight is at index 0, so weight for the ith case - // should be at index i+1. Scale the cases from successor by - // PredDefaultWeight (Weights[0]). - Weights.push_back(Weights[0] * SuccWeights[i + 1]); - ValidTotalSuccWeight += SuccWeights[i + 1]; - } - } + PredCases.pop_back(); + --i; + --e; + } + // Reconstruct the new switch statement we will be building. + if (PredDefault != BBDefault) { + PredDefault->removePredecessor(Pred); + if (PredDefault != BB) + Updates.push_back({DominatorTree::Delete, Pred, PredDefault}); + PredDefault = BBDefault; + ++NewSuccessors[BBDefault]; + } + + unsigned CasesFromPred = Weights.size(); + uint64_t ValidTotalSuccWeight = 0; + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) + if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) { + PredCases.push_back(BBCases[i]); + ++NewSuccessors[BBCases[i].Dest]; if (SuccHasWeights || PredHasWeights) { - ValidTotalSuccWeight += SuccWeights[0]; - // Scale the cases from predecessor by ValidTotalSuccWeight. - for (unsigned i = 1; i < CasesFromPred; ++i) - Weights[i] *= ValidTotalSuccWeight; - // Scale the default weight by SuccDefaultWeight (SuccWeights[0]). - Weights[0] *= SuccWeights[0]; + // The default weight is at index 0, so weight for the ith case + // should be at index i+1. Scale the cases from successor by + // PredDefaultWeight (Weights[0]). + Weights.push_back(Weights[0] * SuccWeights[i + 1]); + ValidTotalSuccWeight += SuccWeights[i + 1]; } - } else { - // If this is not the default destination from PSI, only the edges - // in SI that occur in PSI with a destination of BB will be - // activated. - std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; - std::map<ConstantInt *, uint64_t> WeightsForHandled; - for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - if (PredCases[i].Dest == BB) { - PTIHandled.insert(PredCases[i].Value); - - if (PredHasWeights || SuccHasWeights) { - WeightsForHandled[PredCases[i].Value] = Weights[i + 1]; - std::swap(Weights[i + 1], Weights.back()); - Weights.pop_back(); - } - - std::swap(PredCases[i], PredCases.back()); - PredCases.pop_back(); - --i; - --e; - } + } - // Okay, now we know which constants were sent to BB from the - // predecessor. Figure out where they will all go now. - for (unsigned i = 0, e = BBCases.size(); i != e; ++i) - if (PTIHandled.count(BBCases[i].Value)) { - // If this is one we are capable of getting... - if (PredHasWeights || SuccHasWeights) - Weights.push_back(WeightsForHandled[BBCases[i].Value]); - PredCases.push_back(BBCases[i]); - NewSuccessors.push_back(BBCases[i].Dest); - PTIHandled.erase( - BBCases[i].Value); // This constant is taken care of - } + if (SuccHasWeights || PredHasWeights) { + ValidTotalSuccWeight += SuccWeights[0]; + // Scale the cases from predecessor by ValidTotalSuccWeight. + for (unsigned i = 1; i < CasesFromPred; ++i) + Weights[i] *= ValidTotalSuccWeight; + // Scale the default weight by SuccDefaultWeight (SuccWeights[0]). + Weights[0] *= SuccWeights[0]; + } + } else { + // If this is not the default destination from PSI, only the edges + // in SI that occur in PSI with a destination of BB will be + // activated. + std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; + std::map<ConstantInt *, uint64_t> WeightsForHandled; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].Dest == BB) { + PTIHandled.insert(PredCases[i].Value); - // If there are any constants vectored to BB that TI doesn't handle, - // they must go to the default destination of TI. - for (ConstantInt *I : PTIHandled) { - if (PredHasWeights || SuccHasWeights) - Weights.push_back(WeightsForHandled[I]); - PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault)); - NewSuccessors.push_back(BBDefault); + if (PredHasWeights || SuccHasWeights) { + WeightsForHandled[PredCases[i].Value] = Weights[i + 1]; + std::swap(Weights[i + 1], Weights.back()); + Weights.pop_back(); } + + std::swap(PredCases[i], PredCases.back()); + PredCases.pop_back(); + --i; + --e; } - // Okay, at this point, we know which new successor Pred will get. Make - // sure we update the number of entries in the PHI nodes for these - // successors. - for (BasicBlock *NewSuccessor : NewSuccessors) - AddPredecessorToBlock(NewSuccessor, Pred, BB); - - Builder.SetInsertPoint(PTI); - // Convert pointer to int before we switch. - if (CV->getType()->isPointerTy()) { - CV = Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), - "magicptr"); + // Okay, now we know which constants were sent to BB from the + // predecessor. Figure out where they will all go now. + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) + if (PTIHandled.count(BBCases[i].Value)) { + // If this is one we are capable of getting... + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[BBCases[i].Value]); + PredCases.push_back(BBCases[i]); + ++NewSuccessors[BBCases[i].Dest]; + PTIHandled.erase(BBCases[i].Value); // This constant is taken care of } - // Now that the successors are updated, create the new Switch instruction. - SwitchInst *NewSI = - Builder.CreateSwitch(CV, PredDefault, PredCases.size()); - NewSI->setDebugLoc(PTI->getDebugLoc()); - for (ValueEqualityComparisonCase &V : PredCases) - NewSI->addCase(V.Value, V.Dest); + // If there are any constants vectored to BB that TI doesn't handle, + // they must go to the default destination of TI. + for (ConstantInt *I : PTIHandled) { + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[I]); + PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault)); + ++NewSuccessors[BBDefault]; + } + } + + // Okay, at this point, we know which new successor Pred will get. Make + // sure we update the number of entries in the PHI nodes for these + // successors. + for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor : + NewSuccessors) { + for (auto I : seq(0, NewSuccessor.second)) { + (void)I; + AddPredecessorToBlock(NewSuccessor.first, Pred, BB); + } + if (!is_contained(successors(Pred), NewSuccessor.first)) + Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first}); + } + + Builder.SetInsertPoint(PTI); + // Convert pointer to int before we switch. + if (CV->getType()->isPointerTy()) { + CV = + Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr"); + } + + // Now that the successors are updated, create the new Switch instruction. + SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size()); + NewSI->setDebugLoc(PTI->getDebugLoc()); + for (ValueEqualityComparisonCase &V : PredCases) + NewSI->addCase(V.Value, V.Dest); + + if (PredHasWeights || SuccHasWeights) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(Weights); + + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); - if (PredHasWeights || SuccHasWeights) { - // Halve the weights if any of them cannot fit in an uint32_t - FitWeights(Weights); + setBranchWeights(NewSI, MDWeights); + } - SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + EraseTerminatorAndDCECond(PTI); - setBranchWeights(NewSI, MDWeights); + // Okay, last check. If BB is still a successor of PSI, then we must + // have an infinite loop case. If so, add an infinitely looping block + // to handle the case to preserve the behavior of the code. + BasicBlock *InfLoopBlock = nullptr; + for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i) + if (NewSI->getSuccessor(i) == BB) { + if (!InfLoopBlock) { + // Insert it at the end of the function, because it's either code, + // or it won't matter if it's hot. :) + InfLoopBlock = + BasicBlock::Create(BB->getContext(), "infloop", BB->getParent()); + BranchInst::Create(InfLoopBlock, InfLoopBlock); + Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock}); } + NewSI->setSuccessor(i, InfLoopBlock); + } - EraseTerminatorAndDCECond(PTI); - - // Okay, last check. If BB is still a successor of PSI, then we must - // have an infinite loop case. If so, add an infinitely looping block - // to handle the case to preserve the behavior of the code. - BasicBlock *InfLoopBlock = nullptr; - for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i) - if (NewSI->getSuccessor(i) == BB) { - if (!InfLoopBlock) { - // Insert it at the end of the function, because it's either code, - // or it won't matter if it's hot. :) - InfLoopBlock = BasicBlock::Create(BB->getContext(), "infloop", - BB->getParent()); - BranchInst::Create(InfLoopBlock, InfLoopBlock); - } - NewSI->setSuccessor(i, InfLoopBlock); - } + if (InfLoopBlock) + Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock}); - Changed = true; + Updates.push_back({DominatorTree::Delete, Pred, BB}); + + if (DTU) + DTU->applyUpdates(Updates); + + ++NumFoldValueComparisonIntoPredecessors; + return true; +} + +/// The specified terminator is a value equality comparison instruction +/// (either a switch or a branch on "X == c"). +/// See if any of the predecessors of the terminator block are value comparisons +/// on the same value. If so, and if safe to do so, fold them together. +bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI, + IRBuilder<> &Builder) { + BasicBlock *BB = TI->getParent(); + Value *CV = isValueEqualityComparison(TI); // CondVal + assert(CV && "Not a comparison?"); + + bool Changed = false; + + SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); + while (!Preds.empty()) { + BasicBlock *Pred = Preds.pop_back_val(); + Instruction *PTI = Pred->getTerminator(); + + // Don't try to fold into itself. + if (Pred == BB) + continue; + + // See if the predecessor is a comparison with the same value. + Value *PCV = isValueEqualityComparison(PTI); // PredCondVal + if (PCV != CV) + continue; + + SmallSetVector<BasicBlock *, 4> FailBlocks; + if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) { + for (auto *Succ : FailBlocks) { + if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU)) + return false; + } } + + PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder); + Changed = true; } return Changed; } @@ -1248,7 +1364,7 @@ static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, return true; } -static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I); +static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false); /// Given a conditional branch that goes to BB1 and BB2, hoist any common code /// in the two blocks up into the branch block. The caller of this function @@ -1285,6 +1401,12 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, BasicBlock *BIParent = BI->getParent(); bool Changed = false; + + auto _ = make_scope_exit([&]() { + if (Changed) + ++NumHoistCommonCode; + }); + do { // If we are hoisting the terminator instruction, don't move one (making a // broken BB), instead clone it, and remove BI. @@ -1353,6 +1475,7 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, I2->eraseFromParent(); Changed = true; } + ++NumHoistCommonInstrs; I1 = &*BB1_Itr++; I2 = &*BB2_Itr++; @@ -1407,6 +1530,8 @@ HoistTerminator: I2->replaceAllUsesWith(NT); NT->takeName(I1); } + Changed = true; + ++NumHoistCommonInstrs; // Ensure terminator gets a debug location, even an unknown one, in case // it involves inlinable calls. @@ -1448,12 +1573,20 @@ HoistTerminator: } } + SmallVector<DominatorTree::UpdateType, 4> Updates; + // Update any PHI nodes in our new successors. - for (BasicBlock *Succ : successors(BB1)) + for (BasicBlock *Succ : successors(BB1)) { AddPredecessorToBlock(Succ, BIParent, BB1); + Updates.push_back({DominatorTree::Insert, BIParent, Succ}); + } + for (BasicBlock *Succ : successors(BI)) + Updates.push_back({DominatorTree::Delete, BIParent, Succ}); EraseTerminatorAndDCECond(BI); - return true; + if (DTU) + DTU->applyUpdates(Updates); + return Changed; } // Check lifetime markers. @@ -1744,7 +1877,8 @@ namespace { /// true, sink any common code from the predecessors to BB. /// We also allow one predecessor to end with conditional branch (but no more /// than one). -static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { +static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, + DomTreeUpdater *DTU) { // We support two situations: // (1) all incoming arcs are unconditional // (2) one incoming arc is conditional @@ -1800,7 +1934,6 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { if (UnconditionalPreds.size() < 2) return false; - bool Changed = false; // We take a two-step approach to tail sinking. First we scan from the end of // each block upwards in lockstep. If the n'th instruction from the end of each // block can be sunk, those instructions are added to ValuesToSink and we @@ -1820,6 +1953,12 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { --LRI; } + // If no instructions can be sunk, early-return. + if (ScanIdx == 0) + return false; + + bool Changed = false; + auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) { unsigned NumPHIdValues = 0; for (auto *I : *LRI) @@ -1834,7 +1973,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { return NumPHIInsts <= 1; }; - if (ScanIdx > 0 && Cond) { + if (Cond) { // Check if we would actually sink anything first! This mutates the CFG and // adds an extra block. The goal in doing this is to allow instructions that // couldn't be sunk before to be sunk - obviously, speculatable instructions @@ -1857,7 +1996,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n"); // We have a conditional edge and we're going to sink some instructions. // Insert a new block postdominating all blocks we're going to sink from. - if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split")) + if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU)) // Edges couldn't be split. return false; Changed = true; @@ -1875,7 +2014,8 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { // sink presuming a later value will also be sunk, but stop half way through // and never actually sink it which means we produce more PHIs than intended. // This is unlikely in practice though. - for (unsigned SinkIdx = 0; SinkIdx != ScanIdx; ++SinkIdx) { + unsigned SinkIdx = 0; + for (; SinkIdx != ScanIdx; ++SinkIdx) { LLVM_DEBUG(dbgs() << "SINK: Sink: " << *UnconditionalPreds[0]->getTerminator()->getPrevNode() << "\n"); @@ -1890,11 +2030,18 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { break; } - if (!sinkLastInstruction(UnconditionalPreds)) - return Changed; - NumSinkCommons++; + if (!sinkLastInstruction(UnconditionalPreds)) { + LLVM_DEBUG( + dbgs() + << "SINK: stopping here, failed to actually sink instruction!\n"); + break; + } + + NumSinkCommonInstrs++; Changed = true; } + if (SinkIdx != 0) + ++NumSinkCommonCode; return Changed; } @@ -1938,7 +2085,9 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, // Look for a store to the same pointer in BrBB. unsigned MaxNumInstToLookAt = 9; - for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug())) { + // Skip pseudo probe intrinsic calls which are not really killing any memory + // accesses. + for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) { if (!MaxNumInstToLookAt) break; --MaxNumInstToLookAt; @@ -1959,6 +2108,65 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, return nullptr; } +/// Estimate the cost of the insertion(s) and check that the PHI nodes can be +/// converted to selects. +static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, + BasicBlock *EndBB, + unsigned &SpeculatedInstructions, + int &BudgetRemaining, + const TargetTransformInfo &TTI) { + TargetTransformInfo::TargetCostKind CostKind = + BB->getParent()->hasMinSize() + ? TargetTransformInfo::TCK_CodeSize + : TargetTransformInfo::TCK_SizeAndLatency; + + bool HaveRewritablePHIs = false; + for (PHINode &PN : EndBB->phis()) { + Value *OrigV = PN.getIncomingValueForBlock(BB); + Value *ThenV = PN.getIncomingValueForBlock(ThenBB); + + // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. + // Skip PHIs which are trivial. + if (ThenV == OrigV) + continue; + + BudgetRemaining -= + TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + + // Don't convert to selects if we could remove undefined behavior instead. + if (passingValueIsAlwaysUndefined(OrigV, &PN) || + passingValueIsAlwaysUndefined(ThenV, &PN)) + return false; + + HaveRewritablePHIs = true; + ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV); + ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV); + if (!OrigCE && !ThenCE) + continue; // Known safe and cheap. + + if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || + (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) + return false; + unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0; + unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0; + unsigned MaxCost = + 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; + if (OrigCost + ThenCost > MaxCost) + return false; + + // Account for the cost of an unfolded ConstantExpr which could end up + // getting expanded into Instructions. + // FIXME: This doesn't account for how many operations are combined in the + // constant expression. + ++SpeculatedInstructions; + if (SpeculatedInstructions > 1) + return false; + } + + return HaveRewritablePHIs; +} + /// Speculate a conditional basic block flattening the CFG. /// /// Note that this is a very risky transform currently. Speculating @@ -2005,6 +2213,8 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, BasicBlock *BB = BI->getParent(); BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0); + int BudgetRemaining = + PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; // If ThenBB is actually on the false edge of the conditional branch, remember // to swap the select operands later. @@ -2037,6 +2247,14 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, continue; } + // Skip pseudo probes. The consequence is we lose track of the branch + // probability for ThenBB, which is fine since the optimization here takes + // place regardless of the branch probability. + if (isa<PseudoProbeInst>(I)) { + SpeculatedDbgIntrinsics.push_back(I); + continue; + } + // Only speculatively execute a single instruction (not counting the // terminator) for now. ++SpeculatedInstructions; @@ -2082,50 +2300,13 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, return false; } - // Check that the PHI nodes can be converted to selects. - bool HaveRewritablePHIs = false; - for (PHINode &PN : EndBB->phis()) { - Value *OrigV = PN.getIncomingValueForBlock(BB); - Value *ThenV = PN.getIncomingValueForBlock(ThenBB); - - // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. - // Skip PHIs which are trivial. - if (ThenV == OrigV) - continue; - - // Don't convert to selects if we could remove undefined behavior instead. - if (passingValueIsAlwaysUndefined(OrigV, &PN) || - passingValueIsAlwaysUndefined(ThenV, &PN)) - return false; - - HaveRewritablePHIs = true; - ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV); - ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV); - if (!OrigCE && !ThenCE) - continue; // Known safe and cheap. - - if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || - (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) - return false; - unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0; - unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0; - unsigned MaxCost = - 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; - if (OrigCost + ThenCost > MaxCost) - return false; - - // Account for the cost of an unfolded ConstantExpr which could end up - // getting expanded into Instructions. - // FIXME: This doesn't account for how many operations are combined in the - // constant expression. - ++SpeculatedInstructions; - if (SpeculatedInstructions > 1) - return false; - } - - // If there are no PHIs to process, bail early. This helps ensure idempotence - // as well. - if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue)) + // Check that we can insert the selects and that it's not too expensive to do + // so. + bool Convert = SpeculatedStore != nullptr; + Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB, + SpeculatedInstructions, + BudgetRemaining, TTI); + if (!Convert || BudgetRemaining < 0) return false; // If we get here, we can hoist the instruction and if-convert. @@ -2199,6 +2380,12 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { for (Instruction &I : BB->instructionsWithoutDebug()) { if (Size > MaxSmallBlockSize) return false; // Don't clone large BB's. + + // Can't fold blocks that contain noduplicate or convergent calls. + if (CallInst *CI = dyn_cast<CallInst>(&I)) + if (CI->cannotDuplicate() || CI->isConvergent()) + return false; + // We will delete Phis while threading, so Phis should not be accounted in // block's size if (!isa<PHINode>(I)) @@ -2221,8 +2408,8 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { /// If we have a conditional branch on a PHI node value that is defined in the /// same block as the branch and if any PHI entries are constants, thread edges /// corresponding to that entry to be branches to their ultimate destination. -static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, - AssumptionCache *AC) { +static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU, + const DataLayout &DL, AssumptionCache *AC) { BasicBlock *BB = BI->getParent(); PHINode *PN = dyn_cast<PHINode>(BI->getCondition()); // NOTE: we currently cannot transform this case if the PHI node is used @@ -2240,13 +2427,6 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; - // Can't fold blocks that contain noduplicate or convergent calls. - if (any_of(*BB, [](const Instruction &I) { - const CallInst *CI = dyn_cast<CallInst>(&I); - return CI && (CI->cannotDuplicate() || CI->isConvergent()); - })) - return false; - // Okay, this is a simple enough basic block. See if any phi values are // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { @@ -2265,6 +2445,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, if (isa<IndirectBrInst>(PredBB->getTerminator())) continue; + SmallVector<DominatorTree::UpdateType, 3> Updates; + // The dest block might have PHI nodes, other predecessors and other // difficult cases. Instead of being smart about this, just insert a new // block that jumps to the destination block, effectively splitting @@ -2273,6 +2455,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge", RealDest->getParent(), RealDest); BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB); + Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest}); CritEdgeBranch->setDebugLoc(BI->getDebugLoc()); // Update PHI nodes. @@ -2331,8 +2514,14 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, PredBBTI->setSuccessor(i, EdgeBB); } + Updates.push_back({DominatorTree::Insert, PredBB, EdgeBB}); + Updates.push_back({DominatorTree::Delete, PredBB, BB}); + + if (DTU) + DTU->applyUpdates(Updates); + // Recurse, simplifying any other constants. - return FoldCondBranchOnPHI(BI, DL, AC) || true; + return FoldCondBranchOnPHI(BI, DTU, DL, AC) || true; } return false; @@ -2341,7 +2530,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, /// Given a BB that starts with the specified two-entry PHI node, /// see if we can eliminate it. static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, - const DataLayout &DL) { + DomTreeUpdater *DTU, const DataLayout &DL) { // Ok, this is a two entry PHI node. Check to see if this is a simple "if // statement", which has a very simple dominance structure. Basically, we // are trying to find the condition that is being branched on, which @@ -2374,11 +2563,13 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, int BudgetRemaining = TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; + bool Changed = false; for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { PHINode *PN = cast<PHINode>(II++); if (Value *V = SimplifyInstruction(PN, {DL, PN})) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); + Changed = true; continue; } @@ -2386,7 +2577,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, BudgetRemaining, TTI) || !DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts, BudgetRemaining, TTI)) - return false; + return Changed; } // If we folded the first phi, PN dangles at this point. Refresh it. If @@ -2413,7 +2604,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, isa<BinaryOperator>(IfCond)) && !CanHoistNotFromBothValues(PN->getIncomingValue(0), PN->getIncomingValue(1))) - return false; + return Changed; // If all PHI nodes are promotable, check to make sure that all instructions // in the predecessor blocks can be promoted as well. If not, we won't be able @@ -2427,11 +2618,12 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } else { DomBlock = *pred_begin(IfBlock1); for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); ++I) - if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) { + if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) && + !isa<PseudoProbeInst>(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control flow, so // the xform is not worth it. - return false; + return Changed; } } @@ -2440,11 +2632,12 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } else { DomBlock = *pred_begin(IfBlock2); for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); ++I) - if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) { + if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) && + !isa<PseudoProbeInst>(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control flow, so // the xform is not worth it. - return false; + return Changed; } } assert(DomBlock && "Failed to find root DomBlock"); @@ -2487,7 +2680,18 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, Instruction *OldTI = DomBlock->getTerminator(); Builder.SetInsertPoint(OldTI); Builder.CreateBr(BB); + + SmallVector<DominatorTree::UpdateType, 3> Updates; + if (DTU) { + Updates.push_back({DominatorTree::Insert, DomBlock, BB}); + for (auto *Successor : successors(DomBlock)) + Updates.push_back({DominatorTree::Delete, DomBlock, Successor}); + } + OldTI->eraseFromParent(); + if (DTU) + DTU->applyUpdates(Updates); + return true; } @@ -2496,9 +2700,11 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, /// introducing a select if the return values disagree. bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, IRBuilder<> &Builder) { + auto *BB = BI->getParent(); assert(BI->isConditional() && "Must be a conditional branch"); BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); + // NOTE: destinations may match, this could be degenerate uncond branch. ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator()); ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator()); @@ -2515,10 +2721,17 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, // there is no return value for this function, just change the // branch into a return. if (FalseRet->getNumOperands() == 0) { - TrueSucc->removePredecessor(BI->getParent()); - FalseSucc->removePredecessor(BI->getParent()); + TrueSucc->removePredecessor(BB); + FalseSucc->removePredecessor(BB); Builder.CreateRetVoid(); EraseTerminatorAndDCECond(BI); + if (DTU) { + SmallVector<DominatorTree::UpdateType, 2> Updates; + Updates.push_back({DominatorTree::Delete, BB, TrueSucc}); + if (TrueSucc != FalseSucc) + Updates.push_back({DominatorTree::Delete, BB, FalseSucc}); + DTU->applyUpdates(Updates); + } return true; } @@ -2530,10 +2743,10 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, // Unwrap any PHI nodes in the return blocks. if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue)) if (TVPN->getParent() == TrueSucc) - TrueValue = TVPN->getIncomingValueForBlock(BI->getParent()); + TrueValue = TVPN->getIncomingValueForBlock(BB); if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue)) if (FVPN->getParent() == FalseSucc) - FalseValue = FVPN->getIncomingValueForBlock(BI->getParent()); + FalseValue = FVPN->getIncomingValueForBlock(BB); // In order for this transformation to be safe, we must be able to // unconditionally execute both operands to the return. This is @@ -2549,8 +2762,8 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, // Okay, we collected all the mapped values and checked them for sanity, and // defined to really do this transformation. First, update the CFG. - TrueSucc->removePredecessor(BI->getParent()); - FalseSucc->removePredecessor(BI->getParent()); + TrueSucc->removePredecessor(BB); + FalseSucc->removePredecessor(BB); // Insert select instructions where needed. Value *BrCond = BI->getCondition(); @@ -2575,27 +2788,17 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, << *TrueSucc << "\nFALSEBLOCK: " << *FalseSucc); EraseTerminatorAndDCECond(BI); + if (DTU) { + SmallVector<DominatorTree::UpdateType, 2> Updates; + Updates.push_back({DominatorTree::Delete, BB, TrueSucc}); + if (TrueSucc != FalseSucc) + Updates.push_back({DominatorTree::Delete, BB, FalseSucc}); + DTU->applyUpdates(Updates); + } return true; } -/// Return true if the given instruction is available -/// in its predecessor block. If yes, the instruction will be removed. -static bool tryCSEWithPredecessor(Instruction *Inst, BasicBlock *PB) { - if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst)) - return false; - for (Instruction &I : *PB) { - Instruction *PBI = &I; - // Check whether Inst and PBI generate the same value. - if (Inst->isIdenticalTo(PBI)) { - Inst->replaceAllUsesWith(PBI); - Inst->eraseFromParent(); - return true; - } - } - return false; -} - /// Return true if either PBI or BI has branch weight available, and store /// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does /// not have branch weight, use 1:1 as its weight. @@ -2619,63 +2822,174 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, } } +// Determine if the two branches share a common destination, +// and deduce a glue that we need to use to join branch's conditions +// to arrive at the common destination. +static Optional<std::pair<Instruction::BinaryOps, bool>> +CheckIfCondBranchesShareCommonDestination(BranchInst *BI, BranchInst *PBI) { + assert(BI && PBI && BI->isConditional() && PBI->isConditional() && + "Both blocks must end with a conditional branches."); + assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) && + "PredBB must be a predecessor of BB."); + + if (PBI->getSuccessor(0) == BI->getSuccessor(0)) + return {{Instruction::Or, false}}; + else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) + return {{Instruction::And, false}}; + else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) + return {{Instruction::And, true}}; + else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) + return {{Instruction::Or, true}}; + return None; +} + +static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, + DomTreeUpdater *DTU, + MemorySSAUpdater *MSSAU) { + BasicBlock *BB = BI->getParent(); + BasicBlock *PredBlock = PBI->getParent(); + + // Determine if the two branches share a common destination. + Instruction::BinaryOps Opc; + bool InvertPredCond; + std::tie(Opc, InvertPredCond) = + *CheckIfCondBranchesShareCommonDestination(BI, PBI); + + LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); + + IRBuilder<> Builder(PBI); + // The builder is used to create instructions to eliminate the branch in BB. + // If BB's terminator has !annotation metadata, add it to the new + // instructions. + Builder.CollectMetadataToCopy(BB->getTerminator(), + {LLVMContext::MD_annotation}); + + // If we need to invert the condition in the pred block to match, do so now. + if (InvertPredCond) { + Value *NewCond = PBI->getCondition(); + if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) { + CmpInst *CI = cast<CmpInst>(NewCond); + CI->setPredicate(CI->getInversePredicate()); + } else { + NewCond = + Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not"); + } + + PBI->setCondition(NewCond); + PBI->swapSuccessors(); + } + + BasicBlock *UniqueSucc = + PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1); + + // Before cloning instructions, notify the successor basic block that it + // is about to have a new predecessor. This will update PHI nodes, + // which will allow us to update live-out uses of bonus instructions. + AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU); + + // Try to update branch weights. + uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; + if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, + SuccTrueWeight, SuccFalseWeight)) { + SmallVector<uint64_t, 8> NewWeights; + + if (PBI->getSuccessor(0) == BB) { + // PBI: br i1 %x, BB, FalseDest + // BI: br i1 %y, UniqueSucc, FalseDest + // TrueWeight is TrueWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * SuccTrueWeight); + // FalseWeight is FalseWeight for PBI * TotalWeight for BI + + // TrueWeight for PBI * FalseWeight for BI. + // We assume that total weights of a BranchInst can fit into 32 bits. + // Therefore, we will not have overflow using 64-bit arithmetic. + NewWeights.push_back(PredFalseWeight * + (SuccFalseWeight + SuccTrueWeight) + + PredTrueWeight * SuccFalseWeight); + } else { + // PBI: br i1 %x, TrueDest, BB + // BI: br i1 %y, TrueDest, UniqueSucc + // TrueWeight is TrueWeight for PBI * TotalWeight for BI + + // FalseWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) + + PredFalseWeight * SuccTrueWeight); + // FalseWeight is FalseWeight for PBI * FalseWeight for BI. + NewWeights.push_back(PredFalseWeight * SuccFalseWeight); + } + + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(NewWeights); + + SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end()); + setBranchWeights(PBI, MDWeights[0], MDWeights[1]); + + // TODO: If BB is reachable from all paths through PredBlock, then we + // could replace PBI's branch probabilities with BI's. + } else + PBI->setMetadata(LLVMContext::MD_prof, nullptr); + + // Now, update the CFG. + PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc); + + if (DTU) + DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc}, + {DominatorTree::Delete, PredBlock, BB}}); + + // If BI was a loop latch, it may have had associated loop metadata. + // We need to copy it to the new latch, that is, PBI. + if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop)) + PBI->setMetadata(LLVMContext::MD_loop, LoopMD); + + ValueToValueMapTy VMap; // maps original values to cloned values + CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap); + + // Now that the Cond was cloned into the predecessor basic block, + // or/and the two conditions together. + Instruction *NewCond = cast<Instruction>(Builder.CreateBinOp( + Opc, PBI->getCondition(), VMap[BI->getCondition()], "or.cond")); + PBI->setCondition(NewCond); + + // Copy any debug value intrinsics into the end of PredBlock. + for (Instruction &I : *BB) { + if (isa<DbgInfoIntrinsic>(I)) { + Instruction *NewI = I.clone(); + RemapInstruction(NewI, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + NewI->insertBefore(PBI); + } + } + + ++NumFoldBranchToCommonDest; + return true; +} + /// If this basic block is simple enough, and if a predecessor branches to us /// and one of our successors, fold the block into the predecessor and use /// logical operations to pick the right destination. -bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, +bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, + MemorySSAUpdater *MSSAU, + const TargetTransformInfo *TTI, unsigned BonusInstThreshold) { + // If this block ends with an unconditional branch, + // let SpeculativelyExecuteBB() deal with it. + if (!BI->isConditional()) + return false; + BasicBlock *BB = BI->getParent(); const unsigned PredCount = pred_size(BB); bool Changed = false; - Instruction *Cond = nullptr; - if (BI->isConditional()) - Cond = dyn_cast<Instruction>(BI->getCondition()); - else { - // For unconditional branch, check for a simple CFG pattern, where - // BB has a single predecessor and BB's successor is also its predecessor's - // successor. If such pattern exists, check for CSE between BB and its - // predecessor. - if (BasicBlock *PB = BB->getSinglePredecessor()) - if (BranchInst *PBI = dyn_cast<BranchInst>(PB->getTerminator())) - if (PBI->isConditional() && - (BI->getSuccessor(0) == PBI->getSuccessor(0) || - BI->getSuccessor(0) == PBI->getSuccessor(1))) { - for (auto I = BB->instructionsWithoutDebug().begin(), - E = BB->instructionsWithoutDebug().end(); - I != E;) { - Instruction *Curr = &*I++; - if (isa<CmpInst>(Curr)) { - Cond = Curr; - break; - } - // Quit if we can't remove this instruction. - if (!tryCSEWithPredecessor(Curr, PB)) - return Changed; - Changed = true; - } - } + TargetTransformInfo::TargetCostKind CostKind = + BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize + : TargetTransformInfo::TCK_SizeAndLatency; - if (!Cond) - return Changed; - } + Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || Cond->getParent() != BB || !Cond->hasOneUse()) return Changed; - // Make sure the instruction after the condition is the cond branch. - BasicBlock::iterator CondIt = ++Cond->getIterator(); - - // Ignore dbg intrinsics. - while (isa<DbgInfoIntrinsic>(CondIt)) - ++CondIt; - - if (&*CondIt != BI) - return Changed; - // Only allow this transformation if computing the condition doesn't involve // too many instructions and these involved instructions can be executed // unconditionally. We denote all involved instructions except the condition @@ -2683,19 +2997,16 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, // number of the bonus instructions we'll need to create when cloning into // each predecessor does not exceed a certain threshold. unsigned NumBonusInsts = 0; - for (auto I = BB->begin(); Cond != &*I; ++I) { - // Ignore dbg intrinsics. - if (isa<DbgInfoIntrinsic>(I)) + for (Instruction &I : *BB) { + // Don't check the branch condition comparison itself. + if (&I == Cond) continue; - if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(&*I)) - return Changed; - // I has only one use and can be executed unconditionally. - Instruction *User = dyn_cast<Instruction>(I->user_back()); - if (User == nullptr || User->getParent() != BB) + // Ignore dbg intrinsics, and the terminator. + if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I)) + continue; + // I must be safe to execute unconditionally. + if (!isSafeToSpeculativelyExecute(&I)) return Changed; - // I is used in the same BB. Since BI uses Cond and doesn't have more slots - // to use any other instruction, User must be an instruction between next(I) - // and Cond. // Account for the cost of duplicating this instruction into each // predecessor. @@ -2715,9 +3026,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, return Changed; // Finally, don't infinitely unroll conditional loops. - BasicBlock *TrueDest = BI->getSuccessor(0); - BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr; - if (TrueDest == BB || FalseDest == BB) + if (is_contained(successors(BB), BB)) return Changed; for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { @@ -2727,222 +3036,31 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, // Check that we have two conditional branches. If there is a PHI node in // the common successor, verify that the same value flows in from both // blocks. - SmallVector<PHINode *, 4> PHIs; - if (!PBI || PBI->isUnconditional() || - (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) || - (!BI->isConditional() && - !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs))) + if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI)) continue; // Determine if the two branches share a common destination. - Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd; - bool InvertPredCond = false; - - if (BI->isConditional()) { - if (PBI->getSuccessor(0) == TrueDest) { - Opc = Instruction::Or; - } else if (PBI->getSuccessor(1) == FalseDest) { - Opc = Instruction::And; - } else if (PBI->getSuccessor(0) == FalseDest) { - Opc = Instruction::And; - InvertPredCond = true; - } else if (PBI->getSuccessor(1) == TrueDest) { - Opc = Instruction::Or; - InvertPredCond = true; - } else { - continue; - } - } else { - if (PBI->getSuccessor(0) != TrueDest && PBI->getSuccessor(1) != TrueDest) - continue; - } - - LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); - Changed = true; - - IRBuilder<> Builder(PBI); - - // If we need to invert the condition in the pred block to match, do so now. - if (InvertPredCond) { - Value *NewCond = PBI->getCondition(); - - if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) { - CmpInst *CI = cast<CmpInst>(NewCond); - CI->setPredicate(CI->getInversePredicate()); - } else { - NewCond = - Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not"); - } + Instruction::BinaryOps Opc; + bool InvertPredCond; + if (auto Recepie = CheckIfCondBranchesShareCommonDestination(BI, PBI)) + std::tie(Opc, InvertPredCond) = *Recepie; + else + continue; - PBI->setCondition(NewCond); - PBI->swapSuccessors(); - } + // Check the cost of inserting the necessary logic before performing the + // transformation. + if (TTI) { + Type *Ty = BI->getCondition()->getType(); + unsigned Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind); + if (InvertPredCond && (!PBI->getCondition()->hasOneUse() || + !isa<CmpInst>(PBI->getCondition()))) + Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind); - // If we have bonus instructions, clone them into the predecessor block. - // Note that there may be multiple predecessor blocks, so we cannot move - // bonus instructions to a predecessor block. - ValueToValueMapTy VMap; // maps original values to cloned values - // We already make sure Cond is the last instruction before BI. Therefore, - // all instructions before Cond other than DbgInfoIntrinsic are bonus - // instructions. - for (auto BonusInst = BB->begin(); Cond != &*BonusInst; ++BonusInst) { - if (isa<DbgInfoIntrinsic>(BonusInst)) + if (Cost > BranchFoldThreshold) continue; - Instruction *NewBonusInst = BonusInst->clone(); - - // When we fold the bonus instructions we want to make sure we - // reset their debug locations in order to avoid stepping on dead - // code caused by folding dead branches. - NewBonusInst->setDebugLoc(DebugLoc()); - - RemapInstruction(NewBonusInst, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - VMap[&*BonusInst] = NewBonusInst; - - // If we moved a load, we cannot any longer claim any knowledge about - // its potential value. The previous information might have been valid - // only given the branch precondition. - // For an analogous reason, we must also drop all the metadata whose - // semantics we don't understand. - NewBonusInst->dropUnknownNonDebugMetadata(); - - PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst); - NewBonusInst->takeName(&*BonusInst); - BonusInst->setName(BonusInst->getName() + ".old"); } - // Clone Cond into the predecessor basic block, and or/and the - // two conditions together. - Instruction *CondInPred = Cond->clone(); - - // Reset the condition debug location to avoid jumping on dead code - // as the result of folding dead branches. - CondInPred->setDebugLoc(DebugLoc()); - - RemapInstruction(CondInPred, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - PredBlock->getInstList().insert(PBI->getIterator(), CondInPred); - CondInPred->takeName(Cond); - Cond->setName(CondInPred->getName() + ".old"); - - if (BI->isConditional()) { - Instruction *NewCond = cast<Instruction>( - Builder.CreateBinOp(Opc, PBI->getCondition(), CondInPred, "or.cond")); - PBI->setCondition(NewCond); - - uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; - bool HasWeights = - extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, - SuccTrueWeight, SuccFalseWeight); - SmallVector<uint64_t, 8> NewWeights; - - if (PBI->getSuccessor(0) == BB) { - if (HasWeights) { - // PBI: br i1 %x, BB, FalseDest - // BI: br i1 %y, TrueDest, FalseDest - // TrueWeight is TrueWeight for PBI * TrueWeight for BI. - NewWeights.push_back(PredTrueWeight * SuccTrueWeight); - // FalseWeight is FalseWeight for PBI * TotalWeight for BI + - // TrueWeight for PBI * FalseWeight for BI. - // We assume that total weights of a BranchInst can fit into 32 bits. - // Therefore, we will not have overflow using 64-bit arithmetic. - NewWeights.push_back(PredFalseWeight * - (SuccFalseWeight + SuccTrueWeight) + - PredTrueWeight * SuccFalseWeight); - } - AddPredecessorToBlock(TrueDest, PredBlock, BB, MSSAU); - PBI->setSuccessor(0, TrueDest); - } - if (PBI->getSuccessor(1) == BB) { - if (HasWeights) { - // PBI: br i1 %x, TrueDest, BB - // BI: br i1 %y, TrueDest, FalseDest - // TrueWeight is TrueWeight for PBI * TotalWeight for BI + - // FalseWeight for PBI * TrueWeight for BI. - NewWeights.push_back(PredTrueWeight * - (SuccFalseWeight + SuccTrueWeight) + - PredFalseWeight * SuccTrueWeight); - // FalseWeight is FalseWeight for PBI * FalseWeight for BI. - NewWeights.push_back(PredFalseWeight * SuccFalseWeight); - } - AddPredecessorToBlock(FalseDest, PredBlock, BB, MSSAU); - PBI->setSuccessor(1, FalseDest); - } - if (NewWeights.size() == 2) { - // Halve the weights if any of them cannot fit in an uint32_t - FitWeights(NewWeights); - - SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), - NewWeights.end()); - setBranchWeights(PBI, MDWeights[0], MDWeights[1]); - } else - PBI->setMetadata(LLVMContext::MD_prof, nullptr); - } else { - // Update PHI nodes in the common successors. - for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { - ConstantInt *PBI_C = cast<ConstantInt>( - PHIs[i]->getIncomingValueForBlock(PBI->getParent())); - assert(PBI_C->getType()->isIntegerTy(1)); - Instruction *MergedCond = nullptr; - if (PBI->getSuccessor(0) == TrueDest) { - // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value) - // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value) - // is false: !PBI_Cond and BI_Value - Instruction *NotCond = cast<Instruction>( - Builder.CreateNot(PBI->getCondition(), "not.cond")); - MergedCond = cast<Instruction>( - Builder.CreateBinOp(Instruction::And, NotCond, CondInPred, - "and.cond")); - if (PBI_C->isOne()) - MergedCond = cast<Instruction>(Builder.CreateBinOp( - Instruction::Or, PBI->getCondition(), MergedCond, "or.cond")); - } else { - // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C) - // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond) - // is false: PBI_Cond and BI_Value - MergedCond = cast<Instruction>(Builder.CreateBinOp( - Instruction::And, PBI->getCondition(), CondInPred, "and.cond")); - if (PBI_C->isOne()) { - Instruction *NotCond = cast<Instruction>( - Builder.CreateNot(PBI->getCondition(), "not.cond")); - MergedCond = cast<Instruction>(Builder.CreateBinOp( - Instruction::Or, NotCond, MergedCond, "or.cond")); - } - } - // Update PHI Node. - PHIs[i]->setIncomingValueForBlock(PBI->getParent(), MergedCond); - } - - // PBI is changed to branch to TrueDest below. Remove itself from - // potential phis from all other successors. - if (MSSAU) - MSSAU->changeCondBranchToUnconditionalTo(PBI, TrueDest); - - // Change PBI from Conditional to Unconditional. - BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI); - EraseTerminatorAndDCECond(PBI, MSSAU); - PBI = New_PBI; - } - - // If BI was a loop latch, it may have had associated loop metadata. - // We need to copy it to the new latch, that is, PBI. - if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop)) - PBI->setMetadata(LLVMContext::MD_loop, LoopMD); - - // TODO: If BB is reachable from all paths through PredBlock, then we - // could replace PBI's branch probabilities with BI's. - - // Copy any debug value intrinsics into the end of PredBlock. - for (Instruction &I : *BB) { - if (isa<DbgInfoIntrinsic>(I)) { - Instruction *NewI = I.clone(); - RemapInstruction(NewI, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - NewI->insertBefore(PBI); - } - } - - return Changed; + return PerformBranchToCommonDestFolding(BI, PBI, DTU, MSSAU); } return Changed; } @@ -3015,12 +3133,10 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, return PHI; } -static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, - BasicBlock *QTB, BasicBlock *QFB, - BasicBlock *PostBB, Value *Address, - bool InvertPCond, bool InvertQCond, - const DataLayout &DL, - const TargetTransformInfo &TTI) { +static bool mergeConditionalStoreToAddress( + BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, + BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, + DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { // For every pointer, there must be exactly two stores, one coming from // PTB or PFB, and the other from QTB or QFB. We don't support more than one // store (to any address) in PTB,PFB or QTB,QFB. @@ -3095,7 +3211,7 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, return true; }; - const SmallVector<StoreInst *, 2> FreeStores = {PStore, QStore}; + const std::array<StoreInst *, 2> FreeStores = {PStore, QStore}; if (!MergeCondStoresAggressively && (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) || !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores))) @@ -3109,8 +3225,8 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, // If QTB does not exist, then QFB's only predecessor has a conditional // branch to QFB and PostBB. BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor(); - BasicBlock *NewBB = SplitBlockPredecessors(PostBB, { QFB, TruePred}, - "condstore.split"); + BasicBlock *NewBB = + SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU); if (!NewBB) return false; PostBB = NewBB; @@ -3139,8 +3255,9 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, QPred = QB.CreateNot(QPred); Value *CombinedPred = QB.CreateOr(PPred, QPred); - auto *T = - SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), false); + auto *T = SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), + /*Unreachable=*/false, + /*BranchWeights=*/nullptr, DTU); QB.SetInsertPoint(T); StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address)); AAMDNodes AAMD; @@ -3160,7 +3277,7 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, } static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, - const DataLayout &DL, + DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { // The intention here is to find diamonds or triangles (see below) where each // conditional block contains a store to the same address. Both of these @@ -3262,16 +3379,17 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, bool Changed = false; for (auto *Address : CommonAddresses) - Changed |= mergeConditionalStoreToAddress( - PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL, TTI); + Changed |= + mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address, + InvertPCond, InvertQCond, DTU, DL, TTI); return Changed; } - /// If the previous block ended with a widenable branch, determine if reusing /// the target block is profitable and legal. This will have the effect of /// "widening" PBI, but doesn't require us to reason about hosting safety. -static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { +static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, + DomTreeUpdater *DTU) { // TODO: This can be generalized in two important ways: // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input // values from the PBI edge. @@ -3294,15 +3412,25 @@ static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (BI->getSuccessor(1) != IfFalseBB && // no inf looping BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability NoSideEffects(*BI->getParent())) { - BI->getSuccessor(1)->removePredecessor(BI->getParent()); + auto *OldSuccessor = BI->getSuccessor(1); + OldSuccessor->removePredecessor(BI->getParent()); BI->setSuccessor(1, IfFalseBB); + if (DTU) + DTU->applyUpdates( + {{DominatorTree::Insert, BI->getParent(), IfFalseBB}, + {DominatorTree::Delete, BI->getParent(), OldSuccessor}}); return true; } if (BI->getSuccessor(0) != IfFalseBB && // no inf looping BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability NoSideEffects(*BI->getParent())) { - BI->getSuccessor(0)->removePredecessor(BI->getParent()); + auto *OldSuccessor = BI->getSuccessor(0); + OldSuccessor->removePredecessor(BI->getParent()); BI->setSuccessor(0, IfFalseBB); + if (DTU) + DTU->applyUpdates( + {{DominatorTree::Insert, BI->getParent(), IfFalseBB}, + {DominatorTree::Delete, BI->getParent(), OldSuccessor}}); return true; } return false; @@ -3313,6 +3441,7 @@ static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { /// that PBI and BI are both conditional branches, and BI is in one of the /// successor blocks of PBI - PBI branches to BI. static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, + DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { assert(PBI->isConditional() && BI->isConditional()); @@ -3366,7 +3495,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // If the previous block ended with a widenable branch, determine if reusing // the target block is profitable and legal. This will have the effect of // "widening" PBI, but doesn't require us to reason about hosting safety. - if (tryWidenCondBranchToCondBranch(PBI, BI)) + if (tryWidenCondBranchToCondBranch(PBI, BI, DTU)) return true; if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition())) @@ -3376,7 +3505,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // If both branches are conditional and both contain stores to the same // address, remove the stores from the conditionals and create a conditional // merged store at the end. - if (MergeCondStores && mergeConditionalStores(PBI, BI, DL, TTI)) + if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI)) return true; // If this is a conditional branch in an empty block, and if any @@ -3419,6 +3548,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // case, it would be unsafe to hoist the operation into a select instruction. BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); + BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1); unsigned NumPhis = 0; for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II); ++II, ++NumPhis) { @@ -3444,6 +3574,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() << "AND: " << *BI->getParent()); + SmallVector<DominatorTree::UpdateType, 5> Updates; + // If OtherDest *is* BB, then BB is a basic block with a single conditional // branch in it, where one edge (OtherDest) goes back to itself but the other // exits. We don't *know* that the program avoids the infinite loop @@ -3457,6 +3589,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(), "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); + Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock}); OtherDest = InfLoopBlock; } @@ -3483,6 +3616,12 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, PBI->setSuccessor(0, CommonDest); PBI->setSuccessor(1, OtherDest); + Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest}); + Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest}); + + if (DTU) + DTU->applyUpdates(Updates); + // Update branch weight for PBI. uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; uint64_t PredCommon, PredOther, SuccCommon, SuccOther; @@ -3562,6 +3701,7 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, BasicBlock *FalseBB, uint32_t TrueWeight, uint32_t FalseWeight) { + auto *BB = OldTerm->getParent(); // Remove any superfluous successor edges from the CFG. // First, figure out which successors to preserve. // If TrueBB and FalseBB are equal, only try to preserve one copy of that @@ -3569,6 +3709,8 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, BasicBlock *KeepEdge1 = TrueBB; BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr; + SmallSetVector<BasicBlock *, 2> RemovedSuccessors; + // Then remove the rest. for (BasicBlock *Succ : successors(OldTerm)) { // Make sure only to keep exactly one copy of each edge. @@ -3576,9 +3718,13 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, KeepEdge1 = nullptr; else if (Succ == KeepEdge2) KeepEdge2 = nullptr; - else - Succ->removePredecessor(OldTerm->getParent(), + else { + Succ->removePredecessor(BB, /*KeepOneInputPHIs=*/true); + + if (Succ != TrueBB && Succ != FalseBB) + RemovedSuccessors.insert(Succ); + } } IRBuilder<> Builder(OldTerm); @@ -3586,11 +3732,11 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, // Insert an appropriate new terminator. if (!KeepEdge1 && !KeepEdge2) { - if (TrueBB == FalseBB) + if (TrueBB == FalseBB) { // We were only looking for one successor, and it was present. // Create an unconditional branch to it. Builder.CreateBr(TrueBB); - else { + } else { // We found both of the successors we were looking for. // Create a conditional branch sharing the condition of the select. BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB); @@ -3605,15 +3751,25 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, // One of the selected values was a successor, but the other wasn't. // Insert an unconditional branch to the one that was found; // the edge to the one that wasn't must be unreachable. - if (!KeepEdge1) + if (!KeepEdge1) { // Only TrueBB was found. Builder.CreateBr(TrueBB); - else + } else { // Only FalseBB was found. Builder.CreateBr(FalseBB); + } } EraseTerminatorAndDCECond(OldTerm); + + if (DTU) { + SmallVector<DominatorTree::UpdateType, 2> Updates; + Updates.reserve(RemovedSuccessors.size()); + for (auto *RemovedSuccessor : RemovedSuccessors) + Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); + DTU->applyUpdates(Updates); + } + return true; } @@ -3768,6 +3924,8 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt( ICI->replaceAllUsesWith(DefaultCst); ICI->eraseFromParent(); + SmallVector<DominatorTree::UpdateType, 2> Updates; + // Okay, the switch goes to this block on a default value. Add an edge from // the switch to the merge point on the compared value. BasicBlock *NewBB = @@ -3781,13 +3939,17 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt( SIW.setSuccessorWeight(0, *NewW); } SIW.addCase(Cst, NewBB, NewW); + Updates.push_back({DominatorTree::Insert, Pred, NewBB}); } // NewBB branches to the phi block, add the uncond branch and the phi entry. Builder.SetInsertPoint(NewBB); Builder.SetCurrentDebugLocation(SI->getDebugLoc()); Builder.CreateBr(SuccBlock); + Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock}); PHIUse->addIncoming(NewCst, NewBB); + if (DTU) + DTU->applyUpdates(Updates); return true; } @@ -3821,7 +3983,7 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, if (UsedICmps <= 1) return false; - bool TrueWhenEqual = (Cond->getOpcode() == Instruction::Or); + bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value())); // There might be duplicate constants in the list, which the switch // instruction can't handle, remove them now. @@ -3853,12 +4015,15 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, << " cases into SWITCH. BB is:\n" << *BB); + SmallVector<DominatorTree::UpdateType, 2> Updates; + // If there are any extra values that couldn't be folded into the switch // then we evaluate them with an explicit branch first. Split the block // right before the condbr to handle it. if (ExtraCase) { - BasicBlock *NewBB = - BB->splitBasicBlock(BI->getIterator(), "switch.early.test"); + BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr, + /*MSSAU=*/nullptr, "switch.early.test"); + // Remove the uncond branch added to the old block. Instruction *OldTI = BB->getTerminator(); Builder.SetInsertPoint(OldTI); @@ -3870,6 +4035,8 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, OldTI->eraseFromParent(); + Updates.push_back({DominatorTree::Insert, BB, EdgeBB}); + // If there are PHI nodes in EdgeBB, then we need to add a new entry to them // for the edge we just added. AddPredecessorToBlock(EdgeBB, BB, NewBB); @@ -3905,6 +4072,8 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, // Erase the old branch instruction. EraseTerminatorAndDCECond(BI); + if (DTU) + DTU->applyUpdates(Updates); LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); return true; @@ -3921,17 +4090,36 @@ bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { return false; } +// Check if cleanup block is empty +static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) { + for (Instruction &I : R) { + auto *II = dyn_cast<IntrinsicInst>(&I); + if (!II) + return false; + + Intrinsic::ID IntrinsicID = II->getIntrinsicID(); + switch (IntrinsicID) { + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::dbg_label: + case Intrinsic::lifetime_end: + break; + default: + return false; + } + } + return true; +} + // Simplify resume that is shared by several landing pads (phi of landing pad). bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) { BasicBlock *BB = RI->getParent(); - // Check that there are no other instructions except for debug intrinsics - // between the phi of landing pads (RI->getValue()) and resume instruction. - BasicBlock::iterator I = cast<Instruction>(RI->getValue())->getIterator(), - E = RI->getIterator(); - while (++I != E) - if (!isa<DbgInfoIntrinsic>(I)) - return false; + // Check that there are no other instructions except for debug and lifetime + // intrinsics between the phi's and resume instruction. + if (!isCleanupBlockEmpty( + make_range(RI->getParent()->getFirstNonPHI(), BB->getTerminator()))) + return false; SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks; auto *PhiLPInst = cast<PHINode>(RI->getValue()); @@ -3952,17 +4140,8 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) { if (IncomingValue != LandingPad) continue; - bool isTrivial = true; - - I = IncomingBB->getFirstNonPHI()->getIterator(); - E = IncomingBB->getTerminator()->getIterator(); - while (++I != E) - if (!isa<DbgInfoIntrinsic>(I)) { - isTrivial = false; - break; - } - - if (isTrivial) + if (isCleanupBlockEmpty( + make_range(LandingPad->getNextNode(), IncomingBB->getTerminator()))) TrivialUnwindBlocks.insert(IncomingBB); } @@ -3981,7 +4160,8 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) { for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB); PI != PE;) { BasicBlock *Pred = *PI++; - removeUnwindEdge(Pred); + removeUnwindEdge(Pred, DTU); + ++NumInvokes; } // In each SimplifyCFG run, only the current processed block can be erased. @@ -3991,37 +4171,21 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) { // predecessors. TrivialBB->getTerminator()->eraseFromParent(); new UnreachableInst(RI->getContext(), TrivialBB); + if (DTU) + DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}}); } // Delete the resume block if all its predecessors have been removed. - if (pred_empty(BB)) - BB->eraseFromParent(); + if (pred_empty(BB)) { + if (DTU) + DTU->deleteBB(BB); + else + BB->eraseFromParent(); + } return !TrivialUnwindBlocks.empty(); } -// Check if cleanup block is empty -static bool isCleanupBlockEmpty(Instruction *Inst, Instruction *RI) { - BasicBlock::iterator I = Inst->getIterator(), E = RI->getIterator(); - while (++I != E) { - auto *II = dyn_cast<IntrinsicInst>(I); - if (!II) - return false; - - Intrinsic::ID IntrinsicID = II->getIntrinsicID(); - switch (IntrinsicID) { - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::dbg_label: - case Intrinsic::lifetime_end: - break; - default: - return false; - } - } - return true; -} - // Simplify resume that is only used by a single (non-phi) landing pad. bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) { BasicBlock *BB = RI->getParent(); @@ -4030,23 +4194,26 @@ bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) { "Resume must unwind the exception that caused control to here"); // Check that there are no other instructions except for debug intrinsics. - if (!isCleanupBlockEmpty(LPInst, RI)) + if (!isCleanupBlockEmpty( + make_range<Instruction *>(LPInst->getNextNode(), RI))) return false; // Turn all invokes that unwind here into calls and delete the basic block. for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { BasicBlock *Pred = *PI++; - removeUnwindEdge(Pred); + removeUnwindEdge(Pred, DTU); + ++NumInvokes; } // The landingpad is now unreachable. Zap it. - if (LoopHeaders) - LoopHeaders->erase(BB); - BB->eraseFromParent(); + if (DTU) + DTU->deleteBB(BB); + else + BB->eraseFromParent(); return true; } -static bool removeEmptyCleanup(CleanupReturnInst *RI) { +static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) { // If this is a trivial cleanup pad that executes no instructions, it can be // eliminated. If the cleanup pad continues to the caller, any predecessor // that is an EH pad will be updated to continue to the caller and any @@ -4067,7 +4234,8 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI) { return false; // Check that there are no other instructions except for benign intrinsics. - if (!isCleanupBlockEmpty(CPInst, RI)) + if (!isCleanupBlockEmpty( + make_range<Instruction *>(CPInst->getNextNode(), RI))) return false; // If the cleanup return we are simplifying unwinds to the caller, this will @@ -4152,19 +4320,32 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI) { } } + std::vector<DominatorTree::UpdateType> Updates; + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { // The iterator must be updated here because we are removing this pred. BasicBlock *PredBB = *PI++; if (UnwindDest == nullptr) { - removeUnwindEdge(PredBB); + if (DTU) + DTU->applyUpdates(Updates); + Updates.clear(); + removeUnwindEdge(PredBB, DTU); + ++NumInvokes; } else { Instruction *TI = PredBB->getTerminator(); TI->replaceUsesOfWith(BB, UnwindDest); + Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest}); + Updates.push_back({DominatorTree::Delete, PredBB, BB}); } } - // The cleanup pad is now unreachable. Zap it. - BB->eraseFromParent(); + if (DTU) { + DTU->applyUpdates(Updates); + DTU->deleteBB(BB); + } else + // The cleanup pad is now unreachable. Zap it. + BB->eraseFromParent(); + return true; } @@ -4211,7 +4392,7 @@ bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) { if (mergeCleanupPad(RI)) return true; - if (removeEmptyCleanup(RI)) + if (removeEmptyCleanup(RI, DTU)) return true; return false; @@ -4242,15 +4423,16 @@ bool SimplifyCFGOpt::simplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { BasicBlock *Pred = UncondBranchPreds.pop_back_val(); LLVM_DEBUG(dbgs() << "FOLDING: " << *BB << "INTO UNCOND BRANCH PRED: " << *Pred); - (void)FoldReturnIntoUncondBranch(RI, BB, Pred); + (void)FoldReturnIntoUncondBranch(RI, BB, Pred, DTU); } // If we eliminated all predecessors of the block, delete the block now. if (pred_empty(BB)) { // We know there are no successors, so just nuke the block. - if (LoopHeaders) - LoopHeaders->erase(BB); - BB->eraseFromParent(); + if (DTU) + DTU->deleteBB(BB); + else + BB->eraseFromParent(); } return true; @@ -4330,18 +4512,26 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { if (&BB->front() != UI) return Changed; - SmallVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB)); + std::vector<DominatorTree::UpdateType> Updates; + + SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB)); for (unsigned i = 0, e = Preds.size(); i != e; ++i) { - Instruction *TI = Preds[i]->getTerminator(); + auto *Predecessor = Preds[i]; + Instruction *TI = Predecessor->getTerminator(); IRBuilder<> Builder(TI); if (auto *BI = dyn_cast<BranchInst>(TI)) { - if (BI->isUnconditional()) { - assert(BI->getSuccessor(0) == BB && "Incorrect CFG"); + // We could either have a proper unconditional branch, + // or a degenerate conditional branch with matching destinations. + if (all_of(BI->successors(), + [BB](auto *Successor) { return Successor == BB; })) { new UnreachableInst(TI->getContext(), TI); TI->eraseFromParent(); Changed = true; } else { + assert(BI->isConditional() && "Can't get here with an uncond branch."); Value* Cond = BI->getCondition(); + assert(BI->getSuccessor(0) != BI->getSuccessor(1) && + "The destinations are guaranteed to be different here."); if (BI->getSuccessor(0) == BB) { Builder.CreateAssumption(Builder.CreateNot(Cond)); Builder.CreateBr(BI->getSuccessor(1)); @@ -4353,6 +4543,7 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { EraseTerminatorAndDCECond(BI); Changed = true; } + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); } else if (auto *SI = dyn_cast<SwitchInst>(TI)) { SwitchInstProfUpdateWrapper SU(*SI); for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) { @@ -4365,14 +4556,23 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { e = SU->case_end(); Changed = true; } + // Note that the default destination can't be removed! + if (SI->getDefaultDest() != BB) + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); } else if (auto *II = dyn_cast<InvokeInst>(TI)) { if (II->getUnwindDest() == BB) { - removeUnwindEdge(TI->getParent()); + if (DTU) + DTU->applyUpdates(Updates); + Updates.clear(); + removeUnwindEdge(TI->getParent(), DTU); Changed = true; } } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) { if (CSI->getUnwindDest() == BB) { - removeUnwindEdge(TI->getParent()); + if (DTU) + DTU->applyUpdates(Updates); + Updates.clear(); + removeUnwindEdge(TI->getParent(), DTU); Changed = true; continue; } @@ -4387,35 +4587,53 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { Changed = true; } } + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); if (CSI->getNumHandlers() == 0) { - BasicBlock *CatchSwitchBB = CSI->getParent(); if (CSI->hasUnwindDest()) { - // Redirect preds to the unwind dest - CatchSwitchBB->replaceAllUsesWith(CSI->getUnwindDest()); + // Redirect all predecessors of the block containing CatchSwitchInst + // to instead branch to the CatchSwitchInst's unwind destination. + for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) { + Updates.push_back({DominatorTree::Insert, PredecessorOfPredecessor, + CSI->getUnwindDest()}); + Updates.push_back( + {DominatorTree::Delete, PredecessorOfPredecessor, Predecessor}); + } + Predecessor->replaceAllUsesWith(CSI->getUnwindDest()); } else { // Rewrite all preds to unwind to caller (or from invoke to call). - SmallVector<BasicBlock *, 8> EHPreds(predecessors(CatchSwitchBB)); + if (DTU) + DTU->applyUpdates(Updates); + Updates.clear(); + SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor)); for (BasicBlock *EHPred : EHPreds) - removeUnwindEdge(EHPred); + removeUnwindEdge(EHPred, DTU); } // The catchswitch is no longer reachable. new UnreachableInst(CSI->getContext(), CSI); CSI->eraseFromParent(); Changed = true; } - } else if (isa<CleanupReturnInst>(TI)) { + } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) { + (void)CRI; + assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB && + "Expected to always have an unwind to BB."); + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); new UnreachableInst(TI->getContext(), TI); TI->eraseFromParent(); Changed = true; } } + if (DTU) + DTU->applyUpdates(Updates); + // If this block is now dead, remove it. if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) { // We know there are no successors, so just nuke the block. - if (LoopHeaders) - LoopHeaders->erase(BB); - BB->eraseFromParent(); + if (DTU) + DTU->deleteBB(BB); + else + BB->eraseFromParent(); return true; } @@ -4433,15 +4651,26 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) { return true; } -static void createUnreachableSwitchDefault(SwitchInst *Switch) { +static void createUnreachableSwitchDefault(SwitchInst *Switch, + DomTreeUpdater *DTU) { LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); - BasicBlock *NewDefaultBlock = - SplitBlockPredecessors(Switch->getDefaultDest(), Switch->getParent(), ""); + auto *BB = Switch->getParent(); + BasicBlock *NewDefaultBlock = SplitBlockPredecessors( + Switch->getDefaultDest(), Switch->getParent(), "", DTU); + auto *OrigDefaultBlock = Switch->getDefaultDest(); Switch->setDefaultDest(&*NewDefaultBlock); - SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front()); + if (DTU) + DTU->applyUpdates({{DominatorTree::Insert, BB, &*NewDefaultBlock}, + {DominatorTree::Delete, BB, OrigDefaultBlock}}); + SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front(), DTU); + SmallVector<DominatorTree::UpdateType, 2> Updates; + for (auto *Successor : successors(NewDefaultBlock)) + Updates.push_back({DominatorTree::Delete, NewDefaultBlock, Successor}); auto *NewTerminator = NewDefaultBlock->getTerminator(); new UnreachableInst(Switch->getContext(), NewTerminator); EraseTerminatorAndDCECond(NewTerminator); + if (DTU) + DTU->applyUpdates(Updates); } /// Turn a switch with two reachable destinations into an integer range @@ -4453,6 +4682,8 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI, bool HasDefault = !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); + auto *BB = SI->getParent(); + // Partition the cases into two sets with different destinations. BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr; BasicBlock *DestB = nullptr; @@ -4556,17 +4787,23 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI, // Clean up the default block - it may have phis or other instructions before // the unreachable terminator. if (!HasDefault) - createUnreachableSwitchDefault(SI); + createUnreachableSwitchDefault(SI, DTU); + + auto *UnreachableDefault = SI->getDefaultDest(); // Drop the switch. SI->eraseFromParent(); + if (!HasDefault && DTU) + DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}}); + return true; } /// Compute masked bits for the condition of a switch /// and use it to remove dead cases. -static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, +static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, + AssumptionCache *AC, const DataLayout &DL) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); @@ -4580,11 +4817,15 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, // Gather dead cases. SmallVector<ConstantInt *, 8> DeadCases; + SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases; for (auto &Case : SI->cases()) { + auto *Successor = Case.getCaseSuccessor(); + ++NumPerSuccessorCases[Successor]; const APInt &CaseVal = Case.getCaseValue()->getValue(); if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) || (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) { DeadCases.push_back(Case.getCaseValue()); + --NumPerSuccessorCases[Successor]; LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n"); } @@ -4602,7 +4843,7 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, if (HasDefault && DeadCases.empty() && NumUnknownBits < 64 /* avoid overflow */ && SI->getNumCases() == (1ULL << NumUnknownBits)) { - createUnreachableSwitchDefault(SI); + createUnreachableSwitchDefault(SI, DTU); return true; } @@ -4619,6 +4860,13 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, SIW.removeCase(CaseI); } + std::vector<DominatorTree::UpdateType> Updates; + for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) + if (I.second == 0) + Updates.push_back({DominatorTree::Delete, SI->getParent(), I.first}); + if (DTU) + DTU->applyUpdates(Updates); + return true; } @@ -4974,30 +5222,41 @@ static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector, // a select, fixing up PHI nodes and basic blocks. static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, Value *SelectValue, - IRBuilder<> &Builder) { + IRBuilder<> &Builder, + DomTreeUpdater *DTU) { + std::vector<DominatorTree::UpdateType> Updates; + BasicBlock *SelectBB = SI->getParent(); + BasicBlock *DestBB = PHI->getParent(); + + if (!is_contained(predecessors(DestBB), SelectBB)) + Updates.push_back({DominatorTree::Insert, SelectBB, DestBB}); + Builder.CreateBr(DestBB); + + // Remove the switch. + while (PHI->getBasicBlockIndex(SelectBB) >= 0) PHI->removeIncomingValue(SelectBB); PHI->addIncoming(SelectValue, SelectBB); - Builder.CreateBr(PHI->getParent()); - - // Remove the switch. for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { BasicBlock *Succ = SI->getSuccessor(i); - if (Succ == PHI->getParent()) + if (Succ == DestBB) continue; Succ->removePredecessor(SelectBB); + Updates.push_back({DominatorTree::Delete, SelectBB, Succ}); } SI->eraseFromParent(); + if (DTU) + DTU->applyUpdates(Updates); } /// If the switch is only used to initialize one or more /// phi nodes in a common successor block with only two different /// constant values, replace the switch with select. static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder, - const DataLayout &DL, + DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { Value *const Cond = SI->getCondition(); PHINode *PHI = nullptr; @@ -5017,7 +5276,7 @@ static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder, Value *SelectValue = ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder); if (SelectValue) { - RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder); + RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder, DTU); return true; } // The switch couldn't be converted into a select. @@ -5402,11 +5661,12 @@ static void reuseTableCompare( /// successor block with different constant values, replace the switch with /// lookup tables. static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, - const DataLayout &DL, + DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); - Function *Fn = SI->getParent()->getParent(); + BasicBlock *BB = SI->getParent(); + Function *Fn = BB->getParent(); // Only build lookup table when we have a target that supports it or the // attribute is not set. if (!TTI.shouldBuildLookupTables() || @@ -5500,6 +5760,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes)) return false; + std::vector<DominatorTree::UpdateType> Updates; + // Create the BB that does the lookups. Module &Mod = *CommonDest->getParent()->getParent(); BasicBlock *LookupBB = BasicBlock::Create( @@ -5532,6 +5794,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, if (!DefaultIsReachable || GeneratingCoveredLookupTable) { Builder.CreateBr(LookupBB); + Updates.push_back({DominatorTree::Insert, BB, LookupBB}); // Note: We call removeProdecessor later since we need to be able to get the // PHI value for the default case in case we're using a bit mask. } else { @@ -5539,6 +5802,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize)); RangeCheckBranch = Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + Updates.push_back({DominatorTree::Insert, BB, LookupBB}); } // Populate the BB that does the lookups. @@ -5576,16 +5840,18 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, Value *LoBit = Builder.CreateTrunc( Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit"); Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest()); - + Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB}); + Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()}); Builder.SetInsertPoint(LookupBB); - AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, SI->getParent()); + AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB); } if (!DefaultIsReachable || GeneratingCoveredLookupTable) { // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later, // do not delete PHINodes here. - SI->getDefaultDest()->removePredecessor(SI->getParent(), + SI->getDefaultDest()->removePredecessor(BB, /*KeepOneInputPHIs=*/true); + Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()}); } bool ReturnedEarly = false; @@ -5622,19 +5888,29 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, PHI->addIncoming(Result, LookupBB); } - if (!ReturnedEarly) + if (!ReturnedEarly) { Builder.CreateBr(CommonDest); + Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest}); + } // Remove the switch. + SmallSetVector<BasicBlock *, 8> RemovedSuccessors; for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { BasicBlock *Succ = SI->getSuccessor(i); if (Succ == SI->getDefaultDest()) continue; - Succ->removePredecessor(SI->getParent()); + Succ->removePredecessor(BB); + RemovedSuccessors.insert(Succ); } SI->eraseFromParent(); + if (DTU) { + for (BasicBlock *RemovedSuccessor : RemovedSuccessors) + Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); + DTU->applyUpdates(Updates); + } + ++NumLookupTables; if (NeedMask) ++NumLookupTablesHoles; @@ -5770,10 +6046,10 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { return requestResimplify(); // Remove unreachable cases. - if (eliminateDeadSwitchCases(SI, Options.AC, DL)) + if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL)) return requestResimplify(); - if (switchToSelect(SI, Builder, DL, TTI)) + if (switchToSelect(SI, Builder, DTU, DL, TTI)) return requestResimplify(); if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI)) @@ -5785,7 +6061,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // CVP. Therefore, only apply this transformation during late stages of the // optimisation pipeline. if (Options.ConvertSwitchToLookupTable && - SwitchToLookupTable(SI, Builder, DL, TTI)) + SwitchToLookupTable(SI, Builder, DTU, DL, TTI)) return requestResimplify(); if (ReduceSwitchRange(SI, Builder, DL, TTI)) @@ -5800,9 +6076,12 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { // Eliminate redundant destinations. SmallPtrSet<Value *, 8> Succs; + SmallSetVector<BasicBlock *, 8> RemovedSuccs; for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { BasicBlock *Dest = IBI->getDestination(i); if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) { + if (!Dest->hasAddressTaken()) + RemovedSuccs.insert(Dest); Dest->removePredecessor(BB); IBI->removeDestination(i); --i; @@ -5811,6 +6090,14 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { } } + if (DTU) { + std::vector<DominatorTree::UpdateType> Updates; + Updates.reserve(RemovedSuccs.size()); + for (auto *RemovedSucc : RemovedSuccs) + Updates.push_back({DominatorTree::Delete, BB, RemovedSucc}); + DTU->applyUpdates(Updates); + } + if (IBI->getNumDestinations() == 0) { // If the indirectbr has no successors, change it to unreachable. new UnreachableInst(IBI->getContext(), IBI); @@ -5854,7 +6141,7 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { /// block when the inputs in the phi are the same for the two blocks being /// merged. In some cases, this could result in removal of the PHI entirely. static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, - BasicBlock *BB) { + BasicBlock *BB, DomTreeUpdater *DTU) { auto Succ = BB->getUniqueSuccessor(); assert(Succ); // If there's a phi in the successor block, we'd likely have to introduce @@ -5875,6 +6162,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, if (!BI2 || !BI2->isIdenticalTo(BI)) continue; + std::vector<DominatorTree::UpdateType> Updates; + // We've found an identical block. Update our predecessors to take that // path instead and make ourselves dead. SmallPtrSet<BasicBlock *, 16> Preds; @@ -5884,6 +6173,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, assert(II->getNormalDest() != BB && II->getUnwindDest() == BB && "unexpected successor"); II->setUnwindDest(OtherPred); + Updates.push_back({DominatorTree::Insert, Pred, OtherPred}); + Updates.push_back({DominatorTree::Delete, Pred, BB}); } // The debug info in OtherPred doesn't cover the merged control flow that @@ -5899,11 +6190,14 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, Succs.insert(succ_begin(BB), succ_end(BB)); for (BasicBlock *Succ : Succs) { Succ->removePredecessor(BB); + Updates.push_back({DominatorTree::Delete, BB, Succ}); } IRBuilder<> Builder(BI); Builder.CreateUnreachable(); BI->eraseFromParent(); + if (DTU) + DTU->applyUpdates(Updates); return true; } return false; @@ -5928,11 +6222,11 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, // backedge, so we can eliminate BB. bool NeedCanonicalLoop = Options.NeedCanonicalLoop && - (LoopHeaders && BB->hasNPredecessorsOrMore(2) && - (LoopHeaders->count(BB) || LoopHeaders->count(Succ))); + (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) && + (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ))); BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && - !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB)) + !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU)) return true; // If the only instruction in the block is a seteq/setne comparison against a @@ -5951,7 +6245,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) { for (++I; isa<DbgInfoIntrinsic>(I); ++I) ; - if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB)) + if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB, DTU)) return true; } @@ -5959,7 +6253,8 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. - if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold)) + if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, + Options.BonusInstThreshold)) return requestResimplify(); return false; } @@ -6022,7 +6317,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold)) + if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, + Options.BonusInstThreshold)) return requestResimplify(); // We have a conditional branch to two blocks that are only reachable @@ -6031,8 +6327,9 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // can hoist it up to the branching block. if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { - if (HoistThenElseCodeToIf(BI, TTI)) - return requestResimplify(); + if (HoistCommon && Options.HoistCommonInsts) + if (HoistThenElseCodeToIf(BI, TTI)) + return requestResimplify(); } else { // If Successor #1 has multiple preds, we may be able to conditionally // execute Successor #0 if it branches to Successor #1. @@ -6056,14 +6353,14 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // through this block if any PHI node entries are constants. if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) if (PN->getParent() == BI->getParent()) - if (FoldCondBranchOnPHI(BI, DL, Options.AC)) + if (FoldCondBranchOnPHI(BI, DTU, DL, Options.AC)) return requestResimplify(); // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (SimplifyCondBranchToCondBranch(PBI, BI, DL, TTI)) + if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI)) return requestResimplify(); // Look for diamond patterns. @@ -6071,14 +6368,14 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB)) if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (mergeConditionalStores(PBI, BI, DL, TTI)) + if (mergeConditionalStores(PBI, BI, DTU, DL, TTI)) return requestResimplify(); return false; } /// Check if passing a value to an instruction will cause undefined behavior. -static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { +static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) { Constant *C = dyn_cast<Constant>(V); if (!C) return false; @@ -6101,12 +6398,15 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { // Look through GEPs. A load from a GEP derived from NULL is still undefined if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use)) - if (GEP->getPointerOperand() == I) - return passingValueIsAlwaysUndefined(V, GEP); + if (GEP->getPointerOperand() == I) { + if (!GEP->isInBounds() || !GEP->hasAllZeroIndices()) + PtrValueMayBeModified = true; + return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified); + } // Look through bitcasts. if (BitCastInst *BC = dyn_cast<BitCastInst>(Use)) - return passingValueIsAlwaysUndefined(V, BC); + return passingValueIsAlwaysUndefined(V, BC, PtrValueMayBeModified); // Load from null is undefined. if (LoadInst *LI = dyn_cast<LoadInst>(Use)) @@ -6121,24 +6421,51 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { SI->getPointerAddressSpace())) && SI->getPointerOperand() == I; - // A call to null is undefined. - if (auto *CB = dyn_cast<CallBase>(Use)) - return !NullPointerIsDefined(CB->getFunction()) && - CB->getCalledOperand() == I; + if (auto *CB = dyn_cast<CallBase>(Use)) { + if (C->isNullValue() && NullPointerIsDefined(CB->getFunction())) + return false; + // A call to null is undefined. + if (CB->getCalledOperand() == I) + return true; + + if (C->isNullValue()) { + for (const llvm::Use &Arg : CB->args()) + if (Arg == I) { + unsigned ArgIdx = CB->getArgOperandNo(&Arg); + if (CB->paramHasAttr(ArgIdx, Attribute::NonNull) && + CB->paramHasAttr(ArgIdx, Attribute::NoUndef)) { + // Passing null to a nonnnull+noundef argument is undefined. + return !PtrValueMayBeModified; + } + } + } else if (isa<UndefValue>(C)) { + // Passing undef to a noundef argument is undefined. + for (const llvm::Use &Arg : CB->args()) + if (Arg == I) { + unsigned ArgIdx = CB->getArgOperandNo(&Arg); + if (CB->paramHasAttr(ArgIdx, Attribute::NoUndef)) { + // Passing undef to a noundef argument is undefined. + return true; + } + } + } + } } return false; } /// If BB has an incoming value that will always trigger undefined behavior /// (eg. null pointer dereference), remove the branch leading here. -static bool removeUndefIntroducingPredecessor(BasicBlock *BB) { +static bool removeUndefIntroducingPredecessor(BasicBlock *BB, + DomTreeUpdater *DTU) { for (PHINode &PHI : BB->phis()) for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) { - Instruction *T = PHI.getIncomingBlock(i)->getTerminator(); + BasicBlock *Predecessor = PHI.getIncomingBlock(i); + Instruction *T = Predecessor->getTerminator(); IRBuilder<> Builder(T); if (BranchInst *BI = dyn_cast<BranchInst>(T)) { - BB->removePredecessor(PHI.getIncomingBlock(i)); + BB->removePredecessor(Predecessor); // Turn uncoditional branches into unreachables and remove the dead // destination from conditional branches. if (BI->isUnconditional()) @@ -6147,6 +6474,8 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB) { Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1) : BI->getSuccessor(0)); BI->eraseFromParent(); + if (DTU) + DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}}); return true; } // TODO: SwitchInst. @@ -6155,7 +6484,7 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB) { return false; } -bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { +bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) { bool Changed = false; assert(BB && BB->getParent() && "Block not embedded in function!"); @@ -6166,28 +6495,29 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) || BB->getSinglePredecessor() == BB) { LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB); - DeleteDeadBlock(BB); + DeleteDeadBlock(BB, DTU); return true; } // Check to see if we can constant propagate this terminator instruction // away... - Changed |= ConstantFoldTerminator(BB, true); + Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true, + /*TLI=*/nullptr, DTU); // Check for and eliminate duplicate PHI nodes in this block. Changed |= EliminateDuplicatePHINodes(BB); // Check for and remove branches that will always cause undefined behavior. - Changed |= removeUndefIntroducingPredecessor(BB); + Changed |= removeUndefIntroducingPredecessor(BB, DTU); // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. - if (MergeBlockIntoPredecessor(BB)) + if (MergeBlockIntoPredecessor(BB, DTU)) return true; if (SinkCommon && Options.SinkCommonInsts) - Changed |= SinkCommonCodeFromPredecessors(BB); + Changed |= SinkCommonCodeFromPredecessors(BB, DTU); IRBuilder<> Builder(BB); @@ -6196,7 +6526,7 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { // eliminate it, do so now. if (auto *PN = dyn_cast<PHINode>(BB->begin())) if (PN->getNumIncomingValues() == 2) - Changed |= FoldTwoEntryPHINode(PN, TTI, DL); + Changed |= FoldTwoEntryPHINode(PN, TTI, DTU, DL); } Instruction *Terminator = BB->getTerminator(); @@ -6228,7 +6558,23 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { return Changed; } +bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { + bool Changed = simplifyOnceImpl(BB); + + assert((!RequireAndPreserveDomTree || + (DTU && + DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) && + "Failed to maintain validity of domtree!"); + + return Changed; +} + bool SimplifyCFGOpt::run(BasicBlock *BB) { + assert((!RequireAndPreserveDomTree || + (DTU && + DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) && + "Original domtree is invalid?"); + bool Changed = false; // Repeated simplify BB as long as resimplification is requested. @@ -6244,9 +6590,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { } bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - const SimplifyCFGOptions &Options, - SmallPtrSetImpl<BasicBlock *> *LoopHeaders) { - return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), LoopHeaders, - Options) + DomTreeUpdater *DTU, const SimplifyCFGOptions &Options, + ArrayRef<WeakVH> LoopHeaders) { + return SimplifyCFGOpt(TTI, RequireAndPreserveDomTree ? DTU : nullptr, + BB->getModule()->getDataLayout(), LoopHeaders, Options) .run(BB); } |