author | Dimitry Andric <dim@FreeBSD.org> | 2017-05-16 19:46:52 +0000
---|---|---
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-05-16 19:46:52 +0000
commit | 6b3f41ed88e8e440e11a4fbf20b6600529f80049 (patch) |
tree | 928b056f24a634d628c80238dbbf10d41b1a71d5 /lib/Transforms/Scalar |
parent | c46e6a5940c50058e00c0c5f9123fd82e338d29a (diff) |
Vendor import of llvm trunk r303197: vendor/llvm/llvm-trunk-r303197
Notes:
svn path=/vendor/llvm/dist/; revision=318368
svn path=/vendor/llvm/llvm-trunk-r303197/; revision=318369; tag=vendor/llvm/llvm-trunk-r303197
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r-- | lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 10 |
-rw-r--r-- | lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 295 |
-rw-r--r-- | lib/Transforms/Scalar/NewGVN.cpp | 210 |
-rw-r--r-- | lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 161 |
-rw-r--r-- | lib/Transforms/Scalar/SpeculativeExecution.cpp | 43 |
5 files changed, 602 insertions, 117 deletions
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 3f1a77b49a44..ee493a8ec7e1 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -442,9 +442,8 @@ static bool processAdd(BinaryOperator *AddOp, LazyValueInfo *LVI) { bool Changed = false; if (!NUW) { - ConstantRange NUWRange = - LRange.makeGuaranteedNoWrapRegion(BinaryOperator::Add, LRange, - OBO::NoUnsignedWrap); + ConstantRange NUWRange = ConstantRange::makeGuaranteedNoWrapRegion( + BinaryOperator::Add, LRange, OBO::NoUnsignedWrap); if (!NUWRange.isEmptySet()) { bool NewNUW = NUWRange.contains(LazyRRange()); AddOp->setHasNoUnsignedWrap(NewNUW); @@ -452,9 +451,8 @@ static bool processAdd(BinaryOperator *AddOp, LazyValueInfo *LVI) { } } if (!NSW) { - ConstantRange NSWRange = - LRange.makeGuaranteedNoWrapRegion(BinaryOperator::Add, LRange, - OBO::NoSignedWrap); + ConstantRange NSWRange = ConstantRange::makeGuaranteedNoWrapRegion( + BinaryOperator::Add, LRange, OBO::NoSignedWrap); if (!NSWRange.isEmptySet()) { bool NewNSW = NSWRange.contains(LazyRRange()); AddOp->setHasNoSignedWrap(NewNSW); diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 48d5ae88cda9..6693a26e8890 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -144,6 +144,10 @@ private: bool recognizePopcount(); void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst, PHINode *CntPhi, Value *Var); + bool recognizeAndInsertCTLZ(); + void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst, + PHINode *CntPhi, Value *Var, const DebugLoc DL, + bool ZeroCheck, bool IsCntPhiUsedOutsideLoop); /// @} }; @@ -994,7 +998,7 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset, } bool LoopIdiomRecognize::runOnNoncountableLoop() { - return recognizePopcount(); + return recognizePopcount() || recognizeAndInsertCTLZ(); } /// Check if the given conditional branch is based on the comparison between @@ -1159,6 +1163,167 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, return true; } +/// Return true if the idiom is detected in the loop. +/// +/// Additionally: +/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ) +/// or nullptr if there is no such. +/// 2) \p CntPhi is set to the corresponding phi node +/// or nullptr if there is no such. +/// 3) \p Var is set to the value whose CTLZ could be used. +/// 4) \p DefX is set to the instruction calculating Loop exit condition. +/// +/// The core idiom we are trying to detect is: +/// \code +/// if (x0 == 0) +/// goto loop-exit // the precondition of the loop +/// cnt0 = init-val; +/// do { +/// x = phi (x0, x.next); //PhiX +/// cnt = phi(cnt0, cnt.next); +/// +/// cnt.next = cnt + 1; +/// ... +/// x.next = x >> 1; // DefX +/// ... +/// } while(x.next != 0); +/// +/// loop-exit: +/// \endcode +static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX, + Instruction *&CntInst, PHINode *&CntPhi, + Instruction *&DefX) { + BasicBlock *LoopEntry; + Value *VarX = nullptr; + + DefX = nullptr; + PhiX = nullptr; + CntInst = nullptr; + CntPhi = nullptr; + LoopEntry = *(CurLoop->block_begin()); + + // step 1: Check if the loop-back branch is in desirable form. 
+ if (Value *T = matchCondition( + dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry)) + DefX = dyn_cast<Instruction>(T); + else + return false; + + // step 2: detect instructions corresponding to "x.next = x >> 1" + if (!DefX || DefX->getOpcode() != Instruction::AShr) + return false; + if (ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1))) + if (!Shft || !Shft->isOne()) + return false; + VarX = DefX->getOperand(0); + + // step 3: Check the recurrence of variable X + PhiX = dyn_cast<PHINode>(VarX); + if (!PhiX || (PhiX->getOperand(0) != DefX && PhiX->getOperand(1) != DefX)) + return false; + + // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1 + // TODO: We can skip the step. If loop trip count is known (CTLZ), + // then all uses of "cnt.next" could be optimized to the trip count + // plus "cnt0". Currently it is not optimized. + // This step could be used to detect POPCNT instruction: + // cnt.next = cnt + (x.next & 1) + for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(), + IterE = LoopEntry->end(); + Iter != IterE; Iter++) { + Instruction *Inst = &*Iter; + if (Inst->getOpcode() != Instruction::Add) + continue; + + ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1)); + if (!Inc || !Inc->isOne()) + continue; + + PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0)); + if (!Phi || Phi->getParent() != LoopEntry) + continue; + + CntInst = Inst; + CntPhi = Phi; + break; + } + if (!CntInst) + return false; + + return true; +} + +/// Recognize CTLZ idiom in a non-countable loop and convert the loop +/// to countable (with CTLZ trip count). +/// If CTLZ inserted as a new trip count returns true; otherwise, returns false. +bool LoopIdiomRecognize::recognizeAndInsertCTLZ() { + // Give up if the loop has multiple blocks or multiple backedges. + if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1) + return false; + + Instruction *CntInst, *DefX; + PHINode *CntPhi, *PhiX; + if (!detectCTLZIdiom(CurLoop, PhiX, CntInst, CntPhi, DefX)) + return false; + + bool IsCntPhiUsedOutsideLoop = false; + for (User *U : CntPhi->users()) + if (!CurLoop->contains(dyn_cast<Instruction>(U))) { + IsCntPhiUsedOutsideLoop = true; + break; + } + bool IsCntInstUsedOutsideLoop = false; + for (User *U : CntInst->users()) + if (!CurLoop->contains(dyn_cast<Instruction>(U))) { + IsCntInstUsedOutsideLoop = true; + break; + } + // If both CntInst and CntPhi are used outside the loop the profitability + // is questionable. + if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop) + return false; + + // For some CPUs result of CTLZ(X) intrinsic is undefined + // when X is 0. If we can not guarantee X != 0, we need to check this + // when expand. + bool ZeroCheck = false; + // It is safe to assume Preheader exist as it was checked in + // parent function RunOnLoop. + BasicBlock *PH = CurLoop->getLoopPreheader(); + Value *InitX = PhiX->getIncomingValueForBlock(PH); + // If we check X != 0 before entering the loop we don't need a zero + // check in CTLZ intrinsic. + if (BasicBlock *PreCondBB = PH->getSinglePredecessor()) + if (BranchInst *PreCondBr = + dyn_cast<BranchInst>(PreCondBB->getTerminator())) { + if (matchCondition(PreCondBr, PH) == InitX) + ZeroCheck = true; + } + + // Check if CTLZ intrinsic is profitable. 
Assume it is always profitable + // if we delete the loop (the loop has only 6 instructions): + // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ] + // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ] + // %shr = ashr %n.addr.0, 1 + // %tobool = icmp eq %shr, 0 + // %inc = add nsw %i.0, 1 + // br i1 %tobool + + IRBuilder<> Builder(PH->getTerminator()); + SmallVector<const Value *, 2> Ops = + {InitX, ZeroCheck ? Builder.getTrue() : Builder.getFalse()}; + ArrayRef<const Value *> Args(Ops); + if (CurLoop->getHeader()->size() != 6 && + TTI->getIntrinsicCost(Intrinsic::ctlz, InitX->getType(), Args) > + TargetTransformInfo::TCC_Basic) + return false; + + const DebugLoc DL = DefX->getDebugLoc(); + transformLoopToCountable(PH, CntInst, CntPhi, InitX, DL, ZeroCheck, + IsCntPhiUsedOutsideLoop); + return true; +} + /// Recognizes a population count idiom in a non-countable loop. /// /// If detected, transforms the relevant code to issue the popcount intrinsic @@ -1222,6 +1387,134 @@ static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, return CI; } +static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val, + const DebugLoc &DL, bool ZeroCheck) { + Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()}; + Type *Tys[] = {Val->getType()}; + + Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent(); + Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctlz, Tys); + CallInst *CI = IRBuilder.CreateCall(Func, Ops); + CI->setDebugLoc(DL); + + return CI; +} + +/// Transform the following loop: +/// loop: +/// CntPhi = PHI [Cnt0, CntInst] +/// PhiX = PHI [InitX, DefX] +/// CntInst = CntPhi + 1 +/// DefX = PhiX >> 1 +// LOOP_BODY +/// Br: loop if (DefX != 0) +/// Use(CntPhi) or Use(CntInst) +/// +/// Into: +/// If CntPhi used outside the loop: +/// CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1) +/// Count = CountPrev + 1 +/// else +/// Count = BitWidth(InitX) - CTLZ(InitX) +/// loop: +/// CntPhi = PHI [Cnt0, CntInst] +/// PhiX = PHI [InitX, DefX] +/// PhiCount = PHI [Count, Dec] +/// CntInst = CntPhi + 1 +/// DefX = PhiX >> 1 +/// Dec = PhiCount - 1 +/// LOOP_BODY +/// Br: loop if (Dec != 0) +/// Use(CountPrev + Cnt0) // Use(CntPhi) +/// or +/// Use(Count + Cnt0) // Use(CntInst) +/// +/// If LOOP_BODY is empty the loop will be deleted. +/// If CntInst and DefX are not used in LOOP_BODY they will be removed. 
+void LoopIdiomRecognize::transformLoopToCountable( + BasicBlock *Preheader, Instruction *CntInst, PHINode *CntPhi, Value *InitX, + const DebugLoc DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) { + BranchInst *PreheaderBr = dyn_cast<BranchInst>(Preheader->getTerminator()); + + // Step 1: Insert the CTLZ instruction at the end of the preheader block + // Count = BitWidth - CTLZ(InitX); + // If there are uses of CntPhi create: + // CountPrev = BitWidth - CTLZ(InitX >> 1); + IRBuilder<> Builder(PreheaderBr); + Builder.SetCurrentDebugLocation(DL); + Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext; + + if (IsCntPhiUsedOutsideLoop) + InitXNext = Builder.CreateAShr(InitX, + ConstantInt::get(InitX->getType(), 1)); + else + InitXNext = InitX; + CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck); + Count = Builder.CreateSub( + ConstantInt::get(CTLZ->getType(), + CTLZ->getType()->getIntegerBitWidth()), + CTLZ); + if (IsCntPhiUsedOutsideLoop) { + CountPrev = Count; + Count = Builder.CreateAdd( + CountPrev, + ConstantInt::get(CountPrev->getType(), 1)); + } + if (IsCntPhiUsedOutsideLoop) + NewCount = Builder.CreateZExtOrTrunc(CountPrev, + cast<IntegerType>(CntInst->getType())); + else + NewCount = Builder.CreateZExtOrTrunc(Count, + cast<IntegerType>(CntInst->getType())); + + // If the CTLZ counter's initial value is not zero, insert Add Inst. + Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader); + ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal); + if (!InitConst || !InitConst->isZero()) + NewCount = Builder.CreateAdd(NewCount, CntInitVal); + + // Step 2: Insert new IV and loop condition: + // loop: + // ... + // PhiCount = PHI [Count, Dec] + // ... + // Dec = PhiCount - 1 + // ... + // Br: loop if (Dec != 0) + BasicBlock *Body = *(CurLoop->block_begin()); + auto *LbBr = dyn_cast<BranchInst>(Body->getTerminator()); + ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition()); + Type *Ty = Count->getType(); + + PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front()); + + Builder.SetInsertPoint(LbCond); + Instruction *TcDec = cast<Instruction>( + Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1), + "tcdec", false, true)); + + TcPhi->addIncoming(Count, Preheader); + TcPhi->addIncoming(TcDec, Body); + + CmpInst::Predicate Pred = + (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ; + LbCond->setPredicate(Pred); + LbCond->setOperand(0, TcDec); + LbCond->setOperand(1, ConstantInt::get(Ty, 0)); + + // Step 3: All the references to the original counter outside + // the loop are replaced with the NewCount -- the value returned from + // __builtin_ctlz(x). + if (IsCntPhiUsedOutsideLoop) + CntPhi->replaceUsesOutsideBlock(NewCount, Body); + else + CntInst->replaceUsesOutsideBlock(NewCount, Body); + + // step 4: Forget the "non-computable" trip-count SCEV associated with the + // loop. The loop would otherwise not be deleted even if it becomes empty. 
+ SE->forgetLoop(CurLoop); +} + void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst, PHINode *CntPhi, Value *Var) { diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 3c9850b156ac..5e0a705782ea 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -283,7 +283,6 @@ public: // Forward propagation info const Expression *getDefiningExpr() const { return DefiningExpr; } - void setDefiningExpr(const Expression *E) { DefiningExpr = E; } // Value member set bool empty() const { return Members.empty(); } @@ -317,6 +316,9 @@ public: --StoreCount; } + // True if this class has no memory members. + bool definesNoMemory() const { return StoreCount == 0 && memory_empty(); } + // Return true if two congruence classes are equivalent to each other. This // means // that every field but the ID number and the dead field are equivalent. @@ -401,9 +403,12 @@ class NewGVN { MemorySSAWalker *MSSAWalker; const DataLayout &DL; std::unique_ptr<PredicateInfo> PredInfo; - BumpPtrAllocator ExpressionAllocator; - ArrayRecycler<Value *> ArgRecycler; - TarjanSCC SCCFinder; + + // These are the only two things the create* functions should have + // side-effects on due to allocating memory. + mutable BumpPtrAllocator ExpressionAllocator; + mutable ArrayRecycler<Value *> ArgRecycler; + mutable TarjanSCC SCCFinder; const SimplifyQuery SQ; // Number of function arguments, used by ranking @@ -430,11 +435,12 @@ class NewGVN { // In order to correctly ensure propagation, we must keep track of what // comparisons we used, so that when the values of the comparisons change, we // propagate the information to the places we used the comparison. - DenseMap<const Value *, SmallPtrSet<Instruction *, 2>> PredicateToUsers; - // Mapping from MemoryAccess we used to the MemoryAccess we used it with. Has + mutable DenseMap<const Value *, SmallPtrSet<Instruction *, 2>> + PredicateToUsers; // the same reasoning as PredicateToUsers. When we skip MemoryAccesses for // stores, we no longer can rely solely on the def-use chains of MemorySSA. - DenseMap<const MemoryAccess *, SmallPtrSet<MemoryAccess *, 2>> MemoryToUsers; + mutable DenseMap<const MemoryAccess *, SmallPtrSet<MemoryAccess *, 2>> + MemoryToUsers; // A table storing which memorydefs/phis represent a memory state provably // equivalent to another memory state. @@ -457,7 +463,7 @@ class NewGVN { DenseMap<const MemoryPhi *, MemoryPhiState> MemoryPhiState; enum PhiCycleState { PCS_Unknown, PCS_CycleFree, PCS_Cycle }; - DenseMap<const PHINode *, PhiCycleState> PhiCycleState; + mutable DenseMap<const PHINode *, PhiCycleState> PhiCycleState; // Expression to class mapping. using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>; ExpressionClassMap ExpressionToClass; @@ -511,21 +517,24 @@ public: private: // Expression handling. 
- const Expression *createExpression(Instruction *); - const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *); - PHIExpression *createPHIExpression(Instruction *, bool &HasBackedge, - bool &AllConstant); - const VariableExpression *createVariableExpression(Value *); - const ConstantExpression *createConstantExpression(Constant *); - const Expression *createVariableOrConstant(Value *V); - const UnknownExpression *createUnknownExpression(Instruction *); + const Expression *createExpression(Instruction *) const; + const Expression *createBinaryExpression(unsigned, Type *, Value *, + Value *) const; + PHIExpression *createPHIExpression(Instruction *, bool &HasBackEdge, + bool &AllConstant) const; + const VariableExpression *createVariableExpression(Value *) const; + const ConstantExpression *createConstantExpression(Constant *) const; + const Expression *createVariableOrConstant(Value *V) const; + const UnknownExpression *createUnknownExpression(Instruction *) const; const StoreExpression *createStoreExpression(StoreInst *, - const MemoryAccess *); + const MemoryAccess *) const; LoadExpression *createLoadExpression(Type *, Value *, LoadInst *, - const MemoryAccess *); - const CallExpression *createCallExpression(CallInst *, const MemoryAccess *); - const AggregateValueExpression *createAggregateValueExpression(Instruction *); - bool setBasicExpressionInfo(Instruction *, BasicExpression *); + const MemoryAccess *) const; + const CallExpression *createCallExpression(CallInst *, + const MemoryAccess *) const; + const AggregateValueExpression * + createAggregateValueExpression(Instruction *) const; + bool setBasicExpressionInfo(Instruction *, BasicExpression *) const; // Congruence class handling. CongruenceClass *createCongruenceClass(Value *Leader, const Expression *E) { @@ -560,17 +569,18 @@ private: // Symbolic evaluation. const Expression *checkSimplificationResults(Expression *, Instruction *, - Value *); - const Expression *performSymbolicEvaluation(Value *); + Value *) const; + const Expression *performSymbolicEvaluation(Value *) const; const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *, - Instruction *, MemoryAccess *); - const Expression *performSymbolicLoadEvaluation(Instruction *); - const Expression *performSymbolicStoreEvaluation(Instruction *); - const Expression *performSymbolicCallEvaluation(Instruction *); - const Expression *performSymbolicPHIEvaluation(Instruction *); - const Expression *performSymbolicAggrValueEvaluation(Instruction *); - const Expression *performSymbolicCmpEvaluation(Instruction *); - const Expression *performSymbolicPredicateInfoEvaluation(Instruction *); + Instruction *, + MemoryAccess *) const; + const Expression *performSymbolicLoadEvaluation(Instruction *) const; + const Expression *performSymbolicStoreEvaluation(Instruction *) const; + const Expression *performSymbolicCallEvaluation(Instruction *) const; + const Expression *performSymbolicPHIEvaluation(Instruction *) const; + const Expression *performSymbolicAggrValueEvaluation(Instruction *) const; + const Expression *performSymbolicCmpEvaluation(Instruction *) const; + const Expression *performSymbolicPredicateInfoEvaluation(Instruction *) const; // Congruence finding. 
bool someEquivalentDominates(const Instruction *, const Instruction *) const; @@ -620,8 +630,8 @@ private: void markPredicateUsersTouched(Instruction *); void markValueLeaderChangeTouched(CongruenceClass *CC); void markMemoryLeaderChangeTouched(CongruenceClass *CC); - void addPredicateUsers(const PredicateBase *, Instruction *); - void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U); + void addPredicateUsers(const PredicateBase *, Instruction *) const; + void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const; // Main loop of value numbering void iterateTouchedInstructions(); @@ -634,7 +644,7 @@ private: void verifyIterationSettled(Function &F); bool singleReachablePHIPath(const MemoryAccess *, const MemoryAccess *) const; BasicBlock *getBlockForValue(Value *V) const; - void deleteExpression(const Expression *E); + void deleteExpression(const Expression *E) const; unsigned InstrToDFSNum(const Value *V) const { assert(isa<Instruction>(V) && "This should not be used for MemoryAccesses"); return InstrDFS.lookup(V); @@ -654,7 +664,7 @@ private: ? InstrToDFSNum(cast<MemoryUseOrDef>(MA)->getMemoryInst()) : InstrDFS.lookup(MA); } - bool isCycleFree(const PHINode *PN); + bool isCycleFree(const PHINode *PN) const; template <class T, class Range> T *getMinDFSOfRange(const Range &) const; // Debug counter info. When verifying, we have to reset the value numbering // debug counter to the same state it started in to get the same results. @@ -702,7 +712,7 @@ BasicBlock *NewGVN::getBlockForValue(Value *V) const { // Delete a definitely dead expression, so it can be reused by the expression // allocator. Some of these are not in creation functions, so we have to accept // const versions. -void NewGVN::deleteExpression(const Expression *E) { +void NewGVN::deleteExpression(const Expression *E) const { assert(isa<BasicExpression>(E)); auto *BE = cast<BasicExpression>(E); const_cast<BasicExpression *>(BE)->deallocateOperands(ArgRecycler); @@ -710,7 +720,7 @@ void NewGVN::deleteExpression(const Expression *E) { } PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, - bool &AllConstant) { + bool &AllConstant) const { BasicBlock *PHIBlock = I->getParent(); auto *PN = cast<PHINode>(I); auto *E = @@ -722,30 +732,46 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock)); + // NewGVN assumes the operands of a PHI node are in a consistent order across + // PHIs. LLVM doesn't seem to always guarantee this. While we need to fix + // this in LLVM at some point we don't want GVN to find wrong congruences. + // Therefore, here we sort uses in predecessor order. + // We're sorting the values by pointer. In theory this might be cause of + // non-determinism, but here we don't rely on the ordering for anything + // significant, e.g. we don't create new instructions based on it so we're + // fine. + SmallVector<const Use *, 4> PHIOperands; + for (const Use &U : PN->operands()) + PHIOperands.push_back(&U); + std::sort(PHIOperands.begin(), PHIOperands.end(), + [&](const Use *U1, const Use *U2) { + return PN->getIncomingBlock(*U1) < PN->getIncomingBlock(*U2); + }); + // Filter out unreachable phi operands. 
- auto Filtered = make_filter_range(PN->operands(), [&](const Use &U) { - return ReachableEdges.count({PN->getIncomingBlock(U), PHIBlock}); + auto Filtered = make_filter_range(PHIOperands, [&](const Use *U) { + return ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock}); }); std::transform(Filtered.begin(), Filtered.end(), op_inserter(E), - [&](const Use &U) -> Value * { - auto *BB = PN->getIncomingBlock(U); + [&](const Use *U) -> Value * { + auto *BB = PN->getIncomingBlock(*U); auto *DTN = DT->getNode(BB); if (RPOOrdering.lookup(DTN) >= PHIRPO) HasBackedge = true; - AllConstant &= isa<UndefValue>(U) || isa<Constant>(U); + AllConstant &= isa<UndefValue>(*U) || isa<Constant>(*U); // Don't try to transform self-defined phis. - if (U == PN) + if (*U == PN) return PN; - return lookupOperandLeader(U); + return lookupOperandLeader(*U); }); return E; } // Set basic expression info (Arguments, type, opcode) for Expression // E from Instruction I in block B. -bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) { +bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) const { bool AllConstant = true; if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) E->setType(GEP->getSourceElementType()); @@ -766,7 +792,8 @@ bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) { } const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T, - Value *Arg1, Value *Arg2) { + Value *Arg1, + Value *Arg2) const { auto *E = new (ExpressionAllocator) BasicExpression(2); E->setType(T); @@ -795,7 +822,8 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T, // TODO: Once finished, this should not take an Instruction, we only // use it for printing. const Expression *NewGVN::checkSimplificationResults(Expression *E, - Instruction *I, Value *V) { + Instruction *I, + Value *V) const { if (!V) return nullptr; if (auto *C = dyn_cast<Constant>(V)) { @@ -827,7 +855,7 @@ const Expression *NewGVN::checkSimplificationResults(Expression *E, return nullptr; } -const Expression *NewGVN::createExpression(Instruction *I) { +const Expression *NewGVN::createExpression(Instruction *I) const { auto *E = new (ExpressionAllocator) BasicExpression(I->getNumOperands()); bool AllConstant = setBasicExpressionInfo(I, E); @@ -913,7 +941,7 @@ const Expression *NewGVN::createExpression(Instruction *I) { } const AggregateValueExpression * -NewGVN::createAggregateValueExpression(Instruction *I) { +NewGVN::createAggregateValueExpression(Instruction *I) const { if (auto *II = dyn_cast<InsertValueInst>(I)) { auto *E = new (ExpressionAllocator) AggregateValueExpression(I->getNumOperands(), II->getNumIndices()); @@ -932,32 +960,32 @@ NewGVN::createAggregateValueExpression(Instruction *I) { llvm_unreachable("Unhandled type of aggregate value operation"); } -const VariableExpression *NewGVN::createVariableExpression(Value *V) { +const VariableExpression *NewGVN::createVariableExpression(Value *V) const { auto *E = new (ExpressionAllocator) VariableExpression(V); E->setOpcode(V->getValueID()); return E; } -const Expression *NewGVN::createVariableOrConstant(Value *V) { +const Expression *NewGVN::createVariableOrConstant(Value *V) const { if (auto *C = dyn_cast<Constant>(V)) return createConstantExpression(C); return createVariableExpression(V); } -const ConstantExpression *NewGVN::createConstantExpression(Constant *C) { +const ConstantExpression *NewGVN::createConstantExpression(Constant *C) const { auto *E = new (ExpressionAllocator) ConstantExpression(C); 
E->setOpcode(C->getValueID()); return E; } -const UnknownExpression *NewGVN::createUnknownExpression(Instruction *I) { +const UnknownExpression *NewGVN::createUnknownExpression(Instruction *I) const { auto *E = new (ExpressionAllocator) UnknownExpression(I); E->setOpcode(I->getOpcode()); return E; } -const CallExpression *NewGVN::createCallExpression(CallInst *CI, - const MemoryAccess *MA) { +const CallExpression * +NewGVN::createCallExpression(CallInst *CI, const MemoryAccess *MA) const { // FIXME: Add operand bundles for calls. auto *E = new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, MA); @@ -1017,9 +1045,8 @@ Value *NewGVN::lookupOperandLeader(Value *V) const { const MemoryAccess *NewGVN::lookupMemoryLeader(const MemoryAccess *MA) const { auto *CC = getMemoryClass(MA); assert(CC->getMemoryLeader() && - "Every MemoryAccess should be mapped to a " - "congruence class with a represenative memory " - "access"); + "Every MemoryAccess should be mapped to a congruence class with a " + "representative memory access"); return CC->getMemoryLeader(); } @@ -1032,7 +1059,7 @@ bool NewGVN::isMemoryAccessTop(const MemoryAccess *MA) const { LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp, LoadInst *LI, - const MemoryAccess *MA) { + const MemoryAccess *MA) const { auto *E = new (ExpressionAllocator) LoadExpression(1, LI, lookupMemoryLeader(MA)); E->allocateOperands(ArgRecycler, ExpressionAllocator); @@ -1050,8 +1077,8 @@ LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp, return E; } -const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI, - const MemoryAccess *MA) { +const StoreExpression * +NewGVN::createStoreExpression(StoreInst *SI, const MemoryAccess *MA) const { auto *StoredValueLeader = lookupOperandLeader(SI->getValueOperand()); auto *E = new (ExpressionAllocator) StoreExpression(SI->getNumOperands(), SI, StoredValueLeader, MA); @@ -1068,7 +1095,7 @@ const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI, return E; } -const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) { +const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const { // Unlike loads, we never try to eliminate stores, so we do not check if they // are simple and avoid value numbering them. auto *SI = cast<StoreInst>(I); @@ -1126,7 +1153,7 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) { const Expression * NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr, LoadInst *LI, Instruction *DepInst, - MemoryAccess *DefiningAccess) { + MemoryAccess *DefiningAccess) const { assert((!LI || LI->isSimple()) && "Not a simple load"); if (auto *DepSI = dyn_cast<StoreInst>(DepInst)) { // Can't forward from non-atomic to atomic without violating memory model. 
@@ -1201,7 +1228,7 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr, return nullptr; } -const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) { +const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const { auto *LI = cast<LoadInst>(I); // We can eliminate in favor of non-simple loads, but we won't be able to @@ -1239,7 +1266,7 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) { } const Expression * -NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) { +NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const { auto *PI = PredInfo->getPredicateInfoFor(I); if (!PI) return nullptr; @@ -1284,7 +1311,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) { return nullptr; if (CopyOf != Cmp->getOperand(0) && CopyOf != Cmp->getOperand(1)) { - DEBUG(dbgs() << "Copy is not of any condition operands!"); + DEBUG(dbgs() << "Copy is not of any condition operands!\n"); return nullptr; } Value *FirstOp = lookupOperandLeader(Cmp->getOperand(0)); @@ -1329,7 +1356,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) { } // Evaluate read only and pure calls, and create an expression result. -const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I) { +const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I) const { auto *CI = cast<CallInst>(I); if (auto *II = dyn_cast<IntrinsicInst>(I)) { // Instrinsics with the returned attribute are copies of arguments. @@ -1366,8 +1393,7 @@ bool NewGVN::setMemoryClass(const MemoryAccess *From, DEBUG(dbgs() << "Setting " << *From); DEBUG(dbgs() << " equivalent to congruence class "); DEBUG(dbgs() << NewClass->getID() << " with current MemoryAccess leader "); - DEBUG(dbgs() << *NewClass->getMemoryLeader()); - DEBUG(dbgs() << "\n"); + DEBUG(dbgs() << *NewClass->getMemoryLeader() << "\n"); auto LookupResult = MemoryAccessToClass.find(From); bool Changed = false; @@ -1381,7 +1407,7 @@ bool NewGVN::setMemoryClass(const MemoryAccess *From, NewClass->memory_insert(MP); // This may have killed the class if it had no non-memory members if (OldClass->getMemoryLeader() == From) { - if (OldClass->memory_empty()) { + if (OldClass->definesNoMemory()) { OldClass->setMemoryLeader(nullptr); } else { OldClass->setMemoryLeader(getNextMemoryLeader(OldClass)); @@ -1406,7 +1432,7 @@ bool NewGVN::setMemoryClass(const MemoryAccess *From, // Determine if a phi is cycle-free. That means the values in the phi don't // depend on any expressions that can change value as a result of the phi. // For example, a non-cycle free phi would be v = phi(0, v+1). -bool NewGVN::isCycleFree(const PHINode *PN) { +bool NewGVN::isCycleFree(const PHINode *PN) const { // In order to compute cycle-freeness, we do SCC finding on the phi, and see // what kind of SCC it ends up in. If it is a singleton, it is cycle-free. // If it is not in a singleton, it is only cycle free if the other members are @@ -1436,7 +1462,7 @@ bool NewGVN::isCycleFree(const PHINode *PN) { } // Evaluate PHI nodes symbolically, and create an expression result. -const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) { +const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { // True if one of the incoming phi edges is a backedge. 
bool HasBackedge = false; // All constant tracks the state of whether all the *original* phi operands @@ -1510,7 +1536,8 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) { return E; } -const Expression *NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) { +const Expression * +NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) const { if (auto *EI = dyn_cast<ExtractValueInst>(I)) { auto *II = dyn_cast<IntrinsicInst>(EI->getAggregateOperand()); if (II && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) { @@ -1548,7 +1575,7 @@ const Expression *NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) { return createAggregateValueExpression(I); } -const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) { +const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) const { auto *CI = dyn_cast<CmpInst>(I); // See if our operands are equal to those of a previous predicate, and if so, // if it implies true or false. @@ -1663,7 +1690,7 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) { } // Substitute and symbolize the value before value numbering. -const Expression *NewGVN::performSymbolicEvaluation(Value *V) { +const Expression *NewGVN::performSymbolicEvaluation(Value *V) const { const Expression *E = nullptr; if (auto *C = dyn_cast<Constant>(V)) E = createConstantExpression(C); @@ -1749,7 +1776,7 @@ void NewGVN::markUsersTouched(Value *V) { } } -void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) { +void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const { DEBUG(dbgs() << "Adding memory user " << *U << " to " << *To << "\n"); MemoryToUsers[To].insert(U); } @@ -1772,7 +1799,7 @@ void NewGVN::markMemoryUsersTouched(const MemoryAccess *MA) { } // Add I to the set of users of a given predicate. -void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) { +void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const { if (auto *PBranch = dyn_cast<PredicateBranch>(PB)) PredicateToUsers[PBranch->Condition].insert(I); else if (auto *PAssume = dyn_cast<PredicateBranch>(PB)) @@ -1825,8 +1852,7 @@ const MemoryAccess *NewGVN::getNextMemoryLeader(CongruenceClass *CC) const { // TODO: If this ends up to slow, we can maintain a next memory leader like we // do for regular leaders. // Make sure there will be a leader to find - assert((CC->getStoreCount() > 0 || !CC->memory_empty()) && - "Can't get next leader if there is none"); + assert(!CC->definesNoMemory() && "Can't get next leader if there is none"); if (CC->getStoreCount() > 0) { if (auto *NL = dyn_cast_or_null<StoreInst>(CC->getNextLeader().first)) return MSSA->getMemoryAccess(NL); @@ -1898,7 +1924,7 @@ void NewGVN::moveMemoryToNewCongruenceClass(Instruction *I, setMemoryClass(InstMA, NewClass); // Now, fixup the old class if necessary if (OldClass->getMemoryLeader() == InstMA) { - if (OldClass->getStoreCount() != 0 || !OldClass->memory_empty()) { + if (!OldClass->definesNoMemory()) { OldClass->setMemoryLeader(getNextMemoryLeader(OldClass)); DEBUG(dbgs() << "Memory class leader change for class " << OldClass->getID() << " to " @@ -1956,10 +1982,9 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, if (NewClass->getStoreCount() == 0 && !NewClass->getStoredValue()) { // If it's a store expression we are using, it means we are not equivalent // to something earlier. 
- if (isa<StoreExpression>(E)) { - assert(lookupOperandLeader(SI->getValueOperand()) != - NewClass->getLeader()); - NewClass->setStoredValue(lookupOperandLeader(SI->getValueOperand())); + if (auto *SE = dyn_cast<StoreExpression>(E)) { + assert(SE->getStoredValue() != NewClass->getLeader()); + NewClass->setStoredValue(SE->getStoredValue()); markValueLeaderChangeTouched(NewClass); // Shift the new class leader to be the store DEBUG(dbgs() << "Changing leader of congruence class " @@ -1985,7 +2010,7 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, // See if we destroyed the class or need to swap leaders. if (OldClass->empty() && OldClass != TOPClass) { if (OldClass->getDefiningExpr()) { - DEBUG(dbgs() << "Erasing expression " << OldClass->getDefiningExpr() + DEBUG(dbgs() << "Erasing expression " << *OldClass->getDefiningExpr() << " from table\n"); ExpressionToClass.erase(OldClass->getDefiningExpr()); } @@ -2064,7 +2089,7 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { } else if (const auto *SE = dyn_cast<StoreExpression>(E)) { StoreInst *SI = SE->getStoreInst(); NewClass->setLeader(SI); - NewClass->setStoredValue(lookupOperandLeader(SI->getValueOperand())); + NewClass->setStoredValue(SE->getStoredValue()); // The RepMemoryAccess field will be filled in properly by the // moveValueToNewCongruenceClass call. } else { @@ -2523,6 +2548,19 @@ void NewGVN::verifyMemoryCongruency() const { return false; if (auto *MemDef = dyn_cast<MemoryDef>(Pair.first)) return !isInstructionTriviallyDead(MemDef->getMemoryInst()); + + // We could have phi nodes which operands are all trivially dead, + // so we don't process them. + if (auto *MemPHI = dyn_cast<MemoryPhi>(Pair.first)) { + for (auto &U : MemPHI->incoming_values()) { + if (Instruction *I = dyn_cast<Instruction>(U.get())) { + if (!isInstructionTriviallyDead(I)) + return true; + } + } + return false; + } + return true; }; diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index fb1b47c48276..4f608c97147d 100644 --- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -55,7 +55,7 @@ static void replaceLoopUsesWithConstant(Loop &L, Value &LIC, /// Update the dominator tree after removing one exiting predecessor of a loop /// exit block. static void updateLoopExitIDom(BasicBlock *LoopExitBB, Loop &L, - DominatorTree &DT) { + DominatorTree &DT) { assert(pred_begin(LoopExitBB) != pred_end(LoopExitBB) && "Cannot have empty predecessors of the loop exit block if we split " "off a block to unswitch!"); @@ -137,6 +137,98 @@ static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH, } } +/// Check that all the LCSSA PHI nodes in the loop exit block have trivial +/// incoming values along this edge. +static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB, + BasicBlock &ExitBB) { + for (Instruction &I : ExitBB) { + auto *PN = dyn_cast<PHINode>(&I); + if (!PN) + // No more PHIs to check. + return true; + + // If the incoming value for this edge isn't loop invariant the unswitch + // won't be trivial. + if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB))) + return false; + } + llvm_unreachable("Basic blocks should never be empty!"); +} + +/// Rewrite the PHI nodes in an unswitched loop exit basic block. +/// +/// Requires that the loop exit and unswitched basic block are the same, and +/// that the exiting block was a unique predecessor of that block. 
Rewrites the +/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial +/// PHI nodes from the old preheader that now contains the unswitched +/// terminator. +static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, + BasicBlock &OldExitingBB, + BasicBlock &OldPH) { + for (Instruction &I : UnswitchedBB) { + auto *PN = dyn_cast<PHINode>(&I); + if (!PN) + // No more PHIs to check. + break; + + // When the loop exit is directly unswitched we just need to update the + // incoming basic block. We loop to handle weird cases with repeated + // incoming blocks, but expect to typically only have one operand here. + for (auto i : llvm::seq<int>(0, PN->getNumOperands())) { + assert(PN->getIncomingBlock(i) == &OldExitingBB && + "Found incoming block different from unique predecessor!"); + PN->setIncomingBlock(i, &OldPH); + } + } +} + +/// Rewrite the PHI nodes in the loop exit basic block and the split off +/// unswitched block. +/// +/// Because the exit block remains an exit from the loop, this rewrites the +/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI +/// nodes into the unswitched basic block to select between the value in the +/// old preheader and the loop exit. +static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, + BasicBlock &UnswitchedBB, + BasicBlock &OldExitingBB, + BasicBlock &OldPH) { + assert(&ExitBB != &UnswitchedBB && + "Must have different loop exit and unswitched blocks!"); + Instruction *InsertPt = &*UnswitchedBB.begin(); + for (Instruction &I : ExitBB) { + auto *PN = dyn_cast<PHINode>(&I); + if (!PN) + // No more PHIs to check. + break; + + auto *NewPN = PHINode::Create(PN->getType(), /*NumReservedValues*/ 2, + PN->getName() + ".split", InsertPt); + + // Walk backwards over the old PHI node's inputs to minimize the cost of + // removing each one. We have to do this weird loop manually so that we + // create the same number of new incoming edges in the new PHI as we expect + // each case-based edge to be included in the unswitched switch in some + // cases. + // FIXME: This is really, really gross. It would be much cleaner if LLVM + // allowed us to create a single entry for a predecessor block without + // having separate entries for each "edge" even though these edges are + // required to produce identical results. + for (int i = PN->getNumIncomingValues() - 1; i >= 0; --i) { + if (PN->getIncomingBlock(i) != &OldExitingBB) + continue; + + Value *Incoming = PN->removeIncomingValue(i); + NewPN->addIncoming(Incoming, &OldPH); + } + + // Now replace the old PHI with the new one and wire the old one in as an + // input to the new one. + PN->replaceAllUsesWith(NewPN); + NewPN->addIncoming(PN, &ExitBB); + } +} + /// Unswitch a trivial branch if the condition is loop invariant. /// /// This routine should only be called when loop code leading to the branch has @@ -187,10 +279,8 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, assert(L.contains(ContinueBB) && "Cannot have both successors exit and still be in the loop!"); - // If the loop exit block contains phi nodes, this isn't trivial. - // FIXME: We should examine the PHI to determine whether or not we can handle - // it trivially. 
- if (isa<PHINode>(LoopExitBB->begin())) + auto *ParentBB = BI.getParent(); + if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) return false; DEBUG(dbgs() << " unswitching trivial branch when: " << CondVal @@ -209,14 +299,13 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, BasicBlock *UnswitchedBB; if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) { (void)PredBB; - assert(PredBB == BI.getParent() && "A branch's parent is't a predecessor!"); + assert(PredBB == BI.getParent() && + "A branch's parent isn't a predecessor!"); UnswitchedBB = LoopExitBB; } else { UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI); } - BasicBlock *ParentBB = BI.getParent(); - // Now splice the branch to gate reaching the new preheader and re-point its // successors. OldPH->getInstList().splice(std::prev(OldPH->end()), @@ -229,6 +318,13 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, // terminator. BranchInst::Create(ContinueBB, ParentBB); + // Rewrite the relevant PHI nodes. + if (UnswitchedBB == LoopExitBB) + rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH); + else + rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB, + *ParentBB, *OldPH); + // Now we need to update the dominator tree. updateDTAfterUnswitch(UnswitchedBB, OldPH, DT); // But if we split something off of the loop exit block then we also removed @@ -278,6 +374,8 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, if (!L.isLoopInvariant(LoopCond)) return false; + auto *ParentBB = SI.getParent(); + // FIXME: We should compute this once at the start and update it! SmallVector<BasicBlock *, 16> ExitBlocks; L.getExitBlocks(ExitBlocks); @@ -287,12 +385,13 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, SmallVector<int, 4> ExitCaseIndices; for (auto Case : SI.cases()) { auto *SuccBB = Case.getCaseSuccessor(); - if (ExitBlockSet.count(SuccBB) && !isa<PHINode>(SuccBB->begin())) + if (ExitBlockSet.count(SuccBB) && + areLoopExitPHIsLoopInvariant(L, *ParentBB, *SuccBB)) ExitCaseIndices.push_back(Case.getCaseIndex()); } BasicBlock *DefaultExitBB = nullptr; if (ExitBlockSet.count(SI.getDefaultDest()) && - !isa<PHINode>(SI.getDefaultDest()->begin()) && + areLoopExitPHIsLoopInvariant(L, *ParentBB, *SI.getDefaultDest()) && !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator())) DefaultExitBB = SI.getDefaultDest(); else if (ExitCaseIndices.empty()) @@ -330,7 +429,6 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, if (CommonSuccBB) { SI.setDefaultDest(CommonSuccBB); } else { - BasicBlock *ParentBB = SI.getParent(); BasicBlock *UnreachableBB = BasicBlock::Create( ParentBB->getContext(), Twine(ParentBB->getName()) + ".unreachable_default", @@ -358,30 +456,44 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, // Now add the unswitched switch. auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH); - // Split any exit blocks with remaining in-loop predecessors. We walk in - // reverse so that we split in the same order as the cases appeared. This is - // purely for convenience of reading the resulting IR, but it doesn't cost - // anything really. + // Rewrite the IR for the unswitched basic blocks. This requires two steps. + // First, we split any exit blocks with remaining in-loop predecessors. Then + // we update the PHIs in one of two ways depending on if there was a split. 
+ // We walk in reverse so that we split in the same order as the cases + // appeared. This is purely for convenience of reading the resulting IR, but + // it doesn't cost anything really. + SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs; SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap; // Handle the default exit if necessary. // FIXME: It'd be great if we could merge this with the loop below but LLVM's // ranges aren't quite powerful enough yet. - if (DefaultExitBB && !pred_empty(DefaultExitBB)) { - auto *SplitBB = - SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI); - updateLoopExitIDom(DefaultExitBB, L, DT); - DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB; + if (DefaultExitBB) { + if (pred_empty(DefaultExitBB)) { + UnswitchedExitBBs.insert(DefaultExitBB); + rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH); + } else { + auto *SplitBB = + SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI); + rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB, + *ParentBB, *OldPH); + updateLoopExitIDom(DefaultExitBB, L, DT); + DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB; + } } // Note that we must use a reference in the for loop so that we update the // container. for (auto &CasePair : reverse(ExitCases)) { // Grab a reference to the exit block in the pair so that we can update it. - BasicBlock *&ExitBB = CasePair.second; + BasicBlock *ExitBB = CasePair.second; // If this case is the last edge into the exit block, we can simply reuse it // as it will no longer be a loop exit. No mapping necessary. - if (pred_empty(ExitBB)) + if (pred_empty(ExitBB)) { + // Only rewrite once. + if (UnswitchedExitBBs.insert(ExitBB).second) + rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH); continue; + } // Otherwise we need to split the exit block so that we retain an exit // block from the loop and a target for the unswitched condition. @@ -389,9 +501,12 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, if (!SplitExitBB) { // If this is the first time we see this, do the split and remember it. SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI); + rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB, + *ParentBB, *OldPH); updateLoopExitIDom(ExitBB, L, DT); } - ExitBB = SplitExitBB; + // Update the case pair to point to the split block. + CasePair.second = SplitExitBB; } // Now add the unswitched cases. 
We do this in reverse order as we built them diff --git a/lib/Transforms/Scalar/SpeculativeExecution.cpp b/lib/Transforms/Scalar/SpeculativeExecution.cpp index a0fc966cee2c..a7c308b59877 100644 --- a/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ b/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -208,6 +208,47 @@ bool SpeculativeExecutionPass::runOnBasicBlock(BasicBlock &B) { return false; } +static unsigned ComputeSpeculationCost(const Instruction *I, + const TargetTransformInfo &TTI) { + switch (Operator::getOpcode(I)) { + case Instruction::GetElementPtr: + case Instruction::Add: + case Instruction::Mul: + case Instruction::And: + case Instruction::Or: + case Instruction::Select: + case Instruction::Shl: + case Instruction::Sub: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::Xor: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::Call: + case Instruction::BitCast: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::AddrSpaceCast: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPExt: + case Instruction::FPTrunc: + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + case Instruction::ICmp: + case Instruction::FCmp: + return TTI.getUserCost(I); + + default: + return UINT_MAX; // Disallow anything not whitelisted. + } +} + bool SpeculativeExecutionPass::considerHoistingFromTo( BasicBlock &FromBlock, BasicBlock &ToBlock) { SmallSet<const Instruction *, 8> NotHoisted; @@ -223,7 +264,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo( unsigned TotalSpeculationCost = 0; for (auto& I : FromBlock) { - const unsigned Cost = TTI->getUserCost(&I); + const unsigned Cost = ComputeSpeculationCost(&I, *TTI); if (Cost != UINT_MAX && isSafeToSpeculativelyExecute(&I) && AllPrecedingUsesFromBlockHoisted(&I)) { TotalSpeculationCost += Cost; |
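
The largest functional change in this import is the new CTLZ idiom recognition in LoopIdiomRecognize (detectCTLZIdiom / recognizeAndInsertCTLZ / transformLoopToCountable). As a reading aid only — not part of the patch — here is a minimal C++ sketch of the loop shape the pass matches and the closed form it substitutes. The helper names are invented, __builtin_clz (a GCC/Clang builtin) stands in for the llvm.ctlz intrinsic the pass actually emits, and an unsigned logical shift is used for simplicity even though the IR matcher looks for an arithmetic shift right by one.

```cpp
// Illustrative sketch only; the function names below are hypothetical.
#include <cassert>
#include <climits>
#include <initializer_list>

// The non-countable idiom: count iterations while shifting x right by one.
// Mirrors the doc comment in detectCTLZIdiom; the precondition x0 != 0
// corresponds to the "if (x0 == 0) goto loop-exit" guard described there.
static int countShiftsLoop(unsigned x0) {
  assert(x0 != 0 && "idiom precondition: loop entered only when x0 != 0");
  int cnt = 0;
  unsigned x = x0;
  do {
    ++cnt;          // cnt.next = cnt + 1   (CntInst)
    x >>= 1;        // x.next  = x >> 1     (DefX)
  } while (x != 0); // loop-back test on x.next
  return cnt;
}

// The countable form: Count = BitWidth(x0) - ctlz(x0), computed up front
// in the preheader, as transformLoopToCountable does.
static int countShiftsClosedForm(unsigned x0) {
  assert(x0 != 0 && "__builtin_clz(0) is undefined, like ctlz without ZeroCheck");
  return static_cast<int>(sizeof(unsigned) * CHAR_BIT) - __builtin_clz(x0);
}

int main() {
  for (unsigned v : {1u, 2u, 7u, 1024u, 0x80000000u})
    assert(countShiftsLoop(v) == countShiftsClosedForm(v));
  return 0;
}
```

If the original counter phi is also used after the loop, the patch instead computes CountPrev = BitWidth - ctlz(InitX >> 1) and rewrites those uses, so the rewritten loop (or the whole loop, when its body is otherwise empty) can later be deleted.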