author     Dimitry Andric <dim@FreeBSD.org>    2017-05-16 19:46:52 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2017-05-16 19:46:52 +0000
commit     6b3f41ed88e8e440e11a4fbf20b6600529f80049
tree       928b056f24a634d628c80238dbbf10d41b1a71d5
parent     c46e6a5940c50058e00c0c5f9123fd82e338d29a

Vendor import of llvm trunk r303197 (vendor/llvm/llvm-trunk-r303197)
Notes:
    svn path=/vendor/llvm/dist/; revision=318368
    svn path=/vendor/llvm/llvm-trunk-r303197/; revision=318369; tag=vendor/llvm/llvm-trunk-r303197
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--   lib/Transforms/Scalar/CorrelatedValuePropagation.cpp  |  10
-rw-r--r--   lib/Transforms/Scalar/LoopIdiomRecognize.cpp           | 295
-rw-r--r--   lib/Transforms/Scalar/NewGVN.cpp                       | 210
-rw-r--r--   lib/Transforms/Scalar/SimpleLoopUnswitch.cpp           | 161
-rw-r--r--   lib/Transforms/Scalar/SpeculativeExecution.cpp         |  43
5 files changed, 602 insertions, 117 deletions
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 3f1a77b49a44..ee493a8ec7e1 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -442,9 +442,8 @@ static bool processAdd(BinaryOperator *AddOp, LazyValueInfo *LVI) {
bool Changed = false;
if (!NUW) {
- ConstantRange NUWRange =
- LRange.makeGuaranteedNoWrapRegion(BinaryOperator::Add, LRange,
- OBO::NoUnsignedWrap);
+ ConstantRange NUWRange = ConstantRange::makeGuaranteedNoWrapRegion(
+ BinaryOperator::Add, LRange, OBO::NoUnsignedWrap);
if (!NUWRange.isEmptySet()) {
bool NewNUW = NUWRange.contains(LazyRRange());
AddOp->setHasNoUnsignedWrap(NewNUW);
@@ -452,9 +451,8 @@ static bool processAdd(BinaryOperator *AddOp, LazyValueInfo *LVI) {
}
}
if (!NSW) {
- ConstantRange NSWRange =
- LRange.makeGuaranteedNoWrapRegion(BinaryOperator::Add, LRange,
- OBO::NoSignedWrap);
+ ConstantRange NSWRange = ConstantRange::makeGuaranteedNoWrapRegion(
+ BinaryOperator::Add, LRange, OBO::NoSignedWrap);
if (!NSWRange.isEmptySet()) {
bool NewNSW = NSWRange.contains(LazyRRange());
AddOp->setHasNoSignedWrap(NewNSW);
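Illustration (not part of the diff): the two hunks above only switch to the static ConstantRange::makeGuaranteedNoWrapRegion spelling, but the idea behind the check is that, given the range of one operand of an add, there is a range of values the other operand may take without the add ever wrapping. A minimal standalone C++ sketch of that idea for an unsigned 8-bit add; it does not use the LLVM API, and the ranges are made up:

#include <cstdint>
#include <cstdio>

struct Range8 { uint8_t Lo, Hi; };             // inclusive unsigned range

// The right-operand range guaranteed not to wrap an 8-bit unsigned add when
// the left operand lies in L: anything up to 255 - L.Hi is safe.
static Range8 noUnsignedWrapRegion(Range8 L) { return {0, uint8_t(255 - L.Hi)}; }

static bool contains(Range8 Outer, Range8 Inner) {
  return Outer.Lo <= Inner.Lo && Inner.Hi <= Outer.Hi;
}

int main() {
  Range8 LRange = {10, 100};                   // known range of the left operand
  Range8 RRange = {0, 150};                    // known range of the right operand
  Range8 NUW = noUnsignedWrapRegion(LRange);   // here: [0, 155]
  // Mirrors the NUWRange.contains(LazyRRange()) test in the hunk above.
  std::printf("add is provably nuw: %s\n", contains(NUW, RRange) ? "yes" : "no");
}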
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 48d5ae88cda9..6693a26e8890 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -144,6 +144,10 @@ private:
bool recognizePopcount();
void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
PHINode *CntPhi, Value *Var);
+ bool recognizeAndInsertCTLZ();
+ void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst,
+ PHINode *CntPhi, Value *Var, const DebugLoc DL,
+ bool ZeroCheck, bool IsCntPhiUsedOutsideLoop);
/// @}
};
@@ -994,7 +998,7 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
}
bool LoopIdiomRecognize::runOnNoncountableLoop() {
- return recognizePopcount();
+ return recognizePopcount() || recognizeAndInsertCTLZ();
}
/// Check if the given conditional branch is based on the comparison between
@@ -1159,6 +1163,167 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
return true;
}
+/// Return true if the idiom is detected in the loop.
+///
+/// Additionally:
+/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
+/// or nullptr if there is no such.
+/// 2) \p CntPhi is set to the corresponding phi node
+/// or nullptr if there is no such.
+/// 3) \p Var is set to the value whose CTLZ could be used.
+/// 4) \p DefX is set to the instruction calculating Loop exit condition.
+///
+/// The core idiom we are trying to detect is:
+/// \code
+/// if (x0 == 0)
+/// goto loop-exit // the precondition of the loop
+/// cnt0 = init-val;
+/// do {
+/// x = phi (x0, x.next); //PhiX
+/// cnt = phi(cnt0, cnt.next);
+///
+/// cnt.next = cnt + 1;
+/// ...
+/// x.next = x >> 1; // DefX
+/// ...
+/// } while(x.next != 0);
+///
+/// loop-exit:
+/// \endcode
+static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
+ Instruction *&CntInst, PHINode *&CntPhi,
+ Instruction *&DefX) {
+ BasicBlock *LoopEntry;
+ Value *VarX = nullptr;
+
+ DefX = nullptr;
+ PhiX = nullptr;
+ CntInst = nullptr;
+ CntPhi = nullptr;
+ LoopEntry = *(CurLoop->block_begin());
+
+ // step 1: Check if the loop-back branch is in desirable form.
+ if (Value *T = matchCondition(
+ dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
+ DefX = dyn_cast<Instruction>(T);
+ else
+ return false;
+
+ // step 2: detect instructions corresponding to "x.next = x >> 1"
+ if (!DefX || DefX->getOpcode() != Instruction::AShr)
+ return false;
+ if (ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1)))
+ if (!Shft || !Shft->isOne())
+ return false;
+ VarX = DefX->getOperand(0);
+
+ // step 3: Check the recurrence of variable X
+ PhiX = dyn_cast<PHINode>(VarX);
+ if (!PhiX || (PhiX->getOperand(0) != DefX && PhiX->getOperand(1) != DefX))
+ return false;
+
+ // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
+ // TODO: We can skip the step. If loop trip count is known (CTLZ),
+ // then all uses of "cnt.next" could be optimized to the trip count
+ // plus "cnt0". Currently it is not optimized.
+ // This step could be used to detect POPCNT instruction:
+ // cnt.next = cnt + (x.next & 1)
+ for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
+ IterE = LoopEntry->end();
+ Iter != IterE; Iter++) {
+ Instruction *Inst = &*Iter;
+ if (Inst->getOpcode() != Instruction::Add)
+ continue;
+
+ ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
+ if (!Inc || !Inc->isOne())
+ continue;
+
+ PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0));
+ if (!Phi || Phi->getParent() != LoopEntry)
+ continue;
+
+ CntInst = Inst;
+ CntPhi = Phi;
+ break;
+ }
+ if (!CntInst)
+ return false;
+
+ return true;
+}
+
+/// Recognize CTLZ idiom in a non-countable loop and convert the loop
+/// to countable (with CTLZ trip count).
+/// If CTLZ inserted as a new trip count returns true; otherwise, returns false.
+bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
+ // Give up if the loop has multiple blocks or multiple backedges.
+ if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
+ return false;
+
+ Instruction *CntInst, *DefX;
+ PHINode *CntPhi, *PhiX;
+ if (!detectCTLZIdiom(CurLoop, PhiX, CntInst, CntPhi, DefX))
+ return false;
+
+ bool IsCntPhiUsedOutsideLoop = false;
+ for (User *U : CntPhi->users())
+ if (!CurLoop->contains(dyn_cast<Instruction>(U))) {
+ IsCntPhiUsedOutsideLoop = true;
+ break;
+ }
+ bool IsCntInstUsedOutsideLoop = false;
+ for (User *U : CntInst->users())
+ if (!CurLoop->contains(dyn_cast<Instruction>(U))) {
+ IsCntInstUsedOutsideLoop = true;
+ break;
+ }
+ // If both CntInst and CntPhi are used outside the loop the profitability
+ // is questionable.
+ if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
+ return false;
+
+ // For some CPUs result of CTLZ(X) intrinsic is undefined
+ // when X is 0. If we can not guarantee X != 0, we need to check this
+ // when expand.
+ bool ZeroCheck = false;
+ // It is safe to assume Preheader exist as it was checked in
+ // parent function RunOnLoop.
+ BasicBlock *PH = CurLoop->getLoopPreheader();
+ Value *InitX = PhiX->getIncomingValueForBlock(PH);
+ // If we check X != 0 before entering the loop we don't need a zero
+ // check in CTLZ intrinsic.
+ if (BasicBlock *PreCondBB = PH->getSinglePredecessor())
+ if (BranchInst *PreCondBr =
+ dyn_cast<BranchInst>(PreCondBB->getTerminator())) {
+ if (matchCondition(PreCondBr, PH) == InitX)
+ ZeroCheck = true;
+ }
+
+ // Check if CTLZ intrinsic is profitable. Assume it is always profitable
+ // if we delete the loop (the loop has only 6 instructions):
+ // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
+ // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
+ // %shr = ashr %n.addr.0, 1
+ // %tobool = icmp eq %shr, 0
+ // %inc = add nsw %i.0, 1
+ // br i1 %tobool
+
+ IRBuilder<> Builder(PH->getTerminator());
+ SmallVector<const Value *, 2> Ops =
+ {InitX, ZeroCheck ? Builder.getTrue() : Builder.getFalse()};
+ ArrayRef<const Value *> Args(Ops);
+ if (CurLoop->getHeader()->size() != 6 &&
+ TTI->getIntrinsicCost(Intrinsic::ctlz, InitX->getType(), Args) >
+ TargetTransformInfo::TCC_Basic)
+ return false;
+
+ const DebugLoc DL = DefX->getDebugLoc();
+ transformLoopToCountable(PH, CntInst, CntPhi, InitX, DL, ZeroCheck,
+ IsCntPhiUsedOutsideLoop);
+ return true;
+}
+
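Illustration (not part of the diff): the idiom detected above keeps shifting a value right by one and bumping a counter until the shifted value reaches zero, so its trip count is exactly BitWidth - CTLZ(InitX); that equality is what lets the pass turn the loop into a countable one. A small standalone C++ check of it, using a hand-rolled CTLZ rather than the intrinsic and an unsigned (logical) shift since the values are non-negative:

#include <cstdint>
#include <cstdio>

static unsigned ctlz32(uint32_t X) {           // count leading zeros, X != 0
  unsigned N = 0;
  for (uint32_t Bit = 1u << 31; !(X & Bit); Bit >>= 1)
    ++N;
  return N;
}

static unsigned loopCount(uint32_t X0) {       // the loop shape detectCTLZIdiom matches
  unsigned Cnt = 0;
  uint32_t X = X0;
  do {
    ++Cnt;                                     // cnt.next = cnt + 1
    X >>= 1;                                   // x.next = x >> 1   (DefX)
  } while (X != 0);                            // loop-back test on x.next
  return Cnt;
}

int main() {
  for (uint32_t X0 : {1u, 5u, 12345u, 0x80000000u})
    std::printf("x0=%u  loop trip count=%u  32 - ctlz=%u\n", (unsigned)X0,
                loopCount(X0), 32 - ctlz32(X0));
}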
/// Recognizes a population count idiom in a non-countable loop.
///
/// If detected, transforms the relevant code to issue the popcount intrinsic
@@ -1222,6 +1387,134 @@ static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
return CI;
}
+static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
+ const DebugLoc &DL, bool ZeroCheck) {
+ Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()};
+ Type *Tys[] = {Val->getType()};
+
+ Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
+ Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctlz, Tys);
+ CallInst *CI = IRBuilder.CreateCall(Func, Ops);
+ CI->setDebugLoc(DL);
+
+ return CI;
+}
+
+/// Transform the following loop:
+/// loop:
+/// CntPhi = PHI [Cnt0, CntInst]
+/// PhiX = PHI [InitX, DefX]
+/// CntInst = CntPhi + 1
+/// DefX = PhiX >> 1
+// LOOP_BODY
+/// Br: loop if (DefX != 0)
+/// Use(CntPhi) or Use(CntInst)
+///
+/// Into:
+/// If CntPhi used outside the loop:
+/// CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1)
+/// Count = CountPrev + 1
+/// else
+/// Count = BitWidth(InitX) - CTLZ(InitX)
+/// loop:
+/// CntPhi = PHI [Cnt0, CntInst]
+/// PhiX = PHI [InitX, DefX]
+/// PhiCount = PHI [Count, Dec]
+/// CntInst = CntPhi + 1
+/// DefX = PhiX >> 1
+/// Dec = PhiCount - 1
+/// LOOP_BODY
+/// Br: loop if (Dec != 0)
+/// Use(CountPrev + Cnt0) // Use(CntPhi)
+/// or
+/// Use(Count + Cnt0) // Use(CntInst)
+///
+/// If LOOP_BODY is empty the loop will be deleted.
+/// If CntInst and DefX are not used in LOOP_BODY they will be removed.
+void LoopIdiomRecognize::transformLoopToCountable(
+ BasicBlock *Preheader, Instruction *CntInst, PHINode *CntPhi, Value *InitX,
+ const DebugLoc DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
+ BranchInst *PreheaderBr = dyn_cast<BranchInst>(Preheader->getTerminator());
+
+ // Step 1: Insert the CTLZ instruction at the end of the preheader block
+ // Count = BitWidth - CTLZ(InitX);
+ // If there are uses of CntPhi create:
+ // CountPrev = BitWidth - CTLZ(InitX >> 1);
+ IRBuilder<> Builder(PreheaderBr);
+ Builder.SetCurrentDebugLocation(DL);
+ Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext;
+
+ if (IsCntPhiUsedOutsideLoop)
+ InitXNext = Builder.CreateAShr(InitX,
+ ConstantInt::get(InitX->getType(), 1));
+ else
+ InitXNext = InitX;
+ CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck);
+ Count = Builder.CreateSub(
+ ConstantInt::get(CTLZ->getType(),
+ CTLZ->getType()->getIntegerBitWidth()),
+ CTLZ);
+ if (IsCntPhiUsedOutsideLoop) {
+ CountPrev = Count;
+ Count = Builder.CreateAdd(
+ CountPrev,
+ ConstantInt::get(CountPrev->getType(), 1));
+ }
+ if (IsCntPhiUsedOutsideLoop)
+ NewCount = Builder.CreateZExtOrTrunc(CountPrev,
+ cast<IntegerType>(CntInst->getType()));
+ else
+ NewCount = Builder.CreateZExtOrTrunc(Count,
+ cast<IntegerType>(CntInst->getType()));
+
+ // If the CTLZ counter's initial value is not zero, insert Add Inst.
+ Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
+ ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
+ if (!InitConst || !InitConst->isZero())
+ NewCount = Builder.CreateAdd(NewCount, CntInitVal);
+
+ // Step 2: Insert new IV and loop condition:
+ // loop:
+ // ...
+ // PhiCount = PHI [Count, Dec]
+ // ...
+ // Dec = PhiCount - 1
+ // ...
+ // Br: loop if (Dec != 0)
+ BasicBlock *Body = *(CurLoop->block_begin());
+ auto *LbBr = dyn_cast<BranchInst>(Body->getTerminator());
+ ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
+ Type *Ty = Count->getType();
+
+ PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
+
+ Builder.SetInsertPoint(LbCond);
+ Instruction *TcDec = cast<Instruction>(
+ Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
+ "tcdec", false, true));
+
+ TcPhi->addIncoming(Count, Preheader);
+ TcPhi->addIncoming(TcDec, Body);
+
+ CmpInst::Predicate Pred =
+ (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
+ LbCond->setPredicate(Pred);
+ LbCond->setOperand(0, TcDec);
+ LbCond->setOperand(1, ConstantInt::get(Ty, 0));
+
+ // Step 3: All the references to the original counter outside
+ // the loop are replaced with the NewCount -- the value returned from
+ // __builtin_ctlz(x).
+ if (IsCntPhiUsedOutsideLoop)
+ CntPhi->replaceUsesOutsideBlock(NewCount, Body);
+ else
+ CntInst->replaceUsesOutsideBlock(NewCount, Body);
+
+ // step 4: Forget the "non-computable" trip-count SCEV associated with the
+ // loop. The loop would otherwise not be deleted even if it becomes empty.
+ SE->forgetLoop(CurLoop);
+}
+
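Illustration (not part of the diff): the rewrite above has to distinguish uses of CntInst (the post-increment counter) from uses of CntPhi (the pre-increment counter). A standalone C++ sketch checking that, for the loop shape in the comment, the final CntInst equals Cnt0 + 32 - CTLZ(InitX) while the exiting CntPhi equals Cnt0 + 32 - CTLZ(InitX >> 1), with CTLZ(0) taken as 32; that convention is exactly why ZeroCheck matters when the x0 == 0 guard cannot be proven. The initial counter value below is arbitrary:

#include <cstdint>
#include <cstdio>

static unsigned ctlz32(uint32_t X) {           // CTLZ with CTLZ(0) defined as 32
  unsigned N = 0;
  for (uint32_t Bit = 1u << 31; Bit && !(X & Bit); Bit >>= 1)
    ++N;
  return N;
}

int main() {
  const unsigned Cnt0 = 7;                     // arbitrary initial counter value
  for (uint32_t InitX : {1u, 5u, 12345u}) {
    unsigned Phi = Cnt0, Inst = Cnt0;
    for (uint32_t X = InitX;;) {               // the original non-countable loop
      Phi = Inst;                              // CntPhi: Cnt0 on entry, then previous CntInst
      Inst = Phi + 1;                          // CntInst = CntPhi + 1
      X >>= 1;                                 // DefX = PhiX >> 1
      if (X == 0)                              // exit when x.next == 0
        break;
    }
    std::printf("CntInst %u == %u   CntPhi %u == %u\n",
                Inst, Cnt0 + 32 - ctlz32(InitX),
                Phi, Cnt0 + 32 - ctlz32(InitX >> 1));
  }
}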
void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
Instruction *CntInst,
PHINode *CntPhi, Value *Var) {
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 3c9850b156ac..5e0a705782ea 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -283,7 +283,6 @@ public:
// Forward propagation info
const Expression *getDefiningExpr() const { return DefiningExpr; }
- void setDefiningExpr(const Expression *E) { DefiningExpr = E; }
// Value member set
bool empty() const { return Members.empty(); }
@@ -317,6 +316,9 @@ public:
--StoreCount;
}
+ // True if this class has no memory members.
+ bool definesNoMemory() const { return StoreCount == 0 && memory_empty(); }
+
// Return true if two congruence classes are equivalent to each other. This
// means
// that every field but the ID number and the dead field are equivalent.
@@ -401,9 +403,12 @@ class NewGVN {
MemorySSAWalker *MSSAWalker;
const DataLayout &DL;
std::unique_ptr<PredicateInfo> PredInfo;
- BumpPtrAllocator ExpressionAllocator;
- ArrayRecycler<Value *> ArgRecycler;
- TarjanSCC SCCFinder;
+
+ // These are the only two things the create* functions should have
+ // side-effects on due to allocating memory.
+ mutable BumpPtrAllocator ExpressionAllocator;
+ mutable ArrayRecycler<Value *> ArgRecycler;
+ mutable TarjanSCC SCCFinder;
const SimplifyQuery SQ;
// Number of function arguments, used by ranking
@@ -430,11 +435,12 @@ class NewGVN {
// In order to correctly ensure propagation, we must keep track of what
// comparisons we used, so that when the values of the comparisons change, we
// propagate the information to the places we used the comparison.
- DenseMap<const Value *, SmallPtrSet<Instruction *, 2>> PredicateToUsers;
- // Mapping from MemoryAccess we used to the MemoryAccess we used it with. Has
+ mutable DenseMap<const Value *, SmallPtrSet<Instruction *, 2>>
+ PredicateToUsers;
// the same reasoning as PredicateToUsers. When we skip MemoryAccesses for
// stores, we no longer can rely solely on the def-use chains of MemorySSA.
- DenseMap<const MemoryAccess *, SmallPtrSet<MemoryAccess *, 2>> MemoryToUsers;
+ mutable DenseMap<const MemoryAccess *, SmallPtrSet<MemoryAccess *, 2>>
+ MemoryToUsers;
// A table storing which memorydefs/phis represent a memory state provably
// equivalent to another memory state.
@@ -457,7 +463,7 @@ class NewGVN {
DenseMap<const MemoryPhi *, MemoryPhiState> MemoryPhiState;
enum PhiCycleState { PCS_Unknown, PCS_CycleFree, PCS_Cycle };
- DenseMap<const PHINode *, PhiCycleState> PhiCycleState;
+ mutable DenseMap<const PHINode *, PhiCycleState> PhiCycleState;
// Expression to class mapping.
using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>;
ExpressionClassMap ExpressionToClass;
@@ -511,21 +517,24 @@ public:
private:
// Expression handling.
- const Expression *createExpression(Instruction *);
- const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *);
- PHIExpression *createPHIExpression(Instruction *, bool &HasBackedge,
- bool &AllConstant);
- const VariableExpression *createVariableExpression(Value *);
- const ConstantExpression *createConstantExpression(Constant *);
- const Expression *createVariableOrConstant(Value *V);
- const UnknownExpression *createUnknownExpression(Instruction *);
+ const Expression *createExpression(Instruction *) const;
+ const Expression *createBinaryExpression(unsigned, Type *, Value *,
+ Value *) const;
+ PHIExpression *createPHIExpression(Instruction *, bool &HasBackEdge,
+ bool &AllConstant) const;
+ const VariableExpression *createVariableExpression(Value *) const;
+ const ConstantExpression *createConstantExpression(Constant *) const;
+ const Expression *createVariableOrConstant(Value *V) const;
+ const UnknownExpression *createUnknownExpression(Instruction *) const;
const StoreExpression *createStoreExpression(StoreInst *,
- const MemoryAccess *);
+ const MemoryAccess *) const;
LoadExpression *createLoadExpression(Type *, Value *, LoadInst *,
- const MemoryAccess *);
- const CallExpression *createCallExpression(CallInst *, const MemoryAccess *);
- const AggregateValueExpression *createAggregateValueExpression(Instruction *);
- bool setBasicExpressionInfo(Instruction *, BasicExpression *);
+ const MemoryAccess *) const;
+ const CallExpression *createCallExpression(CallInst *,
+ const MemoryAccess *) const;
+ const AggregateValueExpression *
+ createAggregateValueExpression(Instruction *) const;
+ bool setBasicExpressionInfo(Instruction *, BasicExpression *) const;
// Congruence class handling.
CongruenceClass *createCongruenceClass(Value *Leader, const Expression *E) {
@@ -560,17 +569,18 @@ private:
// Symbolic evaluation.
const Expression *checkSimplificationResults(Expression *, Instruction *,
- Value *);
- const Expression *performSymbolicEvaluation(Value *);
+ Value *) const;
+ const Expression *performSymbolicEvaluation(Value *) const;
const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *,
- Instruction *, MemoryAccess *);
- const Expression *performSymbolicLoadEvaluation(Instruction *);
- const Expression *performSymbolicStoreEvaluation(Instruction *);
- const Expression *performSymbolicCallEvaluation(Instruction *);
- const Expression *performSymbolicPHIEvaluation(Instruction *);
- const Expression *performSymbolicAggrValueEvaluation(Instruction *);
- const Expression *performSymbolicCmpEvaluation(Instruction *);
- const Expression *performSymbolicPredicateInfoEvaluation(Instruction *);
+ Instruction *,
+ MemoryAccess *) const;
+ const Expression *performSymbolicLoadEvaluation(Instruction *) const;
+ const Expression *performSymbolicStoreEvaluation(Instruction *) const;
+ const Expression *performSymbolicCallEvaluation(Instruction *) const;
+ const Expression *performSymbolicPHIEvaluation(Instruction *) const;
+ const Expression *performSymbolicAggrValueEvaluation(Instruction *) const;
+ const Expression *performSymbolicCmpEvaluation(Instruction *) const;
+ const Expression *performSymbolicPredicateInfoEvaluation(Instruction *) const;
// Congruence finding.
bool someEquivalentDominates(const Instruction *, const Instruction *) const;
@@ -620,8 +630,8 @@ private:
void markPredicateUsersTouched(Instruction *);
void markValueLeaderChangeTouched(CongruenceClass *CC);
void markMemoryLeaderChangeTouched(CongruenceClass *CC);
- void addPredicateUsers(const PredicateBase *, Instruction *);
- void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U);
+ void addPredicateUsers(const PredicateBase *, Instruction *) const;
+ void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const;
// Main loop of value numbering
void iterateTouchedInstructions();
@@ -634,7 +644,7 @@ private:
void verifyIterationSettled(Function &F);
bool singleReachablePHIPath(const MemoryAccess *, const MemoryAccess *) const;
BasicBlock *getBlockForValue(Value *V) const;
- void deleteExpression(const Expression *E);
+ void deleteExpression(const Expression *E) const;
unsigned InstrToDFSNum(const Value *V) const {
assert(isa<Instruction>(V) && "This should not be used for MemoryAccesses");
return InstrDFS.lookup(V);
@@ -654,7 +664,7 @@ private:
? InstrToDFSNum(cast<MemoryUseOrDef>(MA)->getMemoryInst())
: InstrDFS.lookup(MA);
}
- bool isCycleFree(const PHINode *PN);
+ bool isCycleFree(const PHINode *PN) const;
template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
// Debug counter info. When verifying, we have to reset the value numbering
// debug counter to the same state it started in to get the same results.
@@ -702,7 +712,7 @@ BasicBlock *NewGVN::getBlockForValue(Value *V) const {
// Delete a definitely dead expression, so it can be reused by the expression
// allocator. Some of these are not in creation functions, so we have to accept
// const versions.
-void NewGVN::deleteExpression(const Expression *E) {
+void NewGVN::deleteExpression(const Expression *E) const {
assert(isa<BasicExpression>(E));
auto *BE = cast<BasicExpression>(E);
const_cast<BasicExpression *>(BE)->deallocateOperands(ArgRecycler);
@@ -710,7 +720,7 @@ void NewGVN::deleteExpression(const Expression *E) {
}
PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
- bool &AllConstant) {
+ bool &AllConstant) const {
BasicBlock *PHIBlock = I->getParent();
auto *PN = cast<PHINode>(I);
auto *E =
@@ -722,30 +732,46 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock));
+ // NewGVN assumes the operands of a PHI node are in a consistent order across
+ // PHIs. LLVM doesn't seem to always guarantee this. While we need to fix
+ // this in LLVM at some point we don't want GVN to find wrong congruences.
+ // Therefore, here we sort uses in predecessor order.
+ // We're sorting the values by pointer. In theory this might be cause of
+ // non-determinism, but here we don't rely on the ordering for anything
+ // significant, e.g. we don't create new instructions based on it so we're
+ // fine.
+ SmallVector<const Use *, 4> PHIOperands;
+ for (const Use &U : PN->operands())
+ PHIOperands.push_back(&U);
+ std::sort(PHIOperands.begin(), PHIOperands.end(),
+ [&](const Use *U1, const Use *U2) {
+ return PN->getIncomingBlock(*U1) < PN->getIncomingBlock(*U2);
+ });
+
// Filter out unreachable phi operands.
- auto Filtered = make_filter_range(PN->operands(), [&](const Use &U) {
- return ReachableEdges.count({PN->getIncomingBlock(U), PHIBlock});
+ auto Filtered = make_filter_range(PHIOperands, [&](const Use *U) {
+ return ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock});
});
std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
- [&](const Use &U) -> Value * {
- auto *BB = PN->getIncomingBlock(U);
+ [&](const Use *U) -> Value * {
+ auto *BB = PN->getIncomingBlock(*U);
auto *DTN = DT->getNode(BB);
if (RPOOrdering.lookup(DTN) >= PHIRPO)
HasBackedge = true;
- AllConstant &= isa<UndefValue>(U) || isa<Constant>(U);
+ AllConstant &= isa<UndefValue>(*U) || isa<Constant>(*U);
// Don't try to transform self-defined phis.
- if (U == PN)
+ if (*U == PN)
return PN;
- return lookupOperandLeader(U);
+ return lookupOperandLeader(*U);
});
return E;
}
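Illustration (not part of the diff): the std::sort added above exists so that two PHIs taking the same values from the same predecessors compare equal even when LLVM happens to list their operands in a different order. A tiny standalone C++ analogy; it does not use the NewGVN classes, and the (block, value) pair representation is invented for the example:

#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

using PhiOp = std::pair<const void *, int>;    // (incoming block, incoming value)

// Same idea as the sort in createPHIExpression: order operands by predecessor
// so that equivalent PHIs produce identical operand lists.
static std::vector<PhiOp> canonicalize(std::vector<PhiOp> Ops) {
  std::sort(Ops.begin(), Ops.end(),
            [](const PhiOp &A, const PhiOp &B) { return A.first < B.first; });
  return Ops;
}

int main() {
  int BB1, BB2;                                // stand-ins for two basic blocks
  std::vector<PhiOp> PhiA = {{&BB1, 10}, {&BB2, 20}};
  std::vector<PhiOp> PhiB = {{&BB2, 20}, {&BB1, 10}};   // same PHI, operands swapped
  std::printf("raw equal: %d  canonical equal: %d\n", int(PhiA == PhiB),
              int(canonicalize(PhiA) == canonicalize(PhiB)));
}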
// Set basic expression info (Arguments, type, opcode) for Expression
// E from Instruction I in block B.
-bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) {
+bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) const {
bool AllConstant = true;
if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
E->setType(GEP->getSourceElementType());
@@ -766,7 +792,8 @@ bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) {
}
const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T,
- Value *Arg1, Value *Arg2) {
+ Value *Arg1,
+ Value *Arg2) const {
auto *E = new (ExpressionAllocator) BasicExpression(2);
E->setType(T);
@@ -795,7 +822,8 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T,
// TODO: Once finished, this should not take an Instruction, we only
// use it for printing.
const Expression *NewGVN::checkSimplificationResults(Expression *E,
- Instruction *I, Value *V) {
+ Instruction *I,
+ Value *V) const {
if (!V)
return nullptr;
if (auto *C = dyn_cast<Constant>(V)) {
@@ -827,7 +855,7 @@ const Expression *NewGVN::checkSimplificationResults(Expression *E,
return nullptr;
}
-const Expression *NewGVN::createExpression(Instruction *I) {
+const Expression *NewGVN::createExpression(Instruction *I) const {
auto *E = new (ExpressionAllocator) BasicExpression(I->getNumOperands());
bool AllConstant = setBasicExpressionInfo(I, E);
@@ -913,7 +941,7 @@ const Expression *NewGVN::createExpression(Instruction *I) {
}
const AggregateValueExpression *
-NewGVN::createAggregateValueExpression(Instruction *I) {
+NewGVN::createAggregateValueExpression(Instruction *I) const {
if (auto *II = dyn_cast<InsertValueInst>(I)) {
auto *E = new (ExpressionAllocator)
AggregateValueExpression(I->getNumOperands(), II->getNumIndices());
@@ -932,32 +960,32 @@ NewGVN::createAggregateValueExpression(Instruction *I) {
llvm_unreachable("Unhandled type of aggregate value operation");
}
-const VariableExpression *NewGVN::createVariableExpression(Value *V) {
+const VariableExpression *NewGVN::createVariableExpression(Value *V) const {
auto *E = new (ExpressionAllocator) VariableExpression(V);
E->setOpcode(V->getValueID());
return E;
}
-const Expression *NewGVN::createVariableOrConstant(Value *V) {
+const Expression *NewGVN::createVariableOrConstant(Value *V) const {
if (auto *C = dyn_cast<Constant>(V))
return createConstantExpression(C);
return createVariableExpression(V);
}
-const ConstantExpression *NewGVN::createConstantExpression(Constant *C) {
+const ConstantExpression *NewGVN::createConstantExpression(Constant *C) const {
auto *E = new (ExpressionAllocator) ConstantExpression(C);
E->setOpcode(C->getValueID());
return E;
}
-const UnknownExpression *NewGVN::createUnknownExpression(Instruction *I) {
+const UnknownExpression *NewGVN::createUnknownExpression(Instruction *I) const {
auto *E = new (ExpressionAllocator) UnknownExpression(I);
E->setOpcode(I->getOpcode());
return E;
}
-const CallExpression *NewGVN::createCallExpression(CallInst *CI,
- const MemoryAccess *MA) {
+const CallExpression *
+NewGVN::createCallExpression(CallInst *CI, const MemoryAccess *MA) const {
// FIXME: Add operand bundles for calls.
auto *E =
new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, MA);
@@ -1017,9 +1045,8 @@ Value *NewGVN::lookupOperandLeader(Value *V) const {
const MemoryAccess *NewGVN::lookupMemoryLeader(const MemoryAccess *MA) const {
auto *CC = getMemoryClass(MA);
assert(CC->getMemoryLeader() &&
- "Every MemoryAccess should be mapped to a "
- "congruence class with a represenative memory "
- "access");
+ "Every MemoryAccess should be mapped to a congruence class with a "
+ "representative memory access");
return CC->getMemoryLeader();
}
@@ -1032,7 +1059,7 @@ bool NewGVN::isMemoryAccessTop(const MemoryAccess *MA) const {
LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp,
LoadInst *LI,
- const MemoryAccess *MA) {
+ const MemoryAccess *MA) const {
auto *E =
new (ExpressionAllocator) LoadExpression(1, LI, lookupMemoryLeader(MA));
E->allocateOperands(ArgRecycler, ExpressionAllocator);
@@ -1050,8 +1077,8 @@ LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp,
return E;
}
-const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI,
- const MemoryAccess *MA) {
+const StoreExpression *
+NewGVN::createStoreExpression(StoreInst *SI, const MemoryAccess *MA) const {
auto *StoredValueLeader = lookupOperandLeader(SI->getValueOperand());
auto *E = new (ExpressionAllocator)
StoreExpression(SI->getNumOperands(), SI, StoredValueLeader, MA);
@@ -1068,7 +1095,7 @@ const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI,
return E;
}
-const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) {
+const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const {
// Unlike loads, we never try to eliminate stores, so we do not check if they
// are simple and avoid value numbering them.
auto *SI = cast<StoreInst>(I);
@@ -1126,7 +1153,7 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) {
const Expression *
NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
LoadInst *LI, Instruction *DepInst,
- MemoryAccess *DefiningAccess) {
+ MemoryAccess *DefiningAccess) const {
assert((!LI || LI->isSimple()) && "Not a simple load");
if (auto *DepSI = dyn_cast<StoreInst>(DepInst)) {
// Can't forward from non-atomic to atomic without violating memory model.
@@ -1201,7 +1228,7 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
return nullptr;
}
-const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) {
+const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const {
auto *LI = cast<LoadInst>(I);
// We can eliminate in favor of non-simple loads, but we won't be able to
@@ -1239,7 +1266,7 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) {
}
const Expression *
-NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) {
+NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const {
auto *PI = PredInfo->getPredicateInfoFor(I);
if (!PI)
return nullptr;
@@ -1284,7 +1311,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) {
return nullptr;
if (CopyOf != Cmp->getOperand(0) && CopyOf != Cmp->getOperand(1)) {
- DEBUG(dbgs() << "Copy is not of any condition operands!");
+ DEBUG(dbgs() << "Copy is not of any condition operands!\n");
return nullptr;
}
Value *FirstOp = lookupOperandLeader(Cmp->getOperand(0));
@@ -1329,7 +1356,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) {
}
// Evaluate read only and pure calls, and create an expression result.
-const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I) {
+const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I) const {
auto *CI = cast<CallInst>(I);
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
// Instrinsics with the returned attribute are copies of arguments.
@@ -1366,8 +1393,7 @@ bool NewGVN::setMemoryClass(const MemoryAccess *From,
DEBUG(dbgs() << "Setting " << *From);
DEBUG(dbgs() << " equivalent to congruence class ");
DEBUG(dbgs() << NewClass->getID() << " with current MemoryAccess leader ");
- DEBUG(dbgs() << *NewClass->getMemoryLeader());
- DEBUG(dbgs() << "\n");
+ DEBUG(dbgs() << *NewClass->getMemoryLeader() << "\n");
auto LookupResult = MemoryAccessToClass.find(From);
bool Changed = false;
@@ -1381,7 +1407,7 @@ bool NewGVN::setMemoryClass(const MemoryAccess *From,
NewClass->memory_insert(MP);
// This may have killed the class if it had no non-memory members
if (OldClass->getMemoryLeader() == From) {
- if (OldClass->memory_empty()) {
+ if (OldClass->definesNoMemory()) {
OldClass->setMemoryLeader(nullptr);
} else {
OldClass->setMemoryLeader(getNextMemoryLeader(OldClass));
@@ -1406,7 +1432,7 @@ bool NewGVN::setMemoryClass(const MemoryAccess *From,
// Determine if a phi is cycle-free. That means the values in the phi don't
// depend on any expressions that can change value as a result of the phi.
// For example, a non-cycle free phi would be v = phi(0, v+1).
-bool NewGVN::isCycleFree(const PHINode *PN) {
+bool NewGVN::isCycleFree(const PHINode *PN) const {
// In order to compute cycle-freeness, we do SCC finding on the phi, and see
// what kind of SCC it ends up in. If it is a singleton, it is cycle-free.
// If it is not in a singleton, it is only cycle free if the other members are
@@ -1436,7 +1462,7 @@ bool NewGVN::isCycleFree(const PHINode *PN) {
}
// Evaluate PHI nodes symbolically, and create an expression result.
-const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) {
+const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
// True if one of the incoming phi edges is a backedge.
bool HasBackedge = false;
// All constant tracks the state of whether all the *original* phi operands
@@ -1510,7 +1536,8 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) {
return E;
}
-const Expression *NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) {
+const Expression *
+NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) const {
if (auto *EI = dyn_cast<ExtractValueInst>(I)) {
auto *II = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
if (II && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
@@ -1548,7 +1575,7 @@ const Expression *NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) {
return createAggregateValueExpression(I);
}
-const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) {
+const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) const {
auto *CI = dyn_cast<CmpInst>(I);
// See if our operands are equal to those of a previous predicate, and if so,
// if it implies true or false.
@@ -1663,7 +1690,7 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) {
}
// Substitute and symbolize the value before value numbering.
-const Expression *NewGVN::performSymbolicEvaluation(Value *V) {
+const Expression *NewGVN::performSymbolicEvaluation(Value *V) const {
const Expression *E = nullptr;
if (auto *C = dyn_cast<Constant>(V))
E = createConstantExpression(C);
@@ -1749,7 +1776,7 @@ void NewGVN::markUsersTouched(Value *V) {
}
}
-void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) {
+void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const {
DEBUG(dbgs() << "Adding memory user " << *U << " to " << *To << "\n");
MemoryToUsers[To].insert(U);
}
@@ -1772,7 +1799,7 @@ void NewGVN::markMemoryUsersTouched(const MemoryAccess *MA) {
}
// Add I to the set of users of a given predicate.
-void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) {
+void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const {
if (auto *PBranch = dyn_cast<PredicateBranch>(PB))
PredicateToUsers[PBranch->Condition].insert(I);
else if (auto *PAssume = dyn_cast<PredicateBranch>(PB))
@@ -1825,8 +1852,7 @@ const MemoryAccess *NewGVN::getNextMemoryLeader(CongruenceClass *CC) const {
// TODO: If this ends up to slow, we can maintain a next memory leader like we
// do for regular leaders.
// Make sure there will be a leader to find
- assert((CC->getStoreCount() > 0 || !CC->memory_empty()) &&
- "Can't get next leader if there is none");
+ assert(!CC->definesNoMemory() && "Can't get next leader if there is none");
if (CC->getStoreCount() > 0) {
if (auto *NL = dyn_cast_or_null<StoreInst>(CC->getNextLeader().first))
return MSSA->getMemoryAccess(NL);
@@ -1898,7 +1924,7 @@ void NewGVN::moveMemoryToNewCongruenceClass(Instruction *I,
setMemoryClass(InstMA, NewClass);
// Now, fixup the old class if necessary
if (OldClass->getMemoryLeader() == InstMA) {
- if (OldClass->getStoreCount() != 0 || !OldClass->memory_empty()) {
+ if (!OldClass->definesNoMemory()) {
OldClass->setMemoryLeader(getNextMemoryLeader(OldClass));
DEBUG(dbgs() << "Memory class leader change for class "
<< OldClass->getID() << " to "
@@ -1956,10 +1982,9 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
if (NewClass->getStoreCount() == 0 && !NewClass->getStoredValue()) {
// If it's a store expression we are using, it means we are not equivalent
// to something earlier.
- if (isa<StoreExpression>(E)) {
- assert(lookupOperandLeader(SI->getValueOperand()) !=
- NewClass->getLeader());
- NewClass->setStoredValue(lookupOperandLeader(SI->getValueOperand()));
+ if (auto *SE = dyn_cast<StoreExpression>(E)) {
+ assert(SE->getStoredValue() != NewClass->getLeader());
+ NewClass->setStoredValue(SE->getStoredValue());
markValueLeaderChangeTouched(NewClass);
// Shift the new class leader to be the store
DEBUG(dbgs() << "Changing leader of congruence class "
@@ -1985,7 +2010,7 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
// See if we destroyed the class or need to swap leaders.
if (OldClass->empty() && OldClass != TOPClass) {
if (OldClass->getDefiningExpr()) {
- DEBUG(dbgs() << "Erasing expression " << OldClass->getDefiningExpr()
+ DEBUG(dbgs() << "Erasing expression " << *OldClass->getDefiningExpr()
<< " from table\n");
ExpressionToClass.erase(OldClass->getDefiningExpr());
}
@@ -2064,7 +2089,7 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
} else if (const auto *SE = dyn_cast<StoreExpression>(E)) {
StoreInst *SI = SE->getStoreInst();
NewClass->setLeader(SI);
- NewClass->setStoredValue(lookupOperandLeader(SI->getValueOperand()));
+ NewClass->setStoredValue(SE->getStoredValue());
// The RepMemoryAccess field will be filled in properly by the
// moveValueToNewCongruenceClass call.
} else {
@@ -2523,6 +2548,19 @@ void NewGVN::verifyMemoryCongruency() const {
return false;
if (auto *MemDef = dyn_cast<MemoryDef>(Pair.first))
return !isInstructionTriviallyDead(MemDef->getMemoryInst());
+
+ // We could have phi nodes which operands are all trivially dead,
+ // so we don't process them.
+ if (auto *MemPHI = dyn_cast<MemoryPhi>(Pair.first)) {
+ for (auto &U : MemPHI->incoming_values()) {
+ if (Instruction *I = dyn_cast<Instruction>(U.get())) {
+ if (!isInstructionTriviallyDead(I))
+ return true;
+ }
+ }
+ return false;
+ }
+
return true;
};
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index fb1b47c48276..4f608c97147d 100644
--- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -55,7 +55,7 @@ static void replaceLoopUsesWithConstant(Loop &L, Value &LIC,
/// Update the dominator tree after removing one exiting predecessor of a loop
/// exit block.
static void updateLoopExitIDom(BasicBlock *LoopExitBB, Loop &L,
- DominatorTree &DT) {
+ DominatorTree &DT) {
assert(pred_begin(LoopExitBB) != pred_end(LoopExitBB) &&
"Cannot have empty predecessors of the loop exit block if we split "
"off a block to unswitch!");
@@ -137,6 +137,98 @@ static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH,
}
}
+/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
+/// incoming values along this edge.
+static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
+ BasicBlock &ExitBB) {
+ for (Instruction &I : ExitBB) {
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
+ // No more PHIs to check.
+ return true;
+
+ // If the incoming value for this edge isn't loop invariant the unswitch
+ // won't be trivial.
+ if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
+ return false;
+ }
+ llvm_unreachable("Basic blocks should never be empty!");
+}
+
+/// Rewrite the PHI nodes in an unswitched loop exit basic block.
+///
+/// Requires that the loop exit and unswitched basic block are the same, and
+/// that the exiting block was a unique predecessor of that block. Rewrites the
+/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
+/// PHI nodes from the old preheader that now contains the unswitched
+/// terminator.
+static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
+ BasicBlock &OldExitingBB,
+ BasicBlock &OldPH) {
+ for (Instruction &I : UnswitchedBB) {
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
+ // No more PHIs to check.
+ break;
+
+ // When the loop exit is directly unswitched we just need to update the
+ // incoming basic block. We loop to handle weird cases with repeated
+ // incoming blocks, but expect to typically only have one operand here.
+ for (auto i : llvm::seq<int>(0, PN->getNumOperands())) {
+ assert(PN->getIncomingBlock(i) == &OldExitingBB &&
+ "Found incoming block different from unique predecessor!");
+ PN->setIncomingBlock(i, &OldPH);
+ }
+ }
+}
+
+/// Rewrite the PHI nodes in the loop exit basic block and the split off
+/// unswitched block.
+///
+/// Because the exit block remains an exit from the loop, this rewrites the
+/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
+/// nodes into the unswitched basic block to select between the value in the
+/// old preheader and the loop exit.
+static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
+ BasicBlock &UnswitchedBB,
+ BasicBlock &OldExitingBB,
+ BasicBlock &OldPH) {
+ assert(&ExitBB != &UnswitchedBB &&
+ "Must have different loop exit and unswitched blocks!");
+ Instruction *InsertPt = &*UnswitchedBB.begin();
+ for (Instruction &I : ExitBB) {
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
+ // No more PHIs to check.
+ break;
+
+ auto *NewPN = PHINode::Create(PN->getType(), /*NumReservedValues*/ 2,
+ PN->getName() + ".split", InsertPt);
+
+ // Walk backwards over the old PHI node's inputs to minimize the cost of
+ // removing each one. We have to do this weird loop manually so that we
+ // create the same number of new incoming edges in the new PHI as we expect
+ // each case-based edge to be included in the unswitched switch in some
+ // cases.
+ // FIXME: This is really, really gross. It would be much cleaner if LLVM
+ // allowed us to create a single entry for a predecessor block without
+ // having separate entries for each "edge" even though these edges are
+ // required to produce identical results.
+ for (int i = PN->getNumIncomingValues() - 1; i >= 0; --i) {
+ if (PN->getIncomingBlock(i) != &OldExitingBB)
+ continue;
+
+ Value *Incoming = PN->removeIncomingValue(i);
+ NewPN->addIncoming(Incoming, &OldPH);
+ }
+
+ // Now replace the old PHI with the new one and wire the old one in as an
+ // input to the new one.
+ PN->replaceAllUsesWith(NewPN);
+ NewPN->addIncoming(PN, &ExitBB);
+ }
+}
+
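Illustration (not part of the diff): at the source level, the helpers added above allow unswitching an exit whose LCSSA PHIs only receive loop-invariant values along the unswitched edge, which is the "trivial" case. A hypothetical C++ before/after, not taken from LLVM, of that situation:

#include <cstdio>

// Before: the loop-invariant test sits inside the loop, and the value leaving
// through that exit (-1) does not depend on the iteration either.
static int before(int N, bool Flag) {
  int R = 0;
  for (int I = 0; I < N; ++I) {
    if (Flag)                                  // loop-invariant condition
      return -1;                               // invariant exit value (trivial LCSSA PHI)
    R += I;
  }
  return R;
}

// After trivial unswitching: the test guards the loop from the old preheader,
// and the loop body no longer contains the branch.
static int after(int N, bool Flag) {
  if (Flag)
    return -1;
  int R = 0;
  for (int I = 0; I < N; ++I)
    R += I;
  return R;
}

int main() {
  std::printf("%d %d  %d %d\n", before(5, false), after(5, false),
              before(5, true), after(5, true));
}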
/// Unswitch a trivial branch if the condition is loop invariant.
///
/// This routine should only be called when loop code leading to the branch has
@@ -187,10 +279,8 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
assert(L.contains(ContinueBB) &&
"Cannot have both successors exit and still be in the loop!");
- // If the loop exit block contains phi nodes, this isn't trivial.
- // FIXME: We should examine the PHI to determine whether or not we can handle
- // it trivially.
- if (isa<PHINode>(LoopExitBB->begin()))
+ auto *ParentBB = BI.getParent();
+ if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB))
return false;
DEBUG(dbgs() << " unswitching trivial branch when: " << CondVal
@@ -209,14 +299,13 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
BasicBlock *UnswitchedBB;
if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) {
(void)PredBB;
- assert(PredBB == BI.getParent() && "A branch's parent is't a predecessor!");
+ assert(PredBB == BI.getParent() &&
+ "A branch's parent isn't a predecessor!");
UnswitchedBB = LoopExitBB;
} else {
UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI);
}
- BasicBlock *ParentBB = BI.getParent();
-
// Now splice the branch to gate reaching the new preheader and re-point its
// successors.
OldPH->getInstList().splice(std::prev(OldPH->end()),
@@ -229,6 +318,13 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
// terminator.
BranchInst::Create(ContinueBB, ParentBB);
+ // Rewrite the relevant PHI nodes.
+ if (UnswitchedBB == LoopExitBB)
+ rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
+ else
+ rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
+ *ParentBB, *OldPH);
+
// Now we need to update the dominator tree.
updateDTAfterUnswitch(UnswitchedBB, OldPH, DT);
// But if we split something off of the loop exit block then we also removed
@@ -278,6 +374,8 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
if (!L.isLoopInvariant(LoopCond))
return false;
+ auto *ParentBB = SI.getParent();
+
// FIXME: We should compute this once at the start and update it!
SmallVector<BasicBlock *, 16> ExitBlocks;
L.getExitBlocks(ExitBlocks);
@@ -287,12 +385,13 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
SmallVector<int, 4> ExitCaseIndices;
for (auto Case : SI.cases()) {
auto *SuccBB = Case.getCaseSuccessor();
- if (ExitBlockSet.count(SuccBB) && !isa<PHINode>(SuccBB->begin()))
+ if (ExitBlockSet.count(SuccBB) &&
+ areLoopExitPHIsLoopInvariant(L, *ParentBB, *SuccBB))
ExitCaseIndices.push_back(Case.getCaseIndex());
}
BasicBlock *DefaultExitBB = nullptr;
if (ExitBlockSet.count(SI.getDefaultDest()) &&
- !isa<PHINode>(SI.getDefaultDest()->begin()) &&
+ areLoopExitPHIsLoopInvariant(L, *ParentBB, *SI.getDefaultDest()) &&
!isa<UnreachableInst>(SI.getDefaultDest()->getTerminator()))
DefaultExitBB = SI.getDefaultDest();
else if (ExitCaseIndices.empty())
@@ -330,7 +429,6 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
if (CommonSuccBB) {
SI.setDefaultDest(CommonSuccBB);
} else {
- BasicBlock *ParentBB = SI.getParent();
BasicBlock *UnreachableBB = BasicBlock::Create(
ParentBB->getContext(),
Twine(ParentBB->getName()) + ".unreachable_default",
@@ -358,30 +456,44 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
// Now add the unswitched switch.
auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
- // Split any exit blocks with remaining in-loop predecessors. We walk in
- // reverse so that we split in the same order as the cases appeared. This is
- // purely for convenience of reading the resulting IR, but it doesn't cost
- // anything really.
+ // Rewrite the IR for the unswitched basic blocks. This requires two steps.
+ // First, we split any exit blocks with remaining in-loop predecessors. Then
+ // we update the PHIs in one of two ways depending on if there was a split.
+ // We walk in reverse so that we split in the same order as the cases
+ // appeared. This is purely for convenience of reading the resulting IR, but
+ // it doesn't cost anything really.
+ SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
// Handle the default exit if necessary.
// FIXME: It'd be great if we could merge this with the loop below but LLVM's
// ranges aren't quite powerful enough yet.
- if (DefaultExitBB && !pred_empty(DefaultExitBB)) {
- auto *SplitBB =
- SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI);
- updateLoopExitIDom(DefaultExitBB, L, DT);
- DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
+ if (DefaultExitBB) {
+ if (pred_empty(DefaultExitBB)) {
+ UnswitchedExitBBs.insert(DefaultExitBB);
+ rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
+ } else {
+ auto *SplitBB =
+ SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI);
+ rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
+ *ParentBB, *OldPH);
+ updateLoopExitIDom(DefaultExitBB, L, DT);
+ DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
+ }
}
// Note that we must use a reference in the for loop so that we update the
// container.
for (auto &CasePair : reverse(ExitCases)) {
// Grab a reference to the exit block in the pair so that we can update it.
- BasicBlock *&ExitBB = CasePair.second;
+ BasicBlock *ExitBB = CasePair.second;
// If this case is the last edge into the exit block, we can simply reuse it
// as it will no longer be a loop exit. No mapping necessary.
- if (pred_empty(ExitBB))
+ if (pred_empty(ExitBB)) {
+ // Only rewrite once.
+ if (UnswitchedExitBBs.insert(ExitBB).second)
+ rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
continue;
+ }
// Otherwise we need to split the exit block so that we retain an exit
// block from the loop and a target for the unswitched condition.
@@ -389,9 +501,12 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
if (!SplitExitBB) {
// If this is the first time we see this, do the split and remember it.
SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
+ rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
+ *ParentBB, *OldPH);
updateLoopExitIDom(ExitBB, L, DT);
}
- ExitBB = SplitExitBB;
+ // Update the case pair to point to the split block.
+ CasePair.second = SplitExitBB;
}
// Now add the unswitched cases. We do this in reverse order as we built them
diff --git a/lib/Transforms/Scalar/SpeculativeExecution.cpp b/lib/Transforms/Scalar/SpeculativeExecution.cpp
index a0fc966cee2c..a7c308b59877 100644
--- a/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -208,6 +208,47 @@ bool SpeculativeExecutionPass::runOnBasicBlock(BasicBlock &B) {
return false;
}
+static unsigned ComputeSpeculationCost(const Instruction *I,
+ const TargetTransformInfo &TTI) {
+ switch (Operator::getOpcode(I)) {
+ case Instruction::GetElementPtr:
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Select:
+ case Instruction::Shl:
+ case Instruction::Sub:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::Xor:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::Call:
+ case Instruction::BitCast:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::AddrSpaceCast:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return TTI.getUserCost(I);
+
+ default:
+ return UINT_MAX; // Disallow anything not whitelisted.
+ }
+}
+
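Illustration (not part of the diff): ComputeSpeculationCost above prices only whitelisted opcodes that neither trap nor have side effects and returns UINT_MAX for everything else, so only the cheap, safe part of a conditionally executed block gets hoisted. A hypothetical C++ before/after (names invented) of the kind of transformation considerHoistingFromTo performs when the summed cost stays within the limit:

#include <cstdio>

// Before: the arithmetic only runs when Cond holds.
static int before(int X, int Y, bool Cond) {
  int R = 0;
  if (Cond) {
    int A = X + Y;                             // Instruction::Add  -> whitelisted
    int B = A << 2;                            // Instruction::Shl  -> whitelisted
    R = B;
  }
  return R;
}

// After speculative hoisting: the whitelisted arithmetic moves above the
// branch (it is safe to execute unconditionally); the guarded block shrinks
// to a single assignment, and later passes may simplify it further.
static int after(int X, int Y, bool Cond) {
  int A = X + Y;
  int B = A << 2;
  int R = 0;
  if (Cond)
    R = B;
  return R;
}

int main() {
  std::printf("%d %d  %d %d\n", before(3, 4, true), after(3, 4, true),
              before(3, 4, false), after(3, 4, false));
}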
bool SpeculativeExecutionPass::considerHoistingFromTo(
BasicBlock &FromBlock, BasicBlock &ToBlock) {
SmallSet<const Instruction *, 8> NotHoisted;
@@ -223,7 +264,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
unsigned TotalSpeculationCost = 0;
for (auto& I : FromBlock) {
- const unsigned Cost = TTI->getUserCost(&I);
+ const unsigned Cost = ComputeSpeculationCost(&I, *TTI);
if (Cost != UINT_MAX && isSafeToSpeculativelyExecute(&I) &&
AllPrecedingUsesFromBlockHoisted(&I)) {
TotalSpeculationCost += Cost;