author | Dimitry Andric <dim@FreeBSD.org> | 2017-05-16 19:46:52 +0000
---|---|---
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-05-16 19:46:52 +0000
commit | 6b3f41ed88e8e440e11a4fbf20b6600529f80049 (patch) |
tree | 928b056f24a634d628c80238dbbf10d41b1a71d5 /lib/Transforms/Scalar |
parent | c46e6a5940c50058e00c0c5f9123fd82e338d29a (diff) |
Vendor import of llvm trunk r303197: vendor/llvm/llvm-trunk-r303197
Notes:
svn path=/vendor/llvm/dist/; revision=318368
svn path=/vendor/llvm/llvm-trunk-r303197/; revision=318369; tag=vendor/llvm/llvm-trunk-r303197
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r-- | lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 10 |
-rw-r--r-- | lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 295 |
-rw-r--r-- | lib/Transforms/Scalar/NewGVN.cpp | 210 |
-rw-r--r-- | lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 161 |
-rw-r--r-- | lib/Transforms/Scalar/SpeculativeExecution.cpp | 43 |
5 files changed, 602 insertions, 117 deletions
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 3f1a77b49a44..ee493a8ec7e1 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -442,9 +442,8 @@ static bool processAdd(BinaryOperator *AddOp, LazyValueInfo *LVI) { bool Changed = false; if (!NUW) { - ConstantRange NUWRange = - LRange.makeGuaranteedNoWrapRegion(BinaryOperator::Add, LRange, - OBO::NoUnsignedWrap); + ConstantRange NUWRange = ConstantRange::makeGuaranteedNoWrapRegion( + BinaryOperator::Add, LRange, OBO::NoUnsignedWrap); if (!NUWRange.isEmptySet()) { bool NewNUW = NUWRange.contains(LazyRRange()); AddOp->setHasNoUnsignedWrap(NewNUW); @@ -452,9 +451,8 @@ static bool processAdd(BinaryOperator *AddOp, LazyValueInfo *LVI) { } } if (!NSW) { - ConstantRange NSWRange = - LRange.makeGuaranteedNoWrapRegion(BinaryOperator::Add, LRange, - OBO::NoSignedWrap); + ConstantRange NSWRange = ConstantRange::makeGuaranteedNoWrapRegion( + BinaryOperator::Add, LRange, OBO::NoSignedWrap); if (!NSWRange.isEmptySet()) { bool NewNSW = NSWRange.contains(LazyRRange()); AddOp->setHasNoSignedWrap(NewNSW); diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 48d5ae88cda9..6693a26e8890 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -144,6 +144,10 @@ private: bool recognizePopcount(); void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst, PHINode *CntPhi, Value *Var); + bool recognizeAndInsertCTLZ(); + void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst, + PHINode *CntPhi, Value *Var, const DebugLoc DL, + bool ZeroCheck, bool IsCntPhiUsedOutsideLoop); /// @} }; @@ -994,7 +998,7 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset, } bool LoopIdiomRecognize::runOnNoncountableLoop() { - return recognizePopcount(); + return recognizePopcount() || recognizeAndInsertCTLZ(); } /// Check if the given conditional branch is based on the comparison between @@ -1159,6 +1163,167 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, return true; } +/// Return true if the idiom is detected in the loop. +/// +/// Additionally: +/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ) +/// or nullptr if there is no such. +/// 2) \p CntPhi is set to the corresponding phi node +/// or nullptr if there is no such. +/// 3) \p Var is set to the value whose CTLZ could be used. +/// 4) \p DefX is set to the instruction calculating Loop exit condition. +/// +/// The core idiom we are trying to detect is: +/// \code +/// if (x0 == 0) +/// goto loop-exit // the precondition of the loop +/// cnt0 = init-val; +/// do { +/// x = phi (x0, x.next); //PhiX +/// cnt = phi(cnt0, cnt.next); +/// +/// cnt.next = cnt + 1; +/// ... +/// x.next = x >> 1; // DefX +/// ... +/// } while(x.next != 0); +/// +/// loop-exit: +/// \endcode +static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX, + Instruction *&CntInst, PHINode *&CntPhi, + Instruction *&DefX) { + BasicBlock *LoopEntry; + Value *VarX = nullptr; + + DefX = nullptr; + PhiX = nullptr; + CntInst = nullptr; + CntPhi = nullptr; + LoopEntry = *(CurLoop->block_begin()); + + // step 1: Check if the loop-back branch is in desirable form. 
+ if (Value *T = matchCondition( + dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry)) + DefX = dyn_cast<Instruction>(T); + else + return false; + + // step 2: detect instructions corresponding to "x.next = x >> 1" + if (!DefX || DefX->getOpcode() != Instruction::AShr) + return false; + if (ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1))) + if (!Shft || !Shft->isOne()) + return false; + VarX = DefX->getOperand(0); + + // step 3: Check the recurrence of variable X + PhiX = dyn_cast<PHINode>(VarX); + if (!PhiX || (PhiX->getOperand(0) != DefX && PhiX->getOperand(1) != DefX)) + return false; + + // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1 + // TODO: We can skip the step. If loop trip count is known (CTLZ), + // then all uses of "cnt.next" could be optimized to the trip count + // plus "cnt0". Currently it is not optimized. + // This step could be used to detect POPCNT instruction: + // cnt.next = cnt + (x.next & 1) + for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(), + IterE = LoopEntry->end(); + Iter != IterE; Iter++) { + Instruction *Inst = &*Iter; + if (Inst->getOpcode() != Instruction::Add) + continue; + + ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1)); + if (!Inc || !Inc->isOne()) + continue; + + PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0)); + if (!Phi || Phi->getParent() != LoopEntry) + continue; + + CntInst = Inst; + CntPhi = Phi; + break; + } + if (!CntInst) + return false; + + return true; +} + +/// Recognize CTLZ idiom in a non-countable loop and convert the loop +/// to countable (with CTLZ trip count). +/// If CTLZ inserted as a new trip count returns true; otherwise, returns false. +bool LoopIdiomRecognize::recognizeAndInsertCTLZ() { + // Give up if the loop has multiple blocks or multiple backedges. + if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1) + return false; + + Instruction *CntInst, *DefX; + PHINode *CntPhi, *PhiX; + if (!detectCTLZIdiom(CurLoop, PhiX, CntInst, CntPhi, DefX)) + return false; + + bool IsCntPhiUsedOutsideLoop = false; + for (User *U : CntPhi->users()) + if (!CurLoop->contains(dyn_cast<Instruction>(U))) { + IsCntPhiUsedOutsideLoop = true; + break; + } + bool IsCntInstUsedOutsideLoop = false; + for (User *U : CntInst->users()) + if (!CurLoop->contains(dyn_cast<Instruction>(U))) { + IsCntInstUsedOutsideLoop = true; + break; + } + // If both CntInst and CntPhi are used outside the loop the profitability + // is questionable. + if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop) + return false; + + // For some CPUs result of CTLZ(X) intrinsic is undefined + // when X is 0. If we can not guarantee X != 0, we need to check this + // when expand. + bool ZeroCheck = false; + // It is safe to assume Preheader exist as it was checked in + // parent function RunOnLoop. + BasicBlock *PH = CurLoop->getLoopPreheader(); + Value *InitX = PhiX->getIncomingValueForBlock(PH); + // If we check X != 0 before entering the loop we don't need a zero + // check in CTLZ intrinsic. + if (BasicBlock *PreCondBB = PH->getSinglePredecessor()) + if (BranchInst *PreCondBr = + dyn_cast<BranchInst>(PreCondBB->getTerminator())) { + if (matchCondition(PreCondBr, PH) == InitX) + ZeroCheck = true; + } + + // Check if CTLZ intrinsic is profitable. 
Assume it is always profitable + // if we delete the loop (the loop has only 6 instructions): + // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ] + // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ] + // %shr = ashr %n.addr.0, 1 + // %tobool = icmp eq %shr, 0 + // %inc = add nsw %i.0, 1 + // br i1 %tobool + + IRBuilder<> Builder(PH->getTerminator()); + SmallVector<const Value *, 2> Ops = + {InitX, ZeroCheck ? Builder.getTrue() : Builder.getFalse()}; + ArrayRef<const Value *> Args(Ops); + if (CurLoop->getHeader()->size() != 6 && + TTI->getIntrinsicCost(Intrinsic::ctlz, InitX->getType(), Args) > + TargetTransformInfo::TCC_Basic) + return false; + + const DebugLoc DL = DefX->getDebugLoc(); + transformLoopToCountable(PH, CntInst, CntPhi, InitX, DL, ZeroCheck, + IsCntPhiUsedOutsideLoop); + return true; +} + /// Recognizes a population count idiom in a non-countable loop. /// /// If detected, transforms the relevant code to issue the popcount intrinsic @@ -1222,6 +1387,134 @@ static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, return CI; } +static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val, + const DebugLoc &DL, bool ZeroCheck) { + Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()}; + Type *Tys[] = {Val->getType()}; + + Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent(); + Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctlz, Tys); + CallInst *CI = IRBuilder.CreateCall(Func, Ops); + CI->setDebugLoc(DL); + + return CI; +} + +/// Transform the following loop: +/// loop: +/// CntPhi = PHI [Cnt0, CntInst] +/// PhiX = PHI [InitX, DefX] +/// CntInst = CntPhi + 1 +/// DefX = PhiX >> 1 +// LOOP_BODY +/// Br: loop if (DefX != 0) +/// Use(CntPhi) or Use(CntInst) +/// +/// Into: +/// If CntPhi used outside the loop: +/// CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1) +/// Count = CountPrev + 1 +/// else +/// Count = BitWidth(InitX) - CTLZ(InitX) +/// loop: +/// CntPhi = PHI [Cnt0, CntInst] +/// PhiX = PHI [InitX, DefX] +/// PhiCount = PHI [Count, Dec] +/// CntInst = CntPhi + 1 +/// DefX = PhiX >> 1 +/// Dec = PhiCount - 1 +/// LOOP_BODY +/// Br: loop if (Dec != 0) +/// Use(CountPrev + Cnt0) // Use(CntPhi) +/// or +/// Use(Count + Cnt0) // Use(CntInst) +/// +/// If LOOP_BODY is empty the loop will be deleted. +/// If CntInst and DefX are not used in LOOP_BODY they will be removed. 
+void LoopIdiomRecognize::transformLoopToCountable( + BasicBlock *Preheader, Instruction *CntInst, PHINode *CntPhi, Value *InitX, + const DebugLoc DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) { + BranchInst *PreheaderBr = dyn_cast<BranchInst>(Preheader->getTerminator()); + + // Step 1: Insert the CTLZ instruction at the end of the preheader block + // Count = BitWidth - CTLZ(InitX); + // If there are uses of CntPhi create: + // CountPrev = BitWidth - CTLZ(InitX >> 1); + IRBuilder<> Builder(PreheaderBr); + Builder.SetCurrentDebugLocation(DL); + Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext; + + if (IsCntPhiUsedOutsideLoop) + InitXNext = Builder.CreateAShr(InitX, + ConstantInt::get(InitX->getType(), 1)); + else + InitXNext = InitX; + CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck); + Count = Builder.CreateSub( + ConstantInt::get(CTLZ->getType(), + CTLZ->getType()->getIntegerBitWidth()), + CTLZ); + if (IsCntPhiUsedOutsideLoop) { + CountPrev = Count; + Count = Builder.CreateAdd( + CountPrev, + ConstantInt::get(CountPrev->getType(), 1)); + } + if (IsCntPhiUsedOutsideLoop) + NewCount = Builder.CreateZExtOrTrunc(CountPrev, + cast<IntegerType>(CntInst->getType())); + else + NewCount = Builder.CreateZExtOrTrunc(Count, + cast<IntegerType>(CntInst->getType())); + + // If the CTLZ counter's initial value is not zero, insert Add Inst. + Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader); + ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal); + if (!InitConst || !InitConst->isZero()) + NewCount = Builder.CreateAdd(NewCount, CntInitVal); + + // Step 2: Insert new IV and loop condition: + // loop: + // ... + // PhiCount = PHI [Count, Dec] + // ... + // Dec = PhiCount - 1 + // ... + // Br: loop if (Dec != 0) + BasicBlock *Body = *(CurLoop->block_begin()); + auto *LbBr = dyn_cast<BranchInst>(Body->getTerminator()); + ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition()); + Type *Ty = Count->getType(); + + PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front()); + + Builder.SetInsertPoint(LbCond); + Instruction *TcDec = cast<Instruction>( + Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1), + "tcdec", false, true)); + + TcPhi->addIncoming(Count, Preheader); + TcPhi->addIncoming(TcDec, Body); + + CmpInst::Predicate Pred = + (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ; + LbCond->setPredicate(Pred); + LbCond->setOperand(0, TcDec); + LbCond->setOperand(1, ConstantInt::get(Ty, 0)); + + // Step 3: All the references to the original counter outside + // the loop are replaced with the NewCount -- the value returned from + // __builtin_ctlz(x). + if (IsCntPhiUsedOutsideLoop) + CntPhi->replaceUsesOutsideBlock(NewCount, Body); + else + CntInst->replaceUsesOutsideBlock(NewCount, Body); + + // step 4: Forget the "non-computable" trip-count SCEV associated with the + // loop. The loop would otherwise not be deleted even if it becomes empty. 
+ SE->forgetLoop(CurLoop); +} + void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst, PHINode *CntPhi, Value *Var) { diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 3c9850b156ac..5e0a705782ea 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -283,7 +283,6 @@ public: // Forward propagation info const Expression *getDefiningExpr() const { return DefiningExpr; } - void setDefiningExpr(const Expression *E) { DefiningExpr = E; } // Value member set bool empty() const { return Members.empty(); } @@ -317,6 +316,9 @@ public: --StoreCount; } + // True if this class has no memory members. + bool definesNoMemory() const { return StoreCount == 0 && memory_empty(); } + // Return true if two congruence classes are equivalent to each other. This // means // that every field but the ID number and the dead field are equivalent. @@ -401,9 +403,12 @@ class NewGVN { MemorySSAWalker *MSSAWalker; const DataLayout &DL; std::unique_ptr<PredicateInfo> PredInfo; - BumpPtrAllocator ExpressionAllocator; - ArrayRecycler<Value *> ArgRecycler; - TarjanSCC SCCFinder; + + // These are the only two things the create* functions should have + // side-effects on due to allocating memory. + mutable BumpPtrAllocator ExpressionAllocator; + mutable ArrayRecycler<Value *> ArgRecycler; + mutable TarjanSCC SCCFinder; const SimplifyQuery SQ; // Number of function arguments, used by ranking @@ -430,11 +435,12 @@ class NewGVN { // In order to correctly ensure propagation, we must keep track of what // comparisons we used, so that when the values of the comparisons change, we // propagate the information to the places we used the comparison. - DenseMap<const Value *, SmallPtrSet<Instruction *, 2>> PredicateToUsers; - // Mapping from MemoryAccess we used to the MemoryAccess we used it with. Has + mutable DenseMap<const Value *, SmallPtrSet<Instruction *, 2>> + PredicateToUsers; // the same reasoning as PredicateToUsers. When we skip MemoryAccesses for // stores, we no longer can rely solely on the def-use chains of MemorySSA. - DenseMap<const MemoryAccess *, SmallPtrSet<MemoryAccess *, 2>> MemoryToUsers; + mutable DenseMap<const MemoryAccess *, SmallPtrSet<MemoryAccess *, 2>> + MemoryToUsers; // A table storing which memorydefs/phis represent a memory state provably // equivalent to another memory state. @@ -457,7 +463,7 @@ class NewGVN { DenseMap<const MemoryPhi *, MemoryPhiState> MemoryPhiState; enum PhiCycleState { PCS_Unknown, PCS_CycleFree, PCS_Cycle }; - DenseMap<const PHINode *, PhiCycleState> PhiCycleState; + mutable DenseMap<const PHINode *, PhiCycleState> PhiCycleState; // Expression to class mapping. using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>; ExpressionClassMap ExpressionToClass; @@ -511,21 +517,24 @@ public: private: // Expression handling. 
- const Expression *createExpression(Instruction *); - const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *); - PHIExpression *createPHIExpression(Instruction *, bool &HasBackedge, - bool &AllConstant); - const VariableExpression *createVariableExpression(Value *); - const ConstantExpression *createConstantExpression(Constant *); - const Expression *createVariableOrConstant(Value *V); - const UnknownExpression *createUnknownExpression(Instruction *); + const Expression *createExpression(Instruction *) const; + const Expression *createBinaryExpression(unsigned, Type *, Value *, + Value *) const; + PHIExpression *createPHIExpression(Instruction *, bool &HasBackEdge, + bool &AllConstant) const; + const VariableExpression *createVariableExpression(Value *) const; + const ConstantExpression *createConstantExpression(Constant *) const; + const Expression *createVariableOrConstant(Value *V) const; + const UnknownExpression *createUnknownExpression(Instruction *) const; const StoreExpression *createStoreExpression(StoreInst *, - const MemoryAccess *); + const MemoryAccess *) const; LoadExpression *createLoadExpression(Type *, Value *, LoadInst *, - const MemoryAccess *); - const CallExpression *createCallExpression(CallInst *, const MemoryAccess *); - const AggregateValueExpression *createAggregateValueExpression(Instruction *); - bool setBasicExpressionInfo(Instruction *, BasicExpression *); + const MemoryAccess *) const; + const CallExpression *createCallExpression(CallInst *, + const MemoryAccess *) const; + const AggregateValueExpression * + createAggregateValueExpression(Instruction *) const; + bool setBasicExpressionInfo(Instruction *, BasicExpression *) const; // Congruence class handling. CongruenceClass *createCongruenceClass(Value *Leader, const Expression *E) { @@ -560,17 +569,18 @@ private: // Symbolic evaluation. const Expression *checkSimplificationResults(Expression *, Instruction *, - Value *); - const Expression *performSymbolicEvaluation(Value *); + Value *) const; + const Expression *performSymbolicEvaluation(Value *) const; const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *, - Instruction *, MemoryAccess *); - const Expression *performSymbolicLoadEvaluation(Instruction *); - const Expression *performSymbolicStoreEvaluation(Instruction *); - const Expression *performSymbolicCallEvaluation(Instruction *); - const Expression *performSymbolicPHIEvaluation(Instruction *); - const Expression *performSymbolicAggrValueEvaluation(Instruction *); - const Expression *performSymbolicCmpEvaluation(Instruction *); - const Expression *performSymbolicPredicateInfoEvaluation(Instruction *); + Instruction *, + MemoryAccess *) const; + const Expression *performSymbolicLoadEvaluation(Instruction *) const; + const Expression *performSymbolicStoreEvaluation(Instruction *) const; + const Expression *performSymbolicCallEvaluation(Instruction *) const; + const Expression *performSymbolicPHIEvaluation(Instruction *) const; + const Expression *performSymbolicAggrValueEvaluation(Instruction *) const; + const Expression *performSymbolicCmpEvaluation(Instruction *) const; + const Expression *performSymbolicPredicateInfoEvaluation(Instruction *) const; // Congruence finding. 
bool someEquivalentDominates(const Instruction *, const Instruction *) const; @@ -620,8 +630,8 @@ private: void markPredicateUsersTouched(Instruction *); void markValueLeaderChangeTouched(CongruenceClass *CC); void markMemoryLeaderChangeTouched(CongruenceClass *CC); - void addPredicateUsers(const PredicateBase *, Instruction *); - void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U); + void addPredicateUsers(const PredicateBase *, Instruction *) const; + void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const; // Main loop of value numbering void iterateTouchedInstructions(); @@ -634,7 +644,7 @@ private: void verifyIterationSettled(Function &F); bool singleReachablePHIPath(const MemoryAccess *, const MemoryAccess *) const; BasicBlock *getBlockForValue(Value *V) const; - void deleteExpression(const Expression *E); + void deleteExpression(const Expression *E) const; unsigned InstrToDFSNum(const Value *V) const { assert(isa<Instruction>(V) && "This should not be used for MemoryAccesses"); return InstrDFS.lookup(V); @@ -654,7 +664,7 @@ private: ? InstrToDFSNum(cast<MemoryUseOrDef>(MA)->getMemoryInst()) : InstrDFS.lookup(MA); } - bool isCycleFree(const PHINode *PN); + bool isCycleFree(const PHINode *PN) const; template <class T, class Range> T *getMinDFSOfRange(const Range &) const; // Debug counter info. When verifying, we have to reset the value numbering // debug counter to the same state it started in to get the same results. @@ -702,7 +712,7 @@ BasicBlock *NewGVN::getBlockForValue(Value *V) const { // Delete a definitely dead expression, so it can be reused by the expression // allocator. Some of these are not in creation functions, so we have to accept // const versions. -void NewGVN::deleteExpression(const Expression *E) { +void NewGVN::deleteExpression(const Expression *E) const { assert(isa<BasicExpression>(E)); auto *BE = cast<BasicExpression>(E); const_cast<BasicExpression *>(BE)->deallocateOperands(ArgRecycler); @@ -710,7 +720,7 @@ void NewGVN::deleteExpression(const Expression *E) { } PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, - bool &AllConstant) { + bool &AllConstant) const { BasicBlock *PHIBlock = I->getParent(); auto *PN = cast<PHINode>(I); auto *E = @@ -722,30 +732,46 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock)); + // NewGVN assumes the operands of a PHI node are in a consistent order across + // PHIs. LLVM doesn't seem to always guarantee this. While we need to fix + // this in LLVM at some point we don't want GVN to find wrong congruences. + // Therefore, here we sort uses in predecessor order. + // We're sorting the values by pointer. In theory this might be cause of + // non-determinism, but here we don't rely on the ordering for anything + // significant, e.g. we don't create new instructions based on it so we're + // fine. + SmallVector<const Use *, 4> PHIOperands; + for (const Use &U : PN->operands()) + PHIOperands.push_back(&U); + std::sort(PHIOperands.begin(), PHIOperands.end(), + [&](const Use *U1, const Use *U2) { + return PN->getIncomingBlock(*U1) < PN->getIncomingBlock(*U2); + }); + // Filter out unreachable phi operands. 
- auto Filtered = make_filter_range(PN->operands(), [&](const Use &U) { - return ReachableEdges.count({PN->getIncomingBlock(U), PHIBlock}); + auto Filtered = make_filter_range(PHIOperands, [&](const Use *U) { + return ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock}); }); std::transform(Filtered.begin(), Filtered.end(), op_inserter(E), - [&](const Use &U) -> Value * { - auto *BB = PN->getIncomingBlock(U); + [&](const Use *U) -> Value * { + auto *BB = PN->getIncomingBlock(*U); auto *DTN = DT->getNode(BB); if (RPOOrdering.lookup(DTN) >= PHIRPO) HasBackedge = true; - AllConstant &= isa<UndefValue>(U) || isa<Constant>(U); + AllConstant &= isa<UndefValue>(*U) || isa<Constant>(*U); // Don't try to transform self-defined phis. - if (U == PN) + if (*U == PN) return PN; - return lookupOperandLeader(U); + return lookupOperandLeader(*U); }); return E; } // Set basic expression info (Arguments, type, opcode) for Expression // E from Instruction I in block B. -bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) { +bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) const { bool AllConstant = true; if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) E->setType(GEP->getSourceElementType()); @@ -766,7 +792,8 @@ bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) { } const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T, - Value *Arg1, Value *Arg2) { + Value *Arg1, + Value *Arg2) const { auto *E = new (ExpressionAllocator) BasicExpression(2); E->setType(T); @@ -795,7 +822,8 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T, // TODO: Once finished, this should not take an Instruction, we only // use it for printing. const Expression *NewGVN::checkSimplificationResults(Expression *E, - Instruction *I, Value *V) { + Instruction *I, + Value *V) const { if (!V) return nullptr; if (auto *C = dyn_cast<Constant>(V)) { @@ -827,7 +855,7 @@ const Expression *NewGVN::checkSimplificationResults(Expression *E, return nullptr; } -const Expression *NewGVN::createExpression(Instruction *I) { +const Expression *NewGVN::createExpression(Instruction *I) const { auto *E = new (ExpressionAllocator) BasicExpression(I->getNumOperands()); bool AllConstant = setBasicExpressionInfo(I, E); @@ -913,7 +941,7 @@ const Expression *NewGVN::createExpression(Instruction *I) { } const AggregateValueExpression * -NewGVN::createAggregateValueExpression(Instruction *I) { +NewGVN::createAggregateValueExpression(Instruction *I) const { if (auto *II = dyn_cast<InsertValueInst>(I)) { auto *E = new (ExpressionAllocator) AggregateValueExpression(I->getNumOperands(), II->getNumIndices()); @@ -932,32 +960,32 @@ NewGVN::createAggregateValueExpression(Instruction *I) { llvm_unreachable("Unhandled type of aggregate value operation"); } -const VariableExpression *NewGVN::createVariableExpression(Value *V) { +const VariableExpression *NewGVN::createVariableExpression(Value *V) const { auto *E = new (ExpressionAllocator) VariableExpression(V); E->setOpcode(V->getValueID()); return E; } -const Expression *NewGVN::createVariableOrConstant(Value *V) { +const Expression *NewGVN::createVariableOrConstant(Value *V) const { if (auto *C = dyn_cast<Constant>(V)) return createConstantExpression(C); return createVariableExpression(V); } -const ConstantExpression *NewGVN::createConstantExpression(Constant *C) { +const ConstantExpression *NewGVN::createConstantExpression(Constant *C) const { auto *E = new (ExpressionAllocator) ConstantExpression(C); 
E->setOpcode(C->getValueID()); return E; } -const UnknownExpression *NewGVN::createUnknownExpression(Instruction *I) { +const UnknownExpression *NewGVN::createUnknownExpression(Instruction *I) const { auto *E = new (ExpressionAllocator) UnknownExpression(I); E->setOpcode(I->getOpcode()); return E; } -const CallExpression *NewGVN::createCallExpression(CallInst *CI, - const MemoryAccess *MA) { +const CallExpression * +NewGVN::createCallExpression(CallInst *CI, const MemoryAccess *MA) const { // FIXME: Add operand bundles for calls. auto *E = new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, MA); @@ -1017,9 +1045,8 @@ Value *NewGVN::lookupOperandLeader(Value *V) const { const MemoryAccess *NewGVN::lookupMemoryLeader(const MemoryAccess *MA) const { auto *CC = getMemoryClass(MA); assert(CC->getMemoryLeader() && - "Every MemoryAccess should be mapped to a " - "congruence class with a represenative memory " - "access"); + "Every MemoryAccess should be mapped to a congruence class with a " + "representative memory access"); return CC->getMemoryLeader(); } @@ -1032,7 +1059,7 @@ bool NewGVN::isMemoryAccessTop(const MemoryAccess *MA) const { LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp, LoadInst *LI, - const MemoryAccess *MA) { + const MemoryAccess *MA) const { auto *E = new (ExpressionAllocator) LoadExpression(1, LI, lookupMemoryLeader(MA)); E->allocateOperands(ArgRecycler, ExpressionAllocator); @@ -1050,8 +1077,8 @@ LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp, return E; } -const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI, - const MemoryAccess *MA) { +const StoreExpression * +NewGVN::createStoreExpression(StoreInst *SI, const MemoryAccess *MA) const { auto *StoredValueLeader = lookupOperandLeader(SI->getValueOperand()); auto *E = new (ExpressionAllocator) StoreExpression(SI->getNumOperands(), SI, StoredValueLeader, MA); @@ -1068,7 +1095,7 @@ const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI, return E; } -const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) { +const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const { // Unlike loads, we never try to eliminate stores, so we do not check if they // are simple and avoid value numbering them. auto *SI = cast<StoreInst>(I); @@ -1126,7 +1153,7 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) { const Expression * NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr, LoadInst *LI, Instruction *DepInst, - MemoryAccess *DefiningAccess) { + MemoryAccess *DefiningAccess) const { assert((!LI || LI->isSimple()) && "Not a simple load"); if (auto *DepSI = dyn_cast<StoreInst>(DepInst)) { // Can't forward from non-atomic to atomic without violating memory model. 
@@ -1201,7 +1228,7 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr, return nullptr; } -const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) { +const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const { auto *LI = cast<LoadInst>(I); // We can eliminate in favor of non-simple loads, but we won't be able to @@ -1239,7 +1266,7 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) { } const Expression * -NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) { +NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const { auto *PI = PredInfo->getPredicateInfoFor(I); if (!PI) return nullptr; @@ -1284,7 +1311,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) { return nullptr; if (CopyOf != Cmp->getOperand(0) && CopyOf != Cmp->getOperand(1)) { - DEBUG(dbgs() << "Copy is not of any condition operands!"); + DEBUG(dbgs() << "Copy is not of any condition operands!\n"); return nullptr; } Value *FirstOp = lookupOperandLeader(Cmp->getOperand(0)); @@ -1329,7 +1356,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) { } // Evaluate read only and pure calls, and create an expression result. -const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I) { +const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I) const { auto *CI = cast<CallInst>(I); if (auto *II = dyn_cast<IntrinsicInst>(I)) { // Instrinsics with the returned attribute are copies of arguments. @@ -1366,8 +1393,7 @@ bool NewGVN::setMemoryClass(const MemoryAccess *From, DEBUG(dbgs() << "Setting " << *From); DEBUG(dbgs() << " equivalent to congruence class "); DEBUG(dbgs() << NewClass->getID() << " with current MemoryAccess leader "); - DEBUG(dbgs() << *NewClass->getMemoryLeader()); - DEBUG(dbgs() << "\n"); + DEBUG(dbgs() << *NewClass->getMemoryLeader() << "\n"); auto LookupResult = MemoryAccessToClass.find(From); bool Changed = false; @@ -1381,7 +1407,7 @@ bool NewGVN::setMemoryClass(const MemoryAccess *From, NewClass->memory_insert(MP); // This may have killed the class if it had no non-memory members if (OldClass->getMemoryLeader() == From) { - if (OldClass->memory_empty()) { + if (OldClass->definesNoMemory()) { OldClass->setMemoryLeader(nullptr); } else { OldClass->setMemoryLeader(getNextMemoryLeader(OldClass)); @@ -1406,7 +1432,7 @@ bool NewGVN::setMemoryClass(const MemoryAccess *From, // Determine if a phi is cycle-free. That means the values in the phi don't // depend on any expressions that can change value as a result of the phi. // For example, a non-cycle free phi would be v = phi(0, v+1). -bool NewGVN::isCycleFree(const PHINode *PN) { +bool NewGVN::isCycleFree(const PHINode *PN) const { // In order to compute cycle-freeness, we do SCC finding on the phi, and see // what kind of SCC it ends up in. If it is a singleton, it is cycle-free. // If it is not in a singleton, it is only cycle free if the other members are @@ -1436,7 +1462,7 @@ bool NewGVN::isCycleFree(const PHINode *PN) { } // Evaluate PHI nodes symbolically, and create an expression result. -const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) { +const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { // True if one of the incoming phi edges is a backedge. 
bool HasBackedge = false; // All constant tracks the state of whether all the *original* phi operands @@ -1510,7 +1536,8 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) { return E; } -const Expression *NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) { +const Expression * +NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) const { if (auto *EI = dyn_cast<ExtractValueInst>(I)) { auto *II = dyn_cast<IntrinsicInst>(EI->getAggregateOperand()); if (II && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) { @@ -1548,7 +1575,7 @@ const Expression *NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) { return createAggregateValueExpression(I); } -const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) { +const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) const { auto *CI = dyn_cast<CmpInst>(I); // See if our operands are equal to those of a previous predicate, and if so, // if it implies true or false. @@ -1663,7 +1690,7 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) { } // Substitute and symbolize the value before value numbering. -const Expression *NewGVN::performSymbolicEvaluation(Value *V) { +const Expression *NewGVN::performSymbolicEvaluation(Value *V) const { const Expression *E = nullptr; if (auto *C = dyn_cast<Constant>(V)) E = createConstantExpression(C); @@ -1749,7 +1776,7 @@ void NewGVN::markUsersTouched(Value *V) { } } -void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) { +void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const { DEBUG(dbgs() << "Adding memory user " << *U << " to " << *To << "\n"); MemoryToUsers[To].insert(U); } @@ -1772,7 +1799,7 @@ void NewGVN::markMemoryUsersTouched(const MemoryAccess *MA) { } // Add I to the set of users of a given predicate. -void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) { +void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const { if (auto *PBranch = dyn_cast<PredicateBranch>(PB)) PredicateToUsers[PBranch->Condition].insert(I); else if (auto *PAssume = dyn_cast<PredicateBranch>(PB)) @@ -1825,8 +1852,7 @@ const MemoryAccess *NewGVN::getNextMemoryLeader(CongruenceClass *CC) const { // TODO: If this ends up to slow, we can maintain a next memory leader like we // do for regular leaders. // Make sure there will be a leader to find - assert((CC->getStoreCount() > 0 || !CC->memory_empty()) && - "Can't get next leader if there is none"); + assert(!CC->definesNoMemory() && "Can't get next leader if there is none"); if (CC->getStoreCount() > 0) { if (auto *NL = dyn_cast_or_null<StoreInst>(CC->getNextLeader().first)) return MSSA->getMemoryAccess(NL); @@ -1898,7 +1924,7 @@ void NewGVN::moveMemoryToNewCongruenceClass(Instruction *I, setMemoryClass(InstMA, NewClass); // Now, fixup the old class if necessary if (OldClass->getMemoryLeader() == InstMA) { - if (OldClass->getStoreCount() != 0 || !OldClass->memory_empty()) { + if (!OldClass->definesNoMemory()) { OldClass->setMemoryLeader(getNextMemoryLeader(OldClass)); DEBUG(dbgs() << "Memory class leader change for class " << OldClass->getID() << " to " @@ -1956,10 +1982,9 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, if (NewClass->getStoreCount() == 0 && !NewClass->getStoredValue()) { // If it's a store expression we are using, it means we are not equivalent // to something earlier. 
- if (isa<StoreExpression>(E)) { - assert(lookupOperandLeader(SI->getValueOperand()) != - NewClass->getLeader()); - NewClass->setStoredValue(lookupOperandLeader(SI->getValueOperand())); + if (auto *SE = dyn_cast<StoreExpression>(E)) { + assert(SE->getStoredValue() != NewClass->getLeader()); + NewClass->setStoredValue(SE->getStoredValue()); markValueLeaderChangeTouched(NewClass); // Shift the new class leader to be the store DEBUG(dbgs() << "Changing leader of congruence class " @@ -1985,7 +2010,7 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, // See if we destroyed the class or need to swap leaders. if (OldClass->empty() && OldClass != TOPClass) { if (OldClass->getDefiningExpr()) { - DEBUG(dbgs() << "Erasing expression " << OldClass->getDefiningExpr() + DEBUG(dbgs() << "Erasing expression " << *OldClass->getDefiningExpr() << " from table\n"); ExpressionToClass.erase(OldClass->getDefiningExpr()); } @@ -2064,7 +2089,7 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { } else if (const auto *SE = dyn_cast<StoreExpression>(E)) { StoreInst *SI = SE->getStoreInst(); NewClass->setLeader(SI); - NewClass->setStoredValue(lookupOperandLeader(SI->getValueOperand())); + NewClass->setStoredValue(SE->getStoredValue()); // The RepMemoryAccess field will be filled in properly by the // moveValueToNewCongruenceClass call. } else { @@ -2523,6 +2548,19 @@ void NewGVN::verifyMemoryCongruency() const { return false; if (auto *MemDef = dyn_cast<MemoryDef>(Pair.first)) return !isInstructionTriviallyDead(MemDef->getMemoryInst()); + + // We could have phi nodes which operands are all trivially dead, + // so we don't process them. + if (auto *MemPHI = dyn_cast<MemoryPhi>(Pair.first)) { + for (auto &U : MemPHI->incoming_values()) { + if (Instruction *I = dyn_cast<Instruction>(U.get())) { + if (!isInstructionTriviallyDead(I)) + return true; + } + } + return false; + } + return true; }; diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index fb1b47c48276..4f608c97147d 100644 --- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -55,7 +55,7 @@ static void replaceLoopUsesWithConstant(Loop &L, Value &LIC, /// Update the dominator tree after removing one exiting predecessor of a loop /// exit block. static void updateLoopExitIDom(BasicBlock *LoopExitBB, Loop &L, - DominatorTree &DT) { + DominatorTree &DT) { assert(pred_begin(LoopExitBB) != pred_end(LoopExitBB) && "Cannot have empty predecessors of the loop exit block if we split " "off a block to unswitch!"); @@ -137,6 +137,98 @@ static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH, } } +/// Check that all the LCSSA PHI nodes in the loop exit block have trivial +/// incoming values along this edge. +static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB, + BasicBlock &ExitBB) { + for (Instruction &I : ExitBB) { + auto *PN = dyn_cast<PHINode>(&I); + if (!PN) + // No more PHIs to check. + return true; + + // If the incoming value for this edge isn't loop invariant the unswitch + // won't be trivial. + if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB))) + return false; + } + llvm_unreachable("Basic blocks should never be empty!"); +} + +/// Rewrite the PHI nodes in an unswitched loop exit basic block. +/// +/// Requires that the loop exit and unswitched basic block are the same, and +/// that the exiting block was a unique predecessor of that block. 
Rewrites the +/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial +/// PHI nodes from the old preheader that now contains the unswitched +/// terminator. +static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, + BasicBlock &OldExitingBB, + BasicBlock &OldPH) { + for (Instruction &I : UnswitchedBB) { + auto *PN = dyn_cast<PHINode>(&I); + if (!PN) + // No more PHIs to check. + break; + + // When the loop exit is directly unswitched we just need to update the + // incoming basic block. We loop to handle weird cases with repeated + // incoming blocks, but expect to typically only have one operand here. + for (auto i : llvm::seq<int>(0, PN->getNumOperands())) { + assert(PN->getIncomingBlock(i) == &OldExitingBB && + "Found incoming block different from unique predecessor!"); + PN->setIncomingBlock(i, &OldPH); + } + } +} + +/// Rewrite the PHI nodes in the loop exit basic block and the split off +/// unswitched block. +/// +/// Because the exit block remains an exit from the loop, this rewrites the +/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI +/// nodes into the unswitched basic block to select between the value in the +/// old preheader and the loop exit. +static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, + BasicBlock &UnswitchedBB, + BasicBlock &OldExitingBB, + BasicBlock &OldPH) { + assert(&ExitBB != &UnswitchedBB && + "Must have different loop exit and unswitched blocks!"); + Instruction *InsertPt = &*UnswitchedBB.begin(); + for (Instruction &I : ExitBB) { + auto *PN = dyn_cast<PHINode>(&I); + if (!PN) + // No more PHIs to check. + break; + + auto *NewPN = PHINode::Create(PN->getType(), /*NumReservedValues*/ 2, + PN->getName() + ".split", InsertPt); + + // Walk backwards over the old PHI node's inputs to minimize the cost of + // removing each one. We have to do this weird loop manually so that we + // create the same number of new incoming edges in the new PHI as we expect + // each case-based edge to be included in the unswitched switch in some + // cases. + // FIXME: This is really, really gross. It would be much cleaner if LLVM + // allowed us to create a single entry for a predecessor block without + // having separate entries for each "edge" even though these edges are + // required to produce identical results. + for (int i = PN->getNumIncomingValues() - 1; i >= 0; --i) { + if (PN->getIncomingBlock(i) != &OldExitingBB) + continue; + + Value *Incoming = PN->removeIncomingValue(i); + NewPN->addIncoming(Incoming, &OldPH); + } + + // Now replace the old PHI with the new one and wire the old one in as an + // input to the new one. + PN->replaceAllUsesWith(NewPN); + NewPN->addIncoming(PN, &ExitBB); + } +} + /// Unswitch a trivial branch if the condition is loop invariant. /// /// This routine should only be called when loop code leading to the branch has @@ -187,10 +279,8 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, assert(L.contains(ContinueBB) && "Cannot have both successors exit and still be in the loop!"); - // If the loop exit block contains phi nodes, this isn't trivial. - // FIXME: We should examine the PHI to determine whether or not we can handle - // it trivially. 
- if (isa<PHINode>(LoopExitBB->begin())) + auto *ParentBB = BI.getParent(); + if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) return false; DEBUG(dbgs() << " unswitching trivial branch when: " << CondVal @@ -209,14 +299,13 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, BasicBlock *UnswitchedBB; if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) { (void)PredBB; - assert(PredBB == BI.getParent() && "A branch's parent is't a predecessor!"); + assert(PredBB == BI.getParent() && + "A branch's parent isn't a predecessor!"); UnswitchedBB = LoopExitBB; } else { UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI); } - BasicBlock *ParentBB = BI.getParent(); - // Now splice the branch to gate reaching the new preheader and re-point its // successors. OldPH->getInstList().splice(std::prev(OldPH->end()), @@ -229,6 +318,13 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, // terminator. BranchInst::Create(ContinueBB, ParentBB); + // Rewrite the relevant PHI nodes. + if (UnswitchedBB == LoopExitBB) + rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH); + else + rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB, + *ParentBB, *OldPH); + // Now we need to update the dominator tree. updateDTAfterUnswitch(UnswitchedBB, OldPH, DT); // But if we split something off of the loop exit block then we also removed @@ -278,6 +374,8 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, if (!L.isLoopInvariant(LoopCond)) return false; + auto *ParentBB = SI.getParent(); + // FIXME: We should compute this once at the start and update it! SmallVector<BasicBlock *, 16> ExitBlocks; L.getExitBlocks(ExitBlocks); @@ -287,12 +385,13 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, SmallVector<int, 4> ExitCaseIndices; for (auto Case : SI.cases()) { auto *SuccBB = Case.getCaseSuccessor(); - if (ExitBlockSet.count(SuccBB) && !isa<PHINode>(SuccBB->begin())) + if (ExitBlockSet.count(SuccBB) && + areLoopExitPHIsLoopInvariant(L, *ParentBB, *SuccBB)) ExitCaseIndices.push_back(Case.getCaseIndex()); } BasicBlock *DefaultExitBB = nullptr; if (ExitBlockSet.count(SI.getDefaultDest()) && - !isa<PHINode>(SI.getDefaultDest()->begin()) && + areLoopExitPHIsLoopInvariant(L, *ParentBB, *SI.getDefaultDest()) && !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator())) DefaultExitBB = SI.getDefaultDest(); else if (ExitCaseIndices.empty()) @@ -330,7 +429,6 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, if (CommonSuccBB) { SI.setDefaultDest(CommonSuccBB); } else { - BasicBlock *ParentBB = SI.getParent(); BasicBlock *UnreachableBB = BasicBlock::Create( ParentBB->getContext(), Twine(ParentBB->getName()) + ".unreachable_default", @@ -358,30 +456,44 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, // Now add the unswitched switch. auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH); - // Split any exit blocks with remaining in-loop predecessors. We walk in - // reverse so that we split in the same order as the cases appeared. This is - // purely for convenience of reading the resulting IR, but it doesn't cost - // anything really. + // Rewrite the IR for the unswitched basic blocks. This requires two steps. + // First, we split any exit blocks with remaining in-loop predecessors. Then + // we update the PHIs in one of two ways depending on if there was a split. 
+ // We walk in reverse so that we split in the same order as the cases + // appeared. This is purely for convenience of reading the resulting IR, but + // it doesn't cost anything really. + SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs; SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap; // Handle the default exit if necessary. // FIXME: It'd be great if we could merge this with the loop below but LLVM's // ranges aren't quite powerful enough yet. - if (DefaultExitBB && !pred_empty(DefaultExitBB)) { - auto *SplitBB = - SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI); - updateLoopExitIDom(DefaultExitBB, L, DT); - DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB; + if (DefaultExitBB) { + if (pred_empty(DefaultExitBB)) { + UnswitchedExitBBs.insert(DefaultExitBB); + rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH); + } else { + auto *SplitBB = + SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI); + rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB, + *ParentBB, *OldPH); + updateLoopExitIDom(DefaultExitBB, L, DT); + DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB; + } } // Note that we must use a reference in the for loop so that we update the // container. for (auto &CasePair : reverse(ExitCases)) { // Grab a reference to the exit block in the pair so that we can update it. - BasicBlock *&ExitBB = CasePair.second; + BasicBlock *ExitBB = CasePair.second; // If this case is the last edge into the exit block, we can simply reuse it // as it will no longer be a loop exit. No mapping necessary. - if (pred_empty(ExitBB)) + if (pred_empty(ExitBB)) { + // Only rewrite once. + if (UnswitchedExitBBs.insert(ExitBB).second) + rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH); continue; + } // Otherwise we need to split the exit block so that we retain an exit // block from the loop and a target for the unswitched condition. @@ -389,9 +501,12 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, if (!SplitExitBB) { // If this is the first time we see this, do the split and remember it. SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI); + rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB, + *ParentBB, *OldPH); updateLoopExitIDom(ExitBB, L, DT); } - ExitBB = SplitExitBB; + // Update the case pair to point to the split block. + CasePair.second = SplitExitBB; } // Now add the unswitched cases. 
We do this in reverse order as we built them diff --git a/lib/Transforms/Scalar/SpeculativeExecution.cpp b/lib/Transforms/Scalar/SpeculativeExecution.cpp index a0fc966cee2c..a7c308b59877 100644 --- a/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ b/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -208,6 +208,47 @@ bool SpeculativeExecutionPass::runOnBasicBlock(BasicBlock &B) { return false; } +static unsigned ComputeSpeculationCost(const Instruction *I, + const TargetTransformInfo &TTI) { + switch (Operator::getOpcode(I)) { + case Instruction::GetElementPtr: + case Instruction::Add: + case Instruction::Mul: + case Instruction::And: + case Instruction::Or: + case Instruction::Select: + case Instruction::Shl: + case Instruction::Sub: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::Xor: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::Call: + case Instruction::BitCast: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::AddrSpaceCast: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPExt: + case Instruction::FPTrunc: + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + case Instruction::ICmp: + case Instruction::FCmp: + return TTI.getUserCost(I); + + default: + return UINT_MAX; // Disallow anything not whitelisted. + } +} + bool SpeculativeExecutionPass::considerHoistingFromTo( BasicBlock &FromBlock, BasicBlock &ToBlock) { SmallSet<const Instruction *, 8> NotHoisted; @@ -223,7 +264,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo( unsigned TotalSpeculationCost = 0; for (auto& I : FromBlock) { - const unsigned Cost = TTI->getUserCost(&I); + const unsigned Cost = ComputeSpeculationCost(&I, *TTI); if (Cost != UINT_MAX && isSafeToSpeculativelyExecute(&I) && AllPrecedingUsesFromBlockHoisted(&I)) { TotalSpeculationCost += Cost; |
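
The largest functional change in this import is the new CTLZ idiom recognition in LoopIdiomRecognize (detectCTLZIdiom / recognizeAndInsertCTLZ / transformLoopToCountable). As a reading aid only — not part of the patch — here is a minimal C++ sketch of the loop shape the pass matches and the closed form it substitutes. The helper names are invented, __builtin_clz (a GCC/Clang builtin) stands in for the llvm.ctlz intrinsic the pass actually emits, and an unsigned logical shift is used for simplicity even though the IR matcher looks for an arithmetic shift right by one.

```cpp
// Illustrative sketch only; the function names below are hypothetical.
#include <cassert>
#include <climits>
#include <initializer_list>

// The non-countable idiom: count iterations while shifting x right by one.
// Mirrors the doc comment in detectCTLZIdiom; the precondition x0 != 0
// corresponds to the "if (x0 == 0) goto loop-exit" guard described there.
static int countShiftsLoop(unsigned x0) {
  assert(x0 != 0 && "idiom precondition: loop entered only when x0 != 0");
  int cnt = 0;
  unsigned x = x0;
  do {
    ++cnt;          // cnt.next = cnt + 1   (CntInst)
    x >>= 1;        // x.next  = x >> 1     (DefX)
  } while (x != 0); // loop-back test on x.next
  return cnt;
}

// The countable form: Count = BitWidth(x0) - ctlz(x0), computed up front
// in the preheader, as transformLoopToCountable does.
static int countShiftsClosedForm(unsigned x0) {
  assert(x0 != 0 && "__builtin_clz(0) is undefined, like ctlz without ZeroCheck");
  return static_cast<int>(sizeof(unsigned) * CHAR_BIT) - __builtin_clz(x0);
}

int main() {
  for (unsigned v : {1u, 2u, 7u, 1024u, 0x80000000u})
    assert(countShiftsLoop(v) == countShiftsClosedForm(v));
  return 0;
}
```

If the original counter phi is also used after the loop, the patch instead computes CountPrev = BitWidth - ctlz(InitX >> 1) and rewrites those uses, so the rewritten loop (or the whole loop, when its body is otherwise empty) can later be deleted.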