aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Transforms/Utils
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2019-01-20 11:41:25 +0000
committerDimitry Andric <dim@FreeBSD.org>2019-01-20 11:41:25 +0000
commitd9484dd61cc151c4f34c31e07f693fefa66316b5 (patch)
treeab0560b3da293f1fafd3269c59692e929418f5c2 /contrib/llvm/lib/Transforms/Utils
parent79e0962d4c3cf1f0acf359a9d69cb3ac68c414c4 (diff)
parentd8e91e46262bc44006913e6796843909f1ac7bcd (diff)
Merge llvm trunk r351319, resolve conflicts, and update FREEBSD-Xlist.
Notes
Notes: svn path=/projects/clang800-import/; revision=343210
Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils')
-rw-r--r--contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp34
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp200
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp12
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp68
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp47
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp105
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp32
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneModule.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp357
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp16
-rw-r--r--contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Evaluator.cpp5
-rw-r--r--contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp51
-rw-r--r--contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp64
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp23
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp77
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Local.cpp423
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp77
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp29
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp92
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp143
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp1487
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp43
-rw-r--r--contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp51
-rw-r--r--contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp36
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp312
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp11
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp607
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SplitModule.cpp14
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Utils.cpp1
39 files changed, 2345 insertions, 2128 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
index e3ef42362223..564537af0c2a 100644
--- a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -209,10 +209,18 @@ static bool addDiscriminators(Function &F) {
// Only the lowest 7 bits are used to represent a discriminator to fit
// it in 1 byte ULEB128 representation.
unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
- I.setDebugLoc(DIL->setBaseDiscriminator(Discriminator));
- LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
- << DIL->getColumn() << ":" << Discriminator << " " << I
- << "\n");
+ auto NewDIL = DIL->setBaseDiscriminator(Discriminator);
+ if (!NewDIL) {
+ LLVM_DEBUG(dbgs() << "Could not encode discriminator: "
+ << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn() << ":" << Discriminator << " "
+ << I << "\n");
+ } else {
+ I.setDebugLoc(NewDIL.getValue());
+ LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn() << ":" << Discriminator << " " << I
+ << "\n");
+ }
Changed = true;
}
}
@@ -224,23 +232,31 @@ static bool addDiscriminators(Function &F) {
for (BasicBlock &B : F) {
LocationSet CallLocations;
for (auto &I : B.getInstList()) {
- CallInst *Current = dyn_cast<CallInst>(&I);
// We bypass intrinsic calls for the following two reasons:
// 1) We want to avoid a non-deterministic assigment of
// discriminators.
// 2) We want to minimize the number of base discriminators used.
- if (!Current || isa<IntrinsicInst>(&I))
+ if (!isa<InvokeInst>(I) && (!isa<CallInst>(I) || isa<IntrinsicInst>(I)))
continue;
- DILocation *CurrentDIL = Current->getDebugLoc();
+ DILocation *CurrentDIL = I.getDebugLoc();
if (!CurrentDIL)
continue;
Location L =
std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
if (!CallLocations.insert(L).second) {
unsigned Discriminator = ++LDM[L];
- Current->setDebugLoc(CurrentDIL->setBaseDiscriminator(Discriminator));
- Changed = true;
+ auto NewDIL = CurrentDIL->setBaseDiscriminator(Discriminator);
+ if (!NewDIL) {
+ LLVM_DEBUG(dbgs()
+ << "Could not encode discriminator: "
+ << CurrentDIL->getFilename() << ":"
+ << CurrentDIL->getLine() << ":" << CurrentDIL->getColumn()
+ << ":" << Discriminator << " " << I << "\n");
+ } else {
+ I.setDebugLoc(NewDIL.getValue());
+ Changed = true;
+ }
}
}
}
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 516a785dce1e..7da768252fc1 100644
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -20,11 +20,13 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -37,6 +39,7 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <string>
@@ -45,42 +48,58 @@
using namespace llvm;
-void llvm::DeleteDeadBlock(BasicBlock *BB, DeferredDominance *DDT) {
- assert((pred_begin(BB) == pred_end(BB) ||
- // Can delete self loop.
- BB->getSinglePredecessor() == BB) && "Block is not dead!");
- TerminatorInst *BBTerm = BB->getTerminator();
- std::vector<DominatorTree::UpdateType> Updates;
+void llvm::DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU) {
+ SmallVector<BasicBlock *, 1> BBs = {BB};
+ DeleteDeadBlocks(BBs, DTU);
+}
- // Loop through all of our successors and make sure they know that one
- // of their predecessors is going away.
- if (DDT)
- Updates.reserve(BBTerm->getNumSuccessors());
- for (BasicBlock *Succ : BBTerm->successors()) {
- Succ->removePredecessor(BB);
- if (DDT)
- Updates.push_back({DominatorTree::Delete, BB, Succ});
- }
+void llvm::DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
+ DomTreeUpdater *DTU) {
+#ifndef NDEBUG
+ // Make sure that all predecessors of each dead block is also dead.
+ SmallPtrSet<BasicBlock *, 4> Dead(BBs.begin(), BBs.end());
+ assert(Dead.size() == BBs.size() && "Duplicating blocks?");
+ for (auto *BB : Dead)
+ for (BasicBlock *Pred : predecessors(BB))
+ assert(Dead.count(Pred) && "All predecessors must be dead!");
+#endif
+
+ SmallVector<DominatorTree::UpdateType, 4> Updates;
+ for (auto *BB : BBs) {
+ // Loop through all of our successors and make sure they know that one
+ // of their predecessors is going away.
+ for (BasicBlock *Succ : successors(BB)) {
+ Succ->removePredecessor(BB);
+ if (DTU)
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
+ }
- // Zap all the instructions in the block.
- while (!BB->empty()) {
- Instruction &I = BB->back();
- // If this instruction is used, replace uses with an arbitrary value.
- // Because control flow can't get here, we don't care what we replace the
- // value with. Note that since this block is unreachable, and all values
- // contained within it must dominate their uses, that all uses will
- // eventually be removed (they are themselves dead).
- if (!I.use_empty())
- I.replaceAllUsesWith(UndefValue::get(I.getType()));
- BB->getInstList().pop_back();
+ // Zap all the instructions in the block.
+ while (!BB->empty()) {
+ Instruction &I = BB->back();
+ // If this instruction is used, replace uses with an arbitrary value.
+ // Because control flow can't get here, we don't care what we replace the
+ // value with. Note that since this block is unreachable, and all values
+ // contained within it must dominate their uses, that all uses will
+ // eventually be removed (they are themselves dead).
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ BB->getInstList().pop_back();
+ }
+ new UnreachableInst(BB->getContext(), BB);
+ assert(BB->getInstList().size() == 1 &&
+ isa<UnreachableInst>(BB->getTerminator()) &&
+ "The successor list of BB isn't empty before "
+ "applying corresponding DTU updates.");
}
+ if (DTU)
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
- if (DDT) {
- DDT->applyUpdates(Updates);
- DDT->deleteBB(BB); // Deferred deletion of BB.
- } else {
- BB->eraseFromParent(); // Zap the block!
- }
+ for (BasicBlock *BB : BBs)
+ if (DTU)
+ DTU->deleteBB(BB);
+ else
+ BB->eraseFromParent();
}
void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
@@ -115,12 +134,9 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
return Changed;
}
-bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
- LoopInfo *LI,
- MemoryDependenceResults *MemDep,
- DeferredDominance *DDT) {
- assert(!(DT && DDT) && "Cannot call with both DT and DDT.");
-
+bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ MemoryDependenceResults *MemDep) {
if (BB->hasAddressTaken())
return false;
@@ -131,7 +147,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
// Don't break self-loops.
if (PredBB == BB) return false;
// Don't break unwinding instructions.
- if (PredBB->getTerminator()->isExceptional())
+ if (PredBB->getTerminator()->isExceptionalTerminator())
return false;
// Can't merge if there are multiple distinct successors.
@@ -154,10 +170,10 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
FoldSingleEntryPHINodes(BB, MemDep);
}
- // Deferred DT update: Collect all the edges that exit BB. These
- // dominator edges will be redirected from Pred.
+ // DTU update: Collect all the edges that exit BB.
+ // These dominator edges will be redirected from Pred.
std::vector<DominatorTree::UpdateType> Updates;
- if (DDT) {
+ if (DTU) {
Updates.reserve(1 + (2 * succ_size(BB)));
Updates.push_back({DominatorTree::Delete, PredBB, BB});
for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
@@ -166,6 +182,9 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
}
}
+ if (MSSAU)
+ MSSAU->moveAllAfterMergeBlocks(BB, PredBB, &*(BB->begin()));
+
// Delete the unconditional branch from the predecessor...
PredBB->getInstList().pop_back();
@@ -175,6 +194,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
// Move all definitions in the successor to the predecessor...
PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+ new UnreachableInst(BB->getContext(), BB);
// Eliminate duplicate dbg.values describing the entry PHI node post-splice.
for (auto Incoming : IncomingValues) {
@@ -195,28 +215,24 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
if (!PredBB->hasName())
PredBB->takeName(BB);
- // Finally, erase the old block and update dominator info.
- if (DT)
- if (DomTreeNode *DTN = DT->getNode(BB)) {
- DomTreeNode *PredDTN = DT->getNode(PredBB);
- SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end());
- for (DomTreeNode *DI : Children)
- DT->changeImmediateDominator(DI, PredDTN);
-
- DT->eraseNode(BB);
- }
-
if (LI)
LI->removeBlock(BB);
if (MemDep)
MemDep->invalidateCachedPredecessors();
- if (DDT) {
- DDT->deleteBB(BB); // Deferred deletion of BB.
- DDT->applyUpdates(Updates);
- } else {
- BB->eraseFromParent(); // Nuke BB.
+ // Finally, erase the old block and update dominator info.
+ if (DTU) {
+ assert(BB->getInstList().size() == 1 &&
+ isa<UnreachableInst>(BB->getTerminator()) &&
+ "The successor list of BB isn't empty before "
+ "applying corresponding DTU updates.");
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->deleteBB(BB);
+ }
+
+ else {
+ BB->eraseFromParent(); // Nuke BB if DTU is nullptr.
}
return true;
}
@@ -261,13 +277,14 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
}
BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
- LoopInfo *LI) {
+ LoopInfo *LI, MemorySSAUpdater *MSSAU) {
unsigned SuccNum = GetSuccessorNumber(BB, Succ);
// If this is a critical edge, let SplitCriticalEdge do it.
- TerminatorInst *LatchTerm = BB->getTerminator();
- if (SplitCriticalEdge(LatchTerm, SuccNum, CriticalEdgeSplittingOptions(DT, LI)
- .setPreserveLCSSA()))
+ Instruction *LatchTerm = BB->getTerminator();
+ if (SplitCriticalEdge(
+ LatchTerm, SuccNum,
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA()))
return LatchTerm->getSuccessor(SuccNum);
// If the edge isn't critical, then BB has a single successor or Succ has a
@@ -277,14 +294,14 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
// block.
assert(SP == BB && "CFG broken");
SP = nullptr;
- return SplitBlock(Succ, &Succ->front(), DT, LI);
+ return SplitBlock(Succ, &Succ->front(), DT, LI, MSSAU);
}
// Otherwise, if BB has a single successor, split it at the bottom of the
// block.
assert(BB->getTerminator()->getNumSuccessors() == 1 &&
"Should have a single succ!");
- return SplitBlock(BB, BB->getTerminator(), DT, LI);
+ return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU);
}
unsigned
@@ -292,7 +309,7 @@ llvm::SplitAllCriticalEdges(Function &F,
const CriticalEdgeSplittingOptions &Options) {
unsigned NumBroken = 0;
for (BasicBlock &BB : F) {
- TerminatorInst *TI = BB.getTerminator();
+ Instruction *TI = BB.getTerminator();
if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
if (SplitCriticalEdge(TI, i, Options))
@@ -302,7 +319,8 @@ llvm::SplitAllCriticalEdges(Function &F,
}
BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
- DominatorTree *DT, LoopInfo *LI) {
+ DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU) {
BasicBlock::iterator SplitIt = SplitPt->getIterator();
while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
++SplitIt;
@@ -324,6 +342,11 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
DT->changeImmediateDominator(I, NewNode);
}
+ // Move MemoryAccesses still tracked in Old, but part of New now.
+ // Update accesses in successor blocks accordingly.
+ if (MSSAU)
+ MSSAU->moveAllAfterSpliceBlocks(Old, New, &*(New->begin()));
+
return New;
}
@@ -331,6 +354,7 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
ArrayRef<BasicBlock *> Preds,
DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
bool PreserveLCSSA, bool &HasLoopExit) {
// Update dominator tree if available.
if (DT) {
@@ -343,6 +367,10 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
}
}
+ // Update MemoryPhis after split if MemorySSA is available
+ if (MSSAU)
+ MSSAU->wireOldPredecessorsToNewImmediatePredecessor(OldBB, NewBB, Preds);
+
// The rest of the logic is only relevant for updating the loop structures.
if (!LI)
return;
@@ -483,7 +511,8 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
ArrayRef<BasicBlock *> Preds,
const char *Suffix, DominatorTree *DT,
- LoopInfo *LI, bool PreserveLCSSA) {
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA) {
// Do not attempt to split that which cannot be split.
if (!BB->canSplitPredecessors())
return nullptr;
@@ -495,7 +524,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
std::string NewName = std::string(Suffix) + ".split-lp";
SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs, DT,
- LI, PreserveLCSSA);
+ LI, MSSAU, PreserveLCSSA);
return NewBBs[0];
}
@@ -529,7 +558,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// Update DominatorTree, LoopInfo, and LCCSA analysis information.
bool HasLoopExit = false;
- UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, PreserveLCSSA,
+ UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, MSSAU, PreserveLCSSA,
HasLoopExit);
if (!Preds.empty()) {
@@ -545,6 +574,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
const char *Suffix1, const char *Suffix2,
SmallVectorImpl<BasicBlock *> &NewBBs,
DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
bool PreserveLCSSA) {
assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");
@@ -570,7 +600,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
}
bool HasLoopExit = false;
- UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DT, LI, PreserveLCSSA,
+ UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DT, LI, MSSAU, PreserveLCSSA,
HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB1.
@@ -606,7 +636,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
// Update DominatorTree, LoopInfo, and LCCSA analysis information.
HasLoopExit = false;
- UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DT, LI,
+ UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DT, LI, MSSAU,
PreserveLCSSA, HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB2.
@@ -644,7 +674,8 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
}
ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
- BasicBlock *Pred) {
+ BasicBlock *Pred,
+ DomTreeUpdater *DTU) {
Instruction *UncondBranch = Pred->getTerminator();
// Clone the return and add it to the end of the predecessor.
Instruction *NewRet = RI->clone();
@@ -678,19 +709,24 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
// longer branch to them.
BB->removePredecessor(Pred);
UncondBranch->eraseFromParent();
+
+ if (DTU)
+ DTU->deleteEdge(Pred, BB);
+
return cast<ReturnInst>(NewRet);
}
-TerminatorInst *
-llvm::SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
- bool Unreachable, MDNode *BranchWeights,
- DominatorTree *DT, LoopInfo *LI) {
+Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
+ Instruction *SplitBefore,
+ bool Unreachable,
+ MDNode *BranchWeights,
+ DominatorTree *DT, LoopInfo *LI) {
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
- TerminatorInst *HeadOldTerm = Head->getTerminator();
+ Instruction *HeadOldTerm = Head->getTerminator();
LLVMContext &C = Head->getContext();
BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
- TerminatorInst *CheckTerm;
+ Instruction *CheckTerm;
if (Unreachable)
CheckTerm = new UnreachableInst(C, ThenBlock);
else
@@ -725,12 +761,12 @@ llvm::SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
}
void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
- TerminatorInst **ThenTerm,
- TerminatorInst **ElseTerm,
+ Instruction **ThenTerm,
+ Instruction **ElseTerm,
MDNode *BranchWeights) {
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
- TerminatorInst *HeadOldTerm = Head->getTerminator();
+ Instruction *HeadOldTerm = Head->getTerminator();
LLVMContext &C = Head->getContext();
BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
BasicBlock *ElseBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 3e30c27a9f33..fafc9aaba5c9 100644
--- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
@@ -129,7 +130,7 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
}
BasicBlock *
-llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
+llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
const CriticalEdgeSplittingOptions &Options) {
if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges))
return nullptr;
@@ -198,6 +199,11 @@ llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// If we have nothing to update, just return.
auto *DT = Options.DT;
auto *LI = Options.LI;
+ auto *MSSAU = Options.MSSAU;
+ if (MSSAU)
+ MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
+ DestBB, NewBB, {TIBB}, Options.MergeIdenticalEdges);
+
if (!DT && !LI)
return NewBB;
@@ -283,7 +289,7 @@ llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
if (!LoopPreds.empty()) {
assert(!DestBB->isEHPad() && "We don't split edges to EH pads!");
BasicBlock *NewExitBB = SplitBlockPredecessors(
- DestBB, LoopPreds, "split", DT, LI, Options.PreserveLCSSA);
+ DestBB, LoopPreds, "split", DT, LI, MSSAU, Options.PreserveLCSSA);
if (Options.PreserveLCSSA)
createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB);
}
@@ -312,7 +318,7 @@ findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
BasicBlock *IBB = nullptr;
for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
BasicBlock *PredBB = PN->getIncomingBlock(Pred);
- TerminatorInst *PredTerm = PredBB->getTerminator();
+ Instruction *PredTerm = PredBB->getTerminator();
switch (PredTerm->getOpcode()) {
case Instruction::IndirectBr:
if (IBB)
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 245200362018..3466dedd3236 100644
--- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -38,6 +38,7 @@ STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns");
+STATISTIC(NumReturnedArg, "Number of arguments inferred as returned");
static bool setDoesNotAccessMemory(Function &F) {
if (F.doesNotAccessMemory())
@@ -105,6 +106,14 @@ static bool setRetNonNull(Function &F) {
return true;
}
+static bool setReturnedArg(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::Returned))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::Returned);
+ ++NumReturnedArg;
+ return true;
+}
+
static bool setNonLazyBind(Function &F) {
if (F.hasFnAttribute(Attribute::NonLazyBind))
return false;
@@ -155,10 +164,12 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_strcpy:
- case LibFunc_stpcpy:
+ case LibFunc_strncpy:
case LibFunc_strcat:
case LibFunc_strncat:
- case LibFunc_strncpy:
+ Changed |= setReturnedArg(F, 0);
+ LLVM_FALLTHROUGH;
+ case LibFunc_stpcpy:
case LibFunc_stpncpy:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
@@ -270,9 +281,11 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_memcpy:
+ case LibFunc_memmove:
+ Changed |= setReturnedArg(F, 0);
+ LLVM_FALLTHROUGH;
case LibFunc_mempcpy:
case LibFunc_memccpy:
- case LibFunc_memmove:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
@@ -741,6 +754,8 @@ bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
LibFunc DoubleFn, LibFunc FloatFn,
LibFunc LongDoubleFn) {
switch (Ty->getTypeID()) {
+ case Type::HalfTyID:
+ return false;
case Type::FloatTyID:
return TLI->has(FloatFn);
case Type::DoubleTyID:
@@ -750,6 +765,24 @@ bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
}
}
+StringRef llvm::getUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn) {
+ assert(hasUnaryFloatFn(TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) &&
+ "Cannot get name for unavailable function!");
+
+ switch (Ty->getTypeID()) {
+ case Type::HalfTyID:
+ llvm_unreachable("No name for HalfTy!");
+ case Type::FloatTyID:
+ return TLI->getName(FloatFn);
+ case Type::DoubleTyID:
+ return TLI->getName(DoubleFn);
+ default:
+ return TLI->getName(LongDoubleFn);
+ }
+}
+
//- Emit LibCalls ------------------------------------------------------------//
Value *llvm::castToCStr(Value *V, IRBuilder<> &B) {
@@ -927,10 +960,10 @@ static void appendTypeSuffix(Value *Op, StringRef &Name,
}
}
-Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
- const AttributeList &Attrs) {
- SmallString<20> NameBuffer;
- appendTypeSuffix(Op, Name, NameBuffer);
+static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
+ IRBuilder<> &B,
+ const AttributeList &Attrs) {
+ assert((Name != "") && "Must specify Name to emitUnaryFloatFnCall");
Module *M = B.GetInsertBlock()->getModule();
Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
@@ -949,8 +982,29 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
return CI;
}
+Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
+ const AttributeList &Attrs) {
+ SmallString<20> NameBuffer;
+ appendTypeSuffix(Op, Name, NameBuffer);
+
+ return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs);
+}
+
+Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn, IRBuilder<> &B,
+ const AttributeList &Attrs) {
+ // Get the name of the function according to TLI.
+ StringRef Name = getUnaryFloatFn(TLI, Op->getType(),
+ DoubleFn, FloatFn, LongDoubleFn);
+
+ return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs);
+}
+
Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
IRBuilder<> &B, const AttributeList &Attrs) {
+ assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
+
SmallString<20> NameBuffer;
appendTypeSuffix(Op1, Name, NameBuffer);
diff --git a/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index 6d18d0614611..e58ddcf34667 100644
--- a/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -177,8 +177,8 @@ static void createRetBitCast(CallSite CS, Type *RetTy, CastInst **RetBitCast) {
InsertBefore = &*std::next(CS.getInstruction()->getIterator());
// Bitcast the return value to the correct type.
- auto *Cast = CastInst::Create(Instruction::BitCast, CS.getInstruction(),
- RetTy, "", InsertBefore);
+ auto *Cast = CastInst::CreateBitOrPointerCast(CS.getInstruction(), RetTy, "",
+ InsertBefore);
if (RetBitCast)
*RetBitCast = Cast;
@@ -270,8 +270,8 @@ static Instruction *versionCallSite(CallSite CS, Value *Callee,
// Create an if-then-else structure. The original instruction is moved into
// the "else" block, and a clone of the original instruction is placed in the
// "then" block.
- TerminatorInst *ThenTerm = nullptr;
- TerminatorInst *ElseTerm = nullptr;
+ Instruction *ThenTerm = nullptr;
+ Instruction *ElseTerm = nullptr;
SplitBlockAndInsertIfThenElse(Cond, CS.getInstruction(), &ThenTerm, &ElseTerm,
BranchWeights);
BasicBlock *ThenBlock = ThenTerm->getParent();
@@ -321,12 +321,14 @@ bool llvm::isLegalToPromote(CallSite CS, Function *Callee,
const char **FailureReason) {
assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
+ auto &DL = Callee->getParent()->getDataLayout();
+
// Check the return type. The callee's return value type must be bitcast
// compatible with the call site's type.
Type *CallRetTy = CS.getInstruction()->getType();
Type *FuncRetTy = Callee->getReturnType();
if (CallRetTy != FuncRetTy)
- if (!CastInst::isBitCastable(FuncRetTy, CallRetTy)) {
+ if (!CastInst::isBitOrNoopPointerCastable(FuncRetTy, CallRetTy, DL)) {
if (FailureReason)
*FailureReason = "Return type mismatch";
return false;
@@ -351,7 +353,7 @@ bool llvm::isLegalToPromote(CallSite CS, Function *Callee,
Type *ActualTy = CS.getArgument(I)->getType();
if (FormalTy == ActualTy)
continue;
- if (!CastInst::isBitCastable(ActualTy, FormalTy)) {
+ if (!CastInst::isBitOrNoopPointerCastable(ActualTy, FormalTy, DL)) {
if (FailureReason)
*FailureReason = "Argument type mismatch";
return false;
@@ -391,21 +393,46 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
// to the correct type.
auto CalleeType = Callee->getFunctionType();
auto CalleeParamNum = CalleeType->getNumParams();
+
+ LLVMContext &Ctx = Callee->getContext();
+ const AttributeList &CallerPAL = CS.getAttributes();
+ // The new list of argument attributes.
+ SmallVector<AttributeSet, 4> NewArgAttrs;
+ bool AttributeChanged = false;
+
for (unsigned ArgNo = 0; ArgNo < CalleeParamNum; ++ArgNo) {
auto *Arg = CS.getArgument(ArgNo);
Type *FormalTy = CalleeType->getParamType(ArgNo);
Type *ActualTy = Arg->getType();
if (FormalTy != ActualTy) {
- auto *Cast = CastInst::Create(Instruction::BitCast, Arg, FormalTy, "",
- CS.getInstruction());
+ auto *Cast = CastInst::CreateBitOrPointerCast(Arg, FormalTy, "",
+ CS.getInstruction());
CS.setArgument(ArgNo, Cast);
- }
+
+ // Remove any incompatible attributes for the argument.
+ AttrBuilder ArgAttrs(CallerPAL.getParamAttributes(ArgNo));
+ ArgAttrs.remove(AttributeFuncs::typeIncompatible(FormalTy));
+ NewArgAttrs.push_back(AttributeSet::get(Ctx, ArgAttrs));
+ AttributeChanged = true;
+ } else
+ NewArgAttrs.push_back(CallerPAL.getParamAttributes(ArgNo));
}
// If the return type of the call site doesn't match that of the callee, cast
// the returned value to the appropriate type.
- if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy)
+ // Remove any incompatible return value attribute.
+ AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
+ if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy) {
createRetBitCast(CS, CallSiteRetTy, RetBitCast);
+ RAttrs.remove(AttributeFuncs::typeIncompatible(CalleeRetTy));
+ AttributeChanged = true;
+ }
+
+ // Set the new callsite attribute.
+ if (AttributeChanged)
+ CS.setAttributes(AttributeList::get(Ctx, CallerPAL.getFnAttributes(),
+ AttributeSet::get(Ctx, RAttrs),
+ NewArgAttrs));
return CS.getInstruction();
}
diff --git a/contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp b/contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
new file mode 100644
index 000000000000..cf41fd2e14c0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
@@ -0,0 +1,105 @@
+//===- CanonicalizeAliases.cpp - ThinLTO Support: Canonicalize Aliases ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Currently this file implements partial alias canonicalization, to
+// flatten chains of aliases (also done by GlobalOpt, but not on for
+// O0 compiles). E.g.
+// @a = alias i8, i8 *@b
+// @b = alias i8, i8 *@g
+//
+// will be converted to:
+// @a = alias i8, i8 *@g <-- @a is now an alias to base object @g
+// @b = alias i8, i8 *@g
+//
+// Eventually this file will implement full alias canonicalation, so that
+// all aliasees are private anonymous values. E.g.
+// @a = alias i8, i8 *@g
+// @g = global i8 0
+//
+// will be converted to:
+// @0 = private global
+// @a = alias i8, i8* @0
+// @g = alias i8, i8* @0
+//
+// This simplifies optimization and ThinLTO linking of the original symbols.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
+
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueHandle.h"
+
+using namespace llvm;
+
+namespace {
+
+static Constant *canonicalizeAlias(Constant *C, bool &Changed) {
+ if (auto *GA = dyn_cast<GlobalAlias>(C)) {
+ auto *NewAliasee = canonicalizeAlias(GA->getAliasee(), Changed);
+ if (NewAliasee != GA->getAliasee()) {
+ GA->setAliasee(NewAliasee);
+ Changed = true;
+ }
+ return NewAliasee;
+ }
+
+ auto *CE = dyn_cast<ConstantExpr>(C);
+ if (!CE)
+ return C;
+
+ std::vector<Constant *> Ops;
+ for (Use &U : CE->operands())
+ Ops.push_back(canonicalizeAlias(cast<Constant>(U), Changed));
+ return CE->getWithOperands(Ops);
+}
+
+/// Convert aliases to canonical form.
+static bool canonicalizeAliases(Module &M) {
+ bool Changed = false;
+ for (auto &GA : M.aliases())
+ canonicalizeAlias(&GA, Changed);
+ return Changed;
+}
+
+// Legacy pass that canonicalizes aliases.
+class CanonicalizeAliasesLegacyPass : public ModulePass {
+
+public:
+ /// Pass identification, replacement for typeid
+ static char ID;
+
+ /// Specify pass name for debug output
+ StringRef getPassName() const override { return "Canonicalize Aliases"; }
+
+ explicit CanonicalizeAliasesLegacyPass() : ModulePass(ID) {}
+
+ bool runOnModule(Module &M) override { return canonicalizeAliases(M); }
+};
+char CanonicalizeAliasesLegacyPass::ID = 0;
+
+} // anonymous namespace
+
+PreservedAnalyses CanonicalizeAliasesPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (!canonicalizeAliases(M))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
+
+INITIALIZE_PASS_BEGIN(CanonicalizeAliasesLegacyPass, "canonicalize-aliases",
+ "Canonicalize aliases", false, false)
+INITIALIZE_PASS_END(CanonicalizeAliasesLegacyPass, "canonicalize-aliases",
+ "Canonicalize aliases", false, false)
+
+namespace llvm {
+ModulePass *createCanonicalizeAliasesPass() {
+ return new CanonicalizeAliasesLegacyPass();
+}
+} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 9ae60962a631..8f8c601f5f13 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -18,11 +18,11 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
@@ -32,6 +32,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <map>
using namespace llvm;
@@ -235,8 +236,8 @@ Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap,
ArgTypes, F->getFunctionType()->isVarArg());
// Create the new function...
- Function *NewF =
- Function::Create(FTy, F->getLinkage(), F->getName(), F->getParent());
+ Function *NewF = Function::Create(FTy, F->getLinkage(), F->getAddressSpace(),
+ F->getName(), F->getParent());
// Loop over the arguments, copying the names of the mapped arguments over...
Function::arg_iterator DestI = NewF->arg_begin();
@@ -365,7 +366,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
}
// Finally, clone over the terminator.
- const TerminatorInst *OldTI = BB->getTerminator();
+ const Instruction *OldTI = BB->getTerminator();
bool TerminatorDone = false;
if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
if (BI->isConditional()) {
@@ -414,8 +415,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
CodeInfo->OperandBundleCallSites.push_back(NewInst);
// Recursively clone any reachable successor blocks.
- const TerminatorInst *TI = BB->getTerminator();
- for (const BasicBlock *Succ : TI->successors())
+ const Instruction *TI = BB->getTerminator();
+ for (const BasicBlock *Succ : successors(TI))
ToClone.push_back(Succ);
}
@@ -795,11 +796,12 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
/// Duplicate non-Phi instructions from the beginning of block up to
/// StopAt instruction into a split block between BB and its predecessor.
-BasicBlock *
-llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,
- Instruction *StopAt,
- ValueToValueMapTy &ValueMapping,
- DominatorTree *DT) {
+BasicBlock *llvm::DuplicateInstructionsInSplitBetween(
+ BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt,
+ ValueToValueMapTy &ValueMapping, DomTreeUpdater &DTU) {
+
+ assert(count(successors(PredBB), BB) == 1 &&
+ "There must be a single edge between PredBB and BB!");
// We are going to have to map operands from the original BB block to the new
// copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
// account for entry from PredBB.
@@ -807,10 +809,16 @@ llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
- BasicBlock *NewBB = SplitEdge(PredBB, BB, DT);
+ BasicBlock *NewBB = SplitEdge(PredBB, BB);
NewBB->setName(PredBB->getName() + ".split");
Instruction *NewTerm = NewBB->getTerminator();
+ // FIXME: SplitEdge does not yet take a DTU, so we include the split edge
+ // in the update set here.
+ DTU.applyUpdates({{DominatorTree::Delete, PredBB, BB},
+ {DominatorTree::Insert, PredBB, NewBB},
+ {DominatorTree::Insert, NewBB, BB}});
+
// Clone the non-phi instructions of BB into NewBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
// Stop once we see the terminator too. This covers the case where BB's
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
index c7d68bab8170..659993aa5478 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -74,8 +74,9 @@ std::unique_ptr<Module> llvm::CloneModule(
// Loop over the functions in the module, making external functions as before
for (const Function &I : M) {
- Function *NF = Function::Create(cast<FunctionType>(I.getValueType()),
- I.getLinkage(), I.getName(), New.get());
+ Function *NF =
+ Function::Create(cast<FunctionType>(I.getValueType()), I.getLinkage(),
+ I.getAddressSpace(), I.getName(), New.get());
NF->copyAttributesFrom(&I);
VMap[&I] = NF;
}
@@ -91,8 +92,8 @@ std::unique_ptr<Module> llvm::CloneModule(
GlobalValue *GV;
if (I->getValueType()->isFunctionTy())
GV = Function::Create(cast<FunctionType>(I->getValueType()),
- GlobalValue::ExternalLinkage, I->getName(),
- New.get());
+ GlobalValue::ExternalLinkage,
+ I->getAddressSpace(), I->getName(), New.get());
else
GV = new GlobalVariable(
*New, I->getValueType(), false, GlobalValue::ExternalLinkage,
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index cb349e34606c..25d4ae583ecc 100644
--- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -57,6 +57,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -167,14 +168,22 @@ static bool isBlockValidForExtraction(const BasicBlock &BB,
continue;
}
- if (const CallInst *CI = dyn_cast<CallInst>(I))
- if (const Function *F = CI->getCalledFunction())
- if (F->getIntrinsicID() == Intrinsic::vastart) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (const Function *F = CI->getCalledFunction()) {
+ auto IID = F->getIntrinsicID();
+ if (IID == Intrinsic::vastart) {
if (AllowVarArgs)
continue;
else
return false;
}
+
+ // Currently, we miscompile outlined copies of eh_typid_for. There are
+ // proposals for fixing this in llvm.org/PR39545.
+ if (IID == Intrinsic::eh_typeid_for)
+ return false;
+ }
+ }
}
return true;
@@ -228,19 +237,21 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
bool AggregateArgs, BlockFrequencyInfo *BFI,
BranchProbabilityInfo *BPI, bool AllowVarArgs,
- bool AllowAlloca)
+ bool AllowAlloca, std::string Suffix)
: DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
BPI(BPI), AllowVarArgs(AllowVarArgs),
- Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)) {}
+ Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)),
+ Suffix(Suffix) {}
CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
BlockFrequencyInfo *BFI,
- BranchProbabilityInfo *BPI)
+ BranchProbabilityInfo *BPI, std::string Suffix)
: DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
BPI(BPI), AllowVarArgs(false),
Blocks(buildExtractionBlockSet(L.getBlocks(), &DT,
/* AllowVarArgs */ false,
- /* AllowAlloca */ false)) {}
+ /* AllowAlloca */ false)),
+ Suffix(Suffix) {}
/// definedInRegion - Return true if the specified value is defined in the
/// extracted region.
@@ -321,8 +332,7 @@ bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
default: {
IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
if (IntrInst) {
- if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
- IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
+ if (IntrInst->isLifetimeStartOrEnd())
break;
return false;
}
@@ -520,10 +530,10 @@ void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
}
}
-/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the
-/// region, we need to split the entry block of the region so that the PHI node
-/// is easier to deal with.
-void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
+/// severSplitPHINodesOfEntry - If a PHI node has multiple inputs from outside
+/// of the region, we need to split the entry block of the region so that the
+/// PHI node is easier to deal with.
+void CodeExtractor::severSplitPHINodesOfEntry(BasicBlock *&Header) {
unsigned NumPredsFromRegion = 0;
unsigned NumPredsOutsideRegion = 0;
@@ -566,7 +576,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
// changing them to branch to NewBB instead.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (Blocks.count(PN->getIncomingBlock(i))) {
- TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator();
+ Instruction *TI = PN->getIncomingBlock(i)->getTerminator();
TI->replaceUsesOfWith(OldPred, NewBB);
}
@@ -595,6 +605,56 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
}
}
+/// severSplitPHINodesOfExits - if PHI nodes in exit blocks have inputs from
+/// outlined region, we split these PHIs on two: one with inputs from region
+/// and other with remaining incoming blocks; then first PHIs are placed in
+/// outlined region.
+void CodeExtractor::severSplitPHINodesOfExits(
+ const SmallPtrSetImpl<BasicBlock *> &Exits) {
+ for (BasicBlock *ExitBB : Exits) {
+ BasicBlock *NewBB = nullptr;
+
+ for (PHINode &PN : ExitBB->phis()) {
+ // Find all incoming values from the outlining region.
+ SmallVector<unsigned, 2> IncomingVals;
+ for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
+ if (Blocks.count(PN.getIncomingBlock(i)))
+ IncomingVals.push_back(i);
+
+ // Do not process PHI if there is one (or fewer) predecessor from region.
+ // If PHI has exactly one predecessor from region, only this one incoming
+ // will be replaced on codeRepl block, so it should be safe to skip PHI.
+ if (IncomingVals.size() <= 1)
+ continue;
+
+ // Create block for new PHIs and add it to the list of outlined if it
+ // wasn't done before.
+ if (!NewBB) {
+ NewBB = BasicBlock::Create(ExitBB->getContext(),
+ ExitBB->getName() + ".split",
+ ExitBB->getParent(), ExitBB);
+ SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBB),
+ pred_end(ExitBB));
+ for (BasicBlock *PredBB : Preds)
+ if (Blocks.count(PredBB))
+ PredBB->getTerminator()->replaceUsesOfWith(ExitBB, NewBB);
+ BranchInst::Create(ExitBB, NewBB);
+ Blocks.insert(NewBB);
+ }
+
+ // Split this PHI.
+ PHINode *NewPN =
+ PHINode::Create(PN.getType(), IncomingVals.size(),
+ PN.getName() + ".ce", NewBB->getFirstNonPHI());
+ for (unsigned i : IncomingVals)
+ NewPN->addIncoming(PN.getIncomingValue(i), PN.getIncomingBlock(i));
+ for (unsigned i : reverse(IncomingVals))
+ PN.removeIncomingValue(i, false);
+ PN.addIncoming(NewPN, NewBB);
+ }
+ }
+}
+
void CodeExtractor::splitReturnBlocks() {
for (BasicBlock *Block : Blocks)
if (ReturnInst *RI = dyn_cast<ReturnInst>(Block->getTerminator())) {
@@ -669,11 +729,14 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
FunctionType::get(RetTy, paramTy,
AllowVarArgs && oldFunction->isVarArg());
+ std::string SuffixToUse =
+ Suffix.empty()
+ ? (header->getName().empty() ? "extracted" : header->getName().str())
+ : Suffix;
// Create the new function
- Function *newFunction = Function::Create(funcType,
- GlobalValue::InternalLinkage,
- oldFunction->getName() + "_" +
- header->getName(), M);
+ Function *newFunction = Function::Create(
+ funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(),
+ oldFunction->getName() + "." + SuffixToUse, M);
// If the old function is no-throw, so is the new one.
if (oldFunction->doesNotThrow())
newFunction->setDoesNotThrow();
@@ -754,6 +817,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::SanitizeMemory:
case Attribute::SanitizeThread:
case Attribute::SanitizeHWAddress:
+ case Attribute::SpeculativeLoadHardening:
case Attribute::StackProtect:
case Attribute::StackProtectReq:
case Attribute::StackProtectStrong:
@@ -778,7 +842,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
Value *Idx[2];
Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
- TerminatorInst *TI = newFunction->begin()->getTerminator();
+ Instruction *TI = newFunction->begin()->getTerminator();
GetElementPtrInst *GEP = GetElementPtrInst::Create(
StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI);
RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
@@ -808,10 +872,10 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
for (unsigned i = 0, e = Users.size(); i != e; ++i)
// The BasicBlock which contains the branch is not in the region
// modify the branch target to a new block
- if (TerminatorInst *TI = dyn_cast<TerminatorInst>(Users[i]))
- if (!Blocks.count(TI->getParent()) &&
- TI->getParent()->getParent() == oldFunction)
- TI->replaceUsesOfWith(header, newHeader);
+ if (Instruction *I = dyn_cast<Instruction>(Users[i]))
+ if (I->isTerminator() && !Blocks.count(I->getParent()) &&
+ I->getParent()->getParent() == oldFunction)
+ I->replaceUsesOfWith(header, newHeader);
return newFunction;
}
@@ -819,9 +883,10 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
/// emitCallAndSwitchStatement - This method sets up the caller side by adding
/// the call instruction, splitting any PHI nodes in the header block as
/// necessary.
-void CodeExtractor::
-emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
- ValueSet &inputs, ValueSet &outputs) {
+CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
+ BasicBlock *codeReplacer,
+ ValueSet &inputs,
+ ValueSet &outputs) {
// Emit a call to the new function, passing in: *pointer to struct (if
// aggregating parameters), or plan inputs and allocated memory for outputs
std::vector<Value *> params, StructValues, ReloadOutputs, Reloads;
@@ -829,6 +894,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Module *M = newFunction->getParent();
LLVMContext &Context = M->getContext();
const DataLayout &DL = M->getDataLayout();
+ CallInst *call = nullptr;
// Add inputs as params, or to be filled into the struct
for (Value *input : inputs)
@@ -879,8 +945,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
}
// Emit the call to the function
- CallInst *call = CallInst::Create(newFunction, params,
- NumExitBlocks > 1 ? "targetBlock" : "");
+ call = CallInst::Create(newFunction, params,
+ NumExitBlocks > 1 ? "targetBlock" : "");
// Add debug location to the new call, if the original function has debug
// info. In that case, the terminator of the entry block of the extracted
// function contains the first debug location of the extracted function,
@@ -925,11 +991,17 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
auto *OutI = dyn_cast<Instruction>(outputs[i]);
if (!OutI)
continue;
+
// Find proper insertion point.
- Instruction *InsertPt = OutI->getNextNode();
- // Let's assume that there is no other guy interleave non-PHI in PHIs.
- if (isa<PHINode>(InsertPt))
- InsertPt = InsertPt->getParent()->getFirstNonPHI();
+ BasicBlock::iterator InsertPt;
+ // In case OutI is an invoke, we insert the store at the beginning in the
+ // 'normal destination' BB. Otherwise we insert the store right after OutI.
+ if (auto *InvokeI = dyn_cast<InvokeInst>(OutI))
+ InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt();
+ else if (auto *Phi = dyn_cast<PHINode>(OutI))
+ InsertPt = Phi->getParent()->getFirstInsertionPt();
+ else
+ InsertPt = std::next(OutI->getIterator());
assert(OAI != newFunction->arg_end() &&
"Number of output arguments should match "
@@ -939,13 +1011,13 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
GetElementPtrInst *GEP = GetElementPtrInst::Create(
- StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), InsertPt);
- new StoreInst(outputs[i], GEP, InsertPt);
+ StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), &*InsertPt);
+ new StoreInst(outputs[i], GEP, &*InsertPt);
// Since there should be only one struct argument aggregating
// all the output values, we shouldn't increment OAI, which always
// points to the struct argument, in this case.
} else {
- new StoreInst(outputs[i], &*OAI, InsertPt);
+ new StoreInst(outputs[i], &*OAI, &*InsertPt);
++OAI;
}
}
@@ -964,7 +1036,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
unsigned switchVal = 0;
for (BasicBlock *Block : Blocks) {
- TerminatorInst *TI = Block->getTerminator();
+ Instruction *TI = Block->getTerminator();
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
if (!Blocks.count(TI->getSuccessor(i))) {
BasicBlock *OldTarget = TI->getSuccessor(i);
@@ -1046,6 +1118,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
break;
}
+
+ return call;
}
void CodeExtractor::moveCodeToFunction(Function *newFunction) {
@@ -1070,7 +1144,7 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
using BlockNode = BlockFrequencyInfoImplBase::BlockNode;
// Update the branch weights for the exit block.
- TerminatorInst *TI = CodeReplacer->getTerminator();
+ Instruction *TI = CodeReplacer->getTerminator();
SmallVector<unsigned, 8> BranchWeights(TI->getNumSuccessors(), 0);
// Block Frequency distribution with dummy node.
@@ -1107,6 +1181,71 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
}
+/// Scan the extraction region for lifetime markers which reference inputs.
+/// Erase these markers. Return the inputs which were referenced.
+///
+/// The extraction region is defined by a set of blocks (\p Blocks), and a set
+/// of allocas which will be moved from the caller function into the extracted
+/// function (\p SunkAllocas).
+static SetVector<Value *>
+eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
+ const SetVector<Value *> &SunkAllocas) {
+ SetVector<Value *> InputObjectsWithLifetime;
+ for (BasicBlock *BB : Blocks) {
+ for (auto It = BB->begin(), End = BB->end(); It != End;) {
+ auto *II = dyn_cast<IntrinsicInst>(&*It);
+ ++It;
+ if (!II || !II->isLifetimeStartOrEnd())
+ continue;
+
+ // Get the memory operand of the lifetime marker. If the underlying
+ // object is a sunk alloca, or is otherwise defined in the extraction
+ // region, the lifetime marker must not be erased.
+ Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
+ if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
+ continue;
+
+ InputObjectsWithLifetime.insert(Mem);
+ II->eraseFromParent();
+ }
+ }
+ return InputObjectsWithLifetime;
+}
+
+/// Insert lifetime start/end markers surrounding the call to the new function
+/// for objects defined in the caller.
+static void insertLifetimeMarkersSurroundingCall(
+ Module *M, const SetVector<Value *> &InputObjectsWithLifetime,
+ CallInst *TheCall) {
+ if (InputObjectsWithLifetime.empty())
+ return;
+
+ LLVMContext &Ctx = M->getContext();
+ auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
+ auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
+ auto LifetimeStartFn = llvm::Intrinsic::getDeclaration(
+ M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
+ auto LifetimeEndFn = llvm::Intrinsic::getDeclaration(
+ M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
+ for (Value *Mem : InputObjectsWithLifetime) {
+ assert((!isa<Instruction>(Mem) ||
+ cast<Instruction>(Mem)->getFunction() == TheCall->getFunction()) &&
+ "Input memory not defined in original function");
+ Value *MemAsI8Ptr = nullptr;
+ if (Mem->getType() == Int8PtrTy)
+ MemAsI8Ptr = Mem;
+ else
+ MemAsI8Ptr =
+ CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
+
+ auto StartMarker =
+ CallInst::Create(LifetimeStartFn, {NegativeOne, MemAsI8Ptr});
+ StartMarker->insertBefore(TheCall);
+ auto EndMarker = CallInst::Create(LifetimeEndFn, {NegativeOne, MemAsI8Ptr});
+ EndMarker->insertAfter(TheCall);
+ }
+}
+
Function *CodeExtractor::extractCodeRegion() {
if (!isEligible())
return nullptr;
@@ -1150,13 +1289,33 @@ Function *CodeExtractor::extractCodeRegion() {
}
}
- // If we have to split PHI nodes or the entry block, do so now.
- severSplitPHINodes(header);
-
// If we have any return instructions in the region, split those blocks so
// that the return is not in the region.
splitReturnBlocks();
+ // Calculate the exit blocks for the extracted region and the total exit
+ // weights for each of those blocks.
+ DenseMap<BasicBlock *, BlockFrequency> ExitWeights;
+ SmallPtrSet<BasicBlock *, 1> ExitBlocks;
+ for (BasicBlock *Block : Blocks) {
+ for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE;
+ ++SI) {
+ if (!Blocks.count(*SI)) {
+ // Update the branch weight for this successor.
+ if (BFI) {
+ BlockFrequency &BF = ExitWeights[*SI];
+ BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, *SI);
+ }
+ ExitBlocks.insert(*SI);
+ }
+ }
+ }
+ NumExitBlocks = ExitBlocks.size();
+
+ // If we have to split PHI nodes of the entry or exit blocks, do so now.
+ severSplitPHINodesOfEntry(header);
+ severSplitPHINodesOfExits(ExitBlocks);
+
// This takes place of the original loop
BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
"codeRepl", oldFunction,
@@ -1201,30 +1360,17 @@ Function *CodeExtractor::extractCodeRegion() {
cast<Instruction>(II)->moveBefore(TI);
}
- // Calculate the exit blocks for the extracted region and the total exit
- // weights for each of those blocks.
- DenseMap<BasicBlock *, BlockFrequency> ExitWeights;
- SmallPtrSet<BasicBlock *, 1> ExitBlocks;
- for (BasicBlock *Block : Blocks) {
- for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE;
- ++SI) {
- if (!Blocks.count(*SI)) {
- // Update the branch weight for this successor.
- if (BFI) {
- BlockFrequency &BF = ExitWeights[*SI];
- BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, *SI);
- }
- ExitBlocks.insert(*SI);
- }
- }
- }
- NumExitBlocks = ExitBlocks.size();
+ // Collect objects which are inputs to the extraction region and also
+ // referenced by lifetime start/end markers within it. The effects of these
+ // markers must be replicated in the calling function to prevent the stack
+ // coloring pass from merging slots which store input objects.
+ ValueSet InputObjectsWithLifetime =
+ eraseLifetimeMarkersOnInputs(Blocks, SinkingCands);
// Construct new function based on inputs/outputs & add allocas for all defs.
- Function *newFunction = constructFunction(inputs, outputs, header,
- newFuncRoot,
- codeReplacer, oldFunction,
- oldFunction->getParent());
+ Function *newFunction =
+ constructFunction(inputs, outputs, header, newFuncRoot, codeReplacer,
+ oldFunction, oldFunction->getParent());
// Update the entry count of the function.
if (BFI) {
@@ -1235,10 +1381,16 @@ Function *CodeExtractor::extractCodeRegion() {
BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency());
}
- emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
+ CallInst *TheCall =
+ emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
moveCodeToFunction(newFunction);
+ // Replicate the effects of any lifetime start/end markers which referenced
+ // input objects in the extraction region by placing markers around the call.
+ insertLifetimeMarkersSurroundingCall(oldFunction->getParent(),
+ InputObjectsWithLifetime, TheCall);
+
// Propagate personality info to the new function if there is one.
if (oldFunction->hasPersonalityFn())
newFunction->setPersonalityFn(oldFunction->getPersonalityFn());
@@ -1247,8 +1399,8 @@ Function *CodeExtractor::extractCodeRegion() {
if (BFI && NumExitBlocks > 1)
calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI);
- // Loop over all of the PHI nodes in the header block, and change any
- // references to the old incoming edge to be the new incoming edge.
+ // Loop over all of the PHI nodes in the header and exit blocks, and change
+ // any references to the old incoming edge to be the new incoming edge.
for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
PHINode *PN = cast<PHINode>(I);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
@@ -1256,29 +1408,60 @@ Function *CodeExtractor::extractCodeRegion() {
PN->setIncomingBlock(i, newFuncRoot);
}
- // Look at all successors of the codeReplacer block. If any of these blocks
- // had PHI nodes in them, we need to update the "from" block to be the code
- // replacer, not the original block in the extracted region.
- std::vector<BasicBlock *> Succs(succ_begin(codeReplacer),
- succ_end(codeReplacer));
- for (unsigned i = 0, e = Succs.size(); i != e; ++i)
- for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- std::set<BasicBlock*> ProcessedPreds;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (Blocks.count(PN->getIncomingBlock(i))) {
- if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second)
- PN->setIncomingBlock(i, codeReplacer);
- else {
- // There were multiple entries in the PHI for this block, now there
- // is only one, so remove the duplicated entries.
- PN->removeIncomingValue(i, false);
- --i; --e;
- }
- }
+ for (BasicBlock *ExitBB : ExitBlocks)
+ for (PHINode &PN : ExitBB->phis()) {
+ Value *IncomingCodeReplacerVal = nullptr;
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ // Ignore incoming values from outside of the extracted region.
+ if (!Blocks.count(PN.getIncomingBlock(i)))
+ continue;
+
+ // Ensure that there is only one incoming value from codeReplacer.
+ if (!IncomingCodeReplacerVal) {
+ PN.setIncomingBlock(i, codeReplacer);
+ IncomingCodeReplacerVal = PN.getIncomingValue(i);
+ } else
+ assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) &&
+ "PHI has two incompatbile incoming values from codeRepl");
+ }
+ }
+
+ // Erase debug info intrinsics. Variable updates within the new function are
+ // invisible to debuggers. This could be improved by defining a DISubprogram
+ // for the new function.
+ for (BasicBlock &BB : *newFunction) {
+ auto BlockIt = BB.begin();
+ // Remove debug info intrinsics from the new function.
+ while (BlockIt != BB.end()) {
+ Instruction *Inst = &*BlockIt;
+ ++BlockIt;
+ if (isa<DbgInfoIntrinsic>(Inst))
+ Inst->eraseFromParent();
}
+ // Remove debug info intrinsics which refer to values in the new function
+ // from the old function.
+ SmallVector<DbgVariableIntrinsic *, 4> DbgUsers;
+ for (Instruction &I : BB)
+ findDbgUsers(DbgUsers, &I);
+ for (DbgVariableIntrinsic *DVI : DbgUsers)
+ DVI->eraseFromParent();
+ }
- LLVM_DEBUG(if (verifyFunction(*newFunction))
- report_fatal_error("verifyFunction failed!"));
+ // Mark the new function `noreturn` if applicable. Terminators which resume
+ // exception propagation are treated as returning instructions. This is to
+ // avoid inserting traps after calls to outlined functions which unwind.
+ bool doesNotReturn = none_of(*newFunction, [](const BasicBlock &BB) {
+ const Instruction *Term = BB.getTerminator();
+ return isa<ReturnInst>(Term) || isa<ResumeInst>(Term);
+ });
+ if (doesNotReturn)
+ newFunction->setDoesNotReturn();
+
+ LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) {
+ newFunction->dump();
+ report_fatal_error("verification of newFunction failed!");
+ });
+ LLVM_DEBUG(if (verifyFunction(*oldFunction))
+ report_fatal_error("verification of oldFunction failed!"));
return newFunction;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
index 9a0240144d08..4e7da7d0449f 100644
--- a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
@@ -22,11 +22,10 @@
#define DEBUG_TYPE "ctor_utils"
-namespace llvm {
+using namespace llvm;
-namespace {
/// Given a specified llvm.global_ctors list, remove the listed elements.
-void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) {
+static void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) {
// Filter out the initializer elements to remove.
ConstantArray *OldCA = cast<ConstantArray>(GCL->getInitializer());
SmallVector<Constant *, 10> CAList;
@@ -64,7 +63,7 @@ void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) {
/// Given a llvm.global_ctors list that we can understand,
/// return a list of the functions and null terminator as a vector.
-std::vector<Function *> parseGlobalCtors(GlobalVariable *GV) {
+static std::vector<Function *> parseGlobalCtors(GlobalVariable *GV) {
if (GV->getInitializer()->isNullValue())
return std::vector<Function *>();
ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
@@ -79,7 +78,7 @@ std::vector<Function *> parseGlobalCtors(GlobalVariable *GV) {
/// Find the llvm.global_ctors list, verifying that all initializers have an
/// init priority of 65535.
-GlobalVariable *findGlobalCtors(Module &M) {
+static GlobalVariable *findGlobalCtors(Module &M) {
GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
if (!GV)
return nullptr;
@@ -112,12 +111,11 @@ GlobalVariable *findGlobalCtors(Module &M) {
return GV;
}
-} // namespace
/// Call "ShouldRemove" for every entry in M's global_ctor list and remove the
/// entries for which it returns true. Return true if anything changed.
-bool optimizeGlobalCtorsList(Module &M,
- function_ref<bool(Function *)> ShouldRemove) {
+bool llvm::optimizeGlobalCtorsList(
+ Module &M, function_ref<bool(Function *)> ShouldRemove) {
GlobalVariable *GlobalCtors = findGlobalCtors(M);
if (!GlobalCtors)
return false;
@@ -160,5 +158,3 @@ bool optimizeGlobalCtorsList(Module &M,
removeGlobalCtors(GlobalCtors, CtorsToRemove);
return true;
}
-
-} // End llvm namespace
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
index 56ff03c7f5e1..975b363859a9 100644
--- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -90,7 +90,7 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
// careful if I is an invoke instruction, because we can't insert the store
// AFTER the terminator instruction.
BasicBlock::iterator InsertPt;
- if (!isa<TerminatorInst>(I)) {
+ if (!I.isTerminator()) {
InsertPt = ++I.getIterator();
for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
/* empty */; // Don't insert before PHI nodes or landingpad instrs.
diff --git a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
index c9c96fbe5da0..762a374c135c 100644
--- a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
@@ -37,7 +37,7 @@ IRBuilder<> *EscapeEnumerator::Next() {
// Branches and invokes do not escape, only unwind, resume, and return
// do.
- TerminatorInst *TI = CurBB->getTerminator();
+ Instruction *TI = CurBB->getTerminator();
if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
continue;
diff --git a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp
index 7fd9425efed3..e875cd686b00 100644
--- a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -483,8 +483,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
}
}
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ if (II->isLifetimeStartOrEnd()) {
LLVM_DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
++CurInst;
continue;
@@ -578,7 +577,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
<< "Successfully evaluated function. Result: 0\n\n");
}
}
- } else if (isa<TerminatorInst>(CurInst)) {
+ } else if (CurInst->isTerminator()) {
LLVM_DEBUG(dbgs() << "Found a terminator instruction.\n");
if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
index 3c6c9c9a5df4..d9778f4a1fb7 100644
--- a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
@@ -232,7 +232,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
return false;
- TerminatorInst *TBB = LastCondBlock->getTerminator();
+ Instruction *TBB = LastCondBlock->getTerminator();
BasicBlock *PS1 = TBB->getSuccessor(0);
BasicBlock *PS2 = TBB->getSuccessor(1);
BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator());
@@ -325,7 +325,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
BasicBlock *Block1,
BasicBlock *Block2) {
- TerminatorInst *PTI2 = Head2->getTerminator();
+ Instruction *PTI2 = Head2->getTerminator();
Instruction *PBI2 = &Head2->front();
bool eq1 = (Block1 == Head1);
@@ -421,7 +421,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock))
return false;
- TerminatorInst *PTI2 = SecondEntryBlock->getTerminator();
+ Instruction *PTI2 = SecondEntryBlock->getTerminator();
Instruction *PBI2 = &SecondEntryBlock->front();
if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1,
diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index 69203f9f2485..a717d9b72819 100644
--- a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -410,8 +410,6 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
switch (TyL->getTypeID()) {
default:
llvm_unreachable("Unknown type!");
- // Fall through in Release mode.
- LLVM_FALLTHROUGH;
case Type::IntegerTyID:
return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
cast<IntegerType>(TyR)->getBitWidth());
@@ -867,8 +865,8 @@ int FunctionComparator::compare() {
if (int Res = cmpBasicBlocks(BBL, BBR))
return Res;
- const TerminatorInst *TermL = BBL->getTerminator();
- const TerminatorInst *TermR = BBR->getTerminator();
+ const Instruction *TermL = BBL->getTerminator();
+ const Instruction *TermR = BBR->getTerminator();
assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
@@ -938,7 +936,7 @@ FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
for (auto &Inst : *BB) {
H.add(Inst.getOpcode());
}
- const TerminatorInst *Term = BB->getTerminator();
+ const Instruction *Term = BB->getTerminator();
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
continue;
diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index 479816a339d0..a9772e31da50 100644
--- a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -124,7 +124,6 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
return SGV->getLinkage();
switch (SGV->getLinkage()) {
- case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::LinkOnceODRLinkage:
case GlobalValue::ExternalLinkage:
// External and linkonce definitions are converted to available_externally
@@ -144,11 +143,13 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
// An imported available_externally declaration stays that way.
return SGV->getLinkage();
+ case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::WeakAnyLinkage:
- // Can't import weak_any definitions correctly, or we might change the
- // program semantics, since the linker will pick the first weak_any
- // definition and importing would change the order they are seen by the
- // linker. The module linking caller needs to enforce this.
+ // Can't import linkonce_any/weak_any definitions correctly, or we might
+ // change the program semantics, since the linker will pick the first
+ // linkonce_any/weak_any definition and importing would change the order
+ // they are seen by the linker. The module linking caller needs to enforce
+ // this.
assert(!doImportAsDefinition(SGV));
// If imported as a declaration, it becomes external_weak.
return SGV->getLinkage();
@@ -202,10 +203,26 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
- // Check the summaries to see if the symbol gets resolved to a known local
- // definition.
+ ValueInfo VI;
if (GV.hasName()) {
- ValueInfo VI = ImportIndex.getValueInfo(GV.getGUID());
+ VI = ImportIndex.getValueInfo(GV.getGUID());
+ // Set synthetic function entry counts.
+ if (VI && ImportIndex.hasSyntheticEntryCounts()) {
+ if (Function *F = dyn_cast<Function>(&GV)) {
+ if (!F->isDeclaration()) {
+ for (auto &S : VI.getSummaryList()) {
+ FunctionSummary *FS = dyn_cast<FunctionSummary>(S->getBaseObject());
+ if (FS->modulePath() == M.getModuleIdentifier()) {
+ F->setEntryCount(Function::ProfileCount(FS->entryCount(),
+ Function::PCT_Synthetic));
+ break;
+ }
+ }
+ }
+ }
+ }
+ // Check the summaries to see if the symbol gets resolved to a known local
+ // definition.
if (VI && VI.isDSOLocal()) {
GV.setDSOLocal(true);
if (GV.hasDLLImportStorageClass())
@@ -213,6 +230,22 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
}
}
+ // Mark read-only variables which can be imported with specific attribute.
+ // We can't internalize them now because IRMover will fail to link variable
+ // definitions to their external declarations during ThinLTO import. We'll
+ // internalize read-only variables later, after import is finished.
+ // See internalizeImmutableGVs.
+ //
+ // If global value dead stripping is not enabled in summary then
+ // propagateConstants hasn't been run. We can't internalize GV
+ // in such case.
+ if (!GV.isDeclaration() && VI && ImportIndex.withGlobalValueDeadStripping()) {
+ const auto &SL = VI.getSummaryList();
+ auto *GVS = SL.empty() ? nullptr : dyn_cast<GlobalVarSummary>(SL[0].get());
+ if (GVS && GVS->isReadOnly())
+ cast<GlobalVariable>(&GV)->addAttribute("thinlto-internalize");
+ }
+
bool DoPromote = false;
if (GV.hasLocalLinkage() &&
((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) {
@@ -230,7 +263,7 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
// Remove functions imported as available externally defs from comdats,
// as this is a declaration for the linker, and will be dropped eventually.
// It is illegal for comdats to contain declarations.
- auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
+ auto *GO = dyn_cast<GlobalObject>(&GV);
if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
// The IRMover should not have placed any imported declarations in
// a comdat, so the only declaration that should be in a comdat
diff --git a/contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp b/contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp
new file mode 100644
index 000000000000..08de0a4c53e9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp
@@ -0,0 +1,64 @@
+//===-- GuardUtils.cpp - Utils for work with guards -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Utils that are used to perform transformations related to guards and their
+// conditions.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/GuardUtils.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+static cl::opt<uint32_t> PredicatePassBranchWeight(
+ "guards-predicate-pass-branch-weight", cl::Hidden, cl::init(1 << 20),
+ cl::desc("The probability of a guard failing is assumed to be the "
+ "reciprocal of this value (default = 1 << 20)"));
+
+void llvm::makeGuardControlFlowExplicit(Function *DeoptIntrinsic,
+ CallInst *Guard) {
+ OperandBundleDef DeoptOB(*Guard->getOperandBundle(LLVMContext::OB_deopt));
+ SmallVector<Value *, 4> Args(std::next(Guard->arg_begin()), Guard->arg_end());
+
+ auto *CheckBB = Guard->getParent();
+ auto *DeoptBlockTerm =
+ SplitBlockAndInsertIfThen(Guard->getArgOperand(0), Guard, true);
+
+ auto *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
+
+ // SplitBlockAndInsertIfThen inserts control flow that branches to
+ // DeoptBlockTerm if the condition is true. We want the opposite.
+ CheckBI->swapSuccessors();
+
+ CheckBI->getSuccessor(0)->setName("guarded");
+ CheckBI->getSuccessor(1)->setName("deopt");
+
+ if (auto *MD = Guard->getMetadata(LLVMContext::MD_make_implicit))
+ CheckBI->setMetadata(LLVMContext::MD_make_implicit, MD);
+
+ MDBuilder MDB(Guard->getContext());
+ CheckBI->setMetadata(LLVMContext::MD_prof,
+ MDB.createBranchWeights(PredicatePassBranchWeight, 1));
+
+ IRBuilder<> B(DeoptBlockTerm);
+ auto *DeoptCall = B.CreateCall(DeoptIntrinsic, Args, {DeoptOB}, "");
+
+ if (DeoptIntrinsic->getReturnType()->isVoidTy()) {
+ B.CreateRetVoid();
+ } else {
+ DeoptCall->setName("deoptcall");
+ B.CreateRet(DeoptCall);
+ }
+
+ DeoptCall->setCallingConv(Guard->getCallingConv());
+ DeoptBlockTerm->eraseFromParent();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
index 8382220fc9e1..02482c550321 100644
--- a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
@@ -161,7 +161,7 @@ void ImportedFunctionsInliningStatistics::dump(const bool Verbose) {
void ImportedFunctionsInliningStatistics::calculateRealInlines() {
// Removing duplicated Callers.
- llvm::sort(NonImportedCallers.begin(), NonImportedCallers.end());
+ llvm::sort(NonImportedCallers);
NonImportedCallers.erase(
std::unique(NonImportedCallers.begin(), NonImportedCallers.end()),
NonImportedCallers.end());
@@ -190,17 +190,14 @@ ImportedFunctionsInliningStatistics::getSortedNodes() {
for (const NodesMapTy::value_type& Node : NodesMap)
SortedNodes.push_back(&Node);
- llvm::sort(
- SortedNodes.begin(), SortedNodes.end(),
- [&](const SortedNodesTy::value_type &Lhs,
- const SortedNodesTy::value_type &Rhs) {
- if (Lhs->second->NumberOfInlines != Rhs->second->NumberOfInlines)
- return Lhs->second->NumberOfInlines > Rhs->second->NumberOfInlines;
- if (Lhs->second->NumberOfRealInlines !=
- Rhs->second->NumberOfRealInlines)
- return Lhs->second->NumberOfRealInlines >
- Rhs->second->NumberOfRealInlines;
- return Lhs->first() < Rhs->first();
- });
+ llvm::sort(SortedNodes, [&](const SortedNodesTy::value_type &Lhs,
+ const SortedNodesTy::value_type &Rhs) {
+ if (Lhs->second->NumberOfInlines != Rhs->second->NumberOfInlines)
+ return Lhs->second->NumberOfInlines > Rhs->second->NumberOfInlines;
+ if (Lhs->second->NumberOfRealInlines != Rhs->second->NumberOfRealInlines)
+ return Lhs->second->NumberOfRealInlines >
+ Rhs->second->NumberOfRealInlines;
+ return Lhs->first() < Rhs->first();
+ });
return SortedNodes;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index ddc6e07e2f59..623fe91a5a60 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -31,6 +31,7 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -84,13 +85,15 @@ PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
cl::init(true), cl::Hidden,
cl::desc("Convert align attributes to assumptions during inlining."));
-bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR, bool InsertLifetime) {
+llvm::InlineResult llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
+ AAResults *CalleeAAR,
+ bool InsertLifetime) {
return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime);
}
-bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR, bool InsertLifetime) {
+llvm::InlineResult llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
+ AAResults *CalleeAAR,
+ bool InsertLifetime) {
return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime);
}
@@ -768,14 +771,16 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
UnwindDest->removePredecessor(InvokeBB);
}
-/// When inlining a call site that has !llvm.mem.parallel_loop_access metadata,
-/// that metadata should be propagated to all memory-accessing cloned
-/// instructions.
+/// When inlining a call site that has !llvm.mem.parallel_loop_access or
+/// llvm.access.group metadata, that metadata should be propagated to all
+/// memory-accessing cloned instructions.
static void PropagateParallelLoopAccessMetadata(CallSite CS,
ValueToValueMapTy &VMap) {
MDNode *M =
CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
- if (!M)
+ MDNode *CallAccessGroup =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_access_group);
+ if (!M && !CallAccessGroup)
return;
for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
@@ -787,11 +792,20 @@ static void PropagateParallelLoopAccessMetadata(CallSite CS,
if (!NI)
continue;
- if (MDNode *PM = NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) {
+ if (M) {
+ if (MDNode *PM =
+ NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) {
M = MDNode::concatenate(PM, M);
NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
- } else if (NI->mayReadOrWriteMemory()) {
- NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
+ } else if (NI->mayReadOrWriteMemory()) {
+ NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
+ }
+ }
+
+ if (NI->mayReadOrWriteMemory()) {
+ MDNode *UnitedAccGroups = uniteAccessGroups(
+ NI->getMetadata(LLVMContext::MD_access_group), CallAccessGroup);
+ NI->setMetadata(LLVMContext::MD_access_group, UnitedAccGroups);
}
}
}
@@ -985,22 +999,22 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
PtrArgs.push_back(CXI->getPointerOperand());
else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
PtrArgs.push_back(RMWI->getPointerOperand());
- else if (ImmutableCallSite ICS = ImmutableCallSite(I)) {
+ else if (const auto *Call = dyn_cast<CallBase>(I)) {
// If we know that the call does not access memory, then we'll still
// know that about the inlined clone of this call site, and we don't
// need to add metadata.
- if (ICS.doesNotAccessMemory())
+ if (Call->doesNotAccessMemory())
continue;
IsFuncCall = true;
if (CalleeAAR) {
- FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(ICS);
+ FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(Call);
if (MRB == FMRB_OnlyAccessesArgumentPointees ||
MRB == FMRB_OnlyReadsArgumentPointees)
IsArgMemOnlyCall = true;
}
- for (Value *Arg : ICS.args()) {
+ for (Value *Arg : Call->args()) {
// We need to check the underlying objects of all arguments, not just
// the pointer arguments, because we might be passing pointers as
// integers, etc.
@@ -1306,16 +1320,10 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
// Check whether this Value is used by a lifetime intrinsic.
static bool isUsedByLifetimeMarker(Value *V) {
- for (User *U : V->users()) {
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
+ for (User *U : V->users())
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U))
+ if (II->isLifetimeStartOrEnd())
return true;
- }
- }
- }
return false;
}
@@ -1491,9 +1499,10 @@ static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB,
/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
/// exists in the instruction stream. Similarly this will inline a recursive
/// function by one level.
-bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR, bool InsertLifetime,
- Function *ForwardVarArgsTo) {
+llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
+ AAResults *CalleeAAR,
+ bool InsertLifetime,
+ Function *ForwardVarArgsTo) {
Instruction *TheCall = CS.getInstruction();
assert(TheCall->getParent() && TheCall->getFunction()
&& "Instruction not in function!");
@@ -1504,7 +1513,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
Function *CalledFunc = CS.getCalledFunction();
if (!CalledFunc || // Can't inline external function or indirect
CalledFunc->isDeclaration()) // call!
- return false;
+ return "external or indirect";
// The inliner does not know how to inline through calls with operand bundles
// in general ...
@@ -1518,7 +1527,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (Tag == LLVMContext::OB_funclet)
continue;
- return false;
+ return "unsupported operand bundle";
}
}
@@ -1537,7 +1546,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (!Caller->hasGC())
Caller->setGC(CalledFunc->getGC());
else if (CalledFunc->getGC() != Caller->getGC())
- return false;
+ return "incompatible GC";
}
// Get the personality function from the callee if it contains a landing pad.
@@ -1561,7 +1570,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// TODO: This isn't 100% true. Some personality functions are proper
// supersets of others and can be used in place of the other.
else if (CalledPersonality != CallerPersonality)
- return false;
+ return "incompatible personality";
}
// We need to figure out which funclet the callsite was in so that we may
@@ -1586,7 +1595,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// for catchpads.
for (const BasicBlock &CalledBB : *CalledFunc) {
if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI()))
- return false;
+ return "catch in cleanup funclet";
}
}
} else if (isAsynchronousEHPersonality(Personality)) {
@@ -1594,7 +1603,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// funclet in the callee.
for (const BasicBlock &CalledBB : *CalledFunc) {
if (CalledBB.isEHPad())
- return false;
+ return "SEH in cleanup funclet";
}
}
}
@@ -2244,7 +2253,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Change the branch that used to go to AfterCallBB to branch to the first
// basic block of the inlined function.
//
- TerminatorInst *Br = OrigBB->getTerminator();
+ Instruction *Br = OrigBB->getTerminator();
assert(Br && Br->getOpcode() == Instruction::Br &&
"splitBasicBlock broken!");
Br->setOperand(0, &*FirstNewBlock);
diff --git a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index 9832a6f24e1f..e1592c867636 100644
--- a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -487,7 +487,7 @@ void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) {
MDNode *BranchWeights =
MDBuilder(CI->getContext()).createBranchWeights(1, 2000);
- TerminatorInst *NewInst =
+ Instruction *NewInst =
SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights, DT);
BasicBlock *CallBB = NewInst->getParent();
CallBB->setName("cdce.call");
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index ae3cb077a3af..499e611acb57 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -31,8 +31,10 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
@@ -47,6 +49,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -102,8 +105,8 @@ STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
/// DeleteDeadConditions is true.
bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
const TargetLibraryInfo *TLI,
- DeferredDominance *DDT) {
- TerminatorInst *T = BB->getTerminator();
+ DomTreeUpdater *DTU) {
+ Instruction *T = BB->getTerminator();
IRBuilder<> Builder(T);
// Branch - See if we are conditional jumping on constant
@@ -125,8 +128,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Replace the conditional branch with an unconditional one.
Builder.CreateBr(Destination);
BI->eraseFromParent();
- if (DDT)
- DDT->deleteEdge(BB, OldDest);
+ if (DTU)
+ DTU->deleteEdgeRelaxed(BB, OldDest);
return true;
}
@@ -201,8 +204,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
DefaultDest->removePredecessor(ParentBB);
i = SI->removeCase(i);
e = SI->case_end();
- if (DDT)
- DDT->deleteEdge(ParentBB, DefaultDest);
+ if (DTU)
+ DTU->deleteEdgeRelaxed(ParentBB, DefaultDest);
continue;
}
@@ -229,17 +232,17 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
Builder.CreateBr(TheOnlyDest);
BasicBlock *BB = SI->getParent();
std::vector <DominatorTree::UpdateType> Updates;
- if (DDT)
+ if (DTU)
Updates.reserve(SI->getNumSuccessors() - 1);
// Remove entries from PHI nodes which we no longer branch to...
- for (BasicBlock *Succ : SI->successors()) {
+ for (BasicBlock *Succ : successors(SI)) {
// Found case matching a constant operand?
if (Succ == TheOnlyDest) {
TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest
} else {
Succ->removePredecessor(BB);
- if (DDT)
+ if (DTU)
Updates.push_back({DominatorTree::Delete, BB, Succ});
}
}
@@ -249,8 +252,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
SI->eraseFromParent();
if (DeleteDeadConditions)
RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
- if (DDT)
- DDT->applyUpdates(Updates);
+ if (DTU)
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
return true;
}
@@ -297,7 +300,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) {
BasicBlock *TheOnlyDest = BA->getBasicBlock();
std::vector <DominatorTree::UpdateType> Updates;
- if (DDT)
+ if (DTU)
Updates.reserve(IBI->getNumDestinations() - 1);
// Insert the new branch.
@@ -310,7 +313,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
BasicBlock *ParentBB = IBI->getParent();
BasicBlock *DestBB = IBI->getDestination(i);
DestBB->removePredecessor(ParentBB);
- if (DDT)
+ if (DTU)
Updates.push_back({DominatorTree::Delete, ParentBB, DestBB});
}
}
@@ -327,8 +330,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
new UnreachableInst(BB->getContext(), BB);
}
- if (DDT)
- DDT->applyUpdates(Updates);
+ if (DTU)
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
return true;
}
}
@@ -352,7 +355,7 @@ bool llvm::isInstructionTriviallyDead(Instruction *I,
bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
const TargetLibraryInfo *TLI) {
- if (isa<TerminatorInst>(I))
+ if (I->isTerminator())
return false;
// We don't want the landingpad-like instructions removed by anything this
@@ -390,8 +393,7 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
return true;
// Lifetime intrinsics are dead when their right-hand is undef.
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end)
+ if (II->isLifetimeStartOrEnd())
return isa<UndefValue>(II->getArgOperand(1));
// Assumptions are dead if their condition is trivially true. Guards on
@@ -425,22 +427,22 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
/// trivially dead instruction, delete it. If that makes any of its operands
/// trivially dead, delete them too, recursively. Return true if any
/// instructions were deleted.
-bool
-llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
- const TargetLibraryInfo *TLI) {
+bool llvm::RecursivelyDeleteTriviallyDeadInstructions(
+ Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI))
return false;
SmallVector<Instruction*, 16> DeadInsts;
DeadInsts.push_back(I);
- RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI);
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU);
return true;
}
void llvm::RecursivelyDeleteTriviallyDeadInstructions(
- SmallVectorImpl<Instruction *> &DeadInsts, const TargetLibraryInfo *TLI) {
+ SmallVectorImpl<Instruction *> &DeadInsts, const TargetLibraryInfo *TLI,
+ MemorySSAUpdater *MSSAU) {
// Process the dead instruction list until empty.
while (!DeadInsts.empty()) {
Instruction &I = *DeadInsts.pop_back_val();
@@ -467,11 +469,24 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions(
if (isInstructionTriviallyDead(OpI, TLI))
DeadInsts.push_back(OpI);
}
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(&I);
I.eraseFromParent();
}
}
+bool llvm::replaceDbgUsesWithUndef(Instruction *I) {
+ SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
+ findDbgUsers(DbgUsers, I);
+ for (auto *DII : DbgUsers) {
+ Value *Undef = UndefValue::get(I->getType());
+ DII->setOperand(0, MetadataAsValue::get(DII->getContext(),
+ ValueAsMetadata::get(Undef)));
+ }
+ return !DbgUsers.empty();
+}
+
/// areAllUsesEqual - Check whether the uses of a value are all the same.
/// This is similar to Instruction::hasOneUse() except this will also return
/// true when there are no uses or multiple uses that all refer to the same
@@ -626,7 +641,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
/// .. and delete the predecessor corresponding to the '1', this will attempt to
/// recursively fold the and to 0.
void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
- DeferredDominance *DDT) {
+ DomTreeUpdater *DTU) {
// This only adjusts blocks with PHI nodes.
if (!isa<PHINode>(BB->begin()))
return;
@@ -649,17 +664,16 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
// of the block.
if (PhiIt != OldPhiIt) PhiIt = &BB->front();
}
- if (DDT)
- DDT->deleteEdge(Pred, BB);
+ if (DTU)
+ DTU->deleteEdgeRelaxed(Pred, BB);
}
/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
-/// predecessor is known to have one successor (DestBB!). Eliminate the edge
+/// predecessor is known to have one successor (DestBB!). Eliminate the edge
/// between them, moving the instructions in the predecessor into DestBB and
/// deleting the predecessor block.
-void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT,
- DeferredDominance *DDT) {
- assert(!(DT && DDT) && "Cannot call with both DT and DDT.");
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
+ DomTreeUpdater *DTU) {
// If BB has single-entry PHI nodes, fold them.
while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
@@ -677,11 +691,11 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT,
if (PredBB == &DestBB->getParent()->getEntryBlock())
ReplaceEntryBB = true;
- // Deferred DT update: Collect all the edges that enter PredBB. These
- // dominator edges will be redirected to DestBB.
- std::vector <DominatorTree::UpdateType> Updates;
- if (DDT && !ReplaceEntryBB) {
- Updates.reserve(1 + (2 * pred_size(PredBB)));
+ // DTU updates: Collect all the edges that enter
+ // PredBB. These dominator edges will be redirected to DestBB.
+ SmallVector<DominatorTree::UpdateType, 32> Updates;
+
+ if (DTU) {
Updates.push_back({DominatorTree::Delete, PredBB, DestBB});
for (auto I = pred_begin(PredBB), E = pred_end(PredBB); I != E; ++I) {
Updates.push_back({DominatorTree::Delete, *I, PredBB});
@@ -708,33 +722,32 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT,
// Splice all the instructions from PredBB to DestBB.
PredBB->getTerminator()->eraseFromParent();
DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
+ new UnreachableInst(PredBB->getContext(), PredBB);
// If the PredBB is the entry block of the function, move DestBB up to
// become the entry block after we erase PredBB.
if (ReplaceEntryBB)
DestBB->moveAfter(PredBB);
- if (DT) {
- // For some irreducible CFG we end up having forward-unreachable blocks
- // so check if getNode returns a valid node before updating the domtree.
- if (DomTreeNode *DTN = DT->getNode(PredBB)) {
- BasicBlock *PredBBIDom = DTN->getIDom()->getBlock();
- DT->changeImmediateDominator(DestBB, PredBBIDom);
- DT->eraseNode(PredBB);
+ if (DTU) {
+ assert(PredBB->getInstList().size() == 1 &&
+ isa<UnreachableInst>(PredBB->getTerminator()) &&
+ "The successor list of PredBB isn't empty before "
+ "applying corresponding DTU updates.");
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->deleteBB(PredBB);
+ // Recalculation of DomTree is needed when updating a forward DomTree and
+ // the Entry BB is replaced.
+ if (ReplaceEntryBB && DTU->hasDomTree()) {
+ // The entry block was removed and there is no external interface for
+ // the dominator tree to be notified of this change. In this corner-case
+ // we recalculate the entire tree.
+ DTU->recalculate(*(DestBB->getParent()));
}
}
- if (DDT) {
- DDT->deleteBB(PredBB); // Deferred deletion of BB.
- if (ReplaceEntryBB)
- // The entry block was removed and there is no external interface for the
- // dominator tree to be notified of this change. In this corner-case we
- // recalculate the entire tree.
- DDT->recalculate(*(DestBB->getParent()));
- else
- DDT->applyUpdates(Updates);
- } else {
- PredBB->eraseFromParent(); // Nuke BB.
+ else {
+ PredBB->eraseFromParent(); // Nuke BB if DTU is nullptr.
}
}
@@ -945,7 +958,7 @@ static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB,
/// eliminate BB by rewriting all the predecessors to branch to the successor
/// block and return true. If we can't transform, return false.
bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
- DeferredDominance *DDT) {
+ DomTreeUpdater *DTU) {
assert(BB != &BB->getParent()->getEntryBlock() &&
"TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
@@ -986,9 +999,8 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
- std::vector<DominatorTree::UpdateType> Updates;
- if (DDT) {
- Updates.reserve(1 + (2 * pred_size(BB)));
+ SmallVector<DominatorTree::UpdateType, 32> Updates;
+ if (DTU) {
Updates.push_back({DominatorTree::Delete, BB, Succ});
// All predecessors of BB will be moved to Succ.
for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
@@ -1044,9 +1056,16 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
BB->replaceAllUsesWith(Succ);
if (!Succ->hasName()) Succ->takeName(BB);
- if (DDT) {
- DDT->deleteBB(BB); // Deferred deletion of the old basic block.
- DDT->applyUpdates(Updates);
+ // Clear the successor list of BB to match updates applying to DTU later.
+ if (BB->getTerminator())
+ BB->getInstList().pop_back();
+ new UnreachableInst(BB->getContext(), BB);
+ assert(succ_empty(BB) && "The successor list of BB isn't empty before "
+ "applying corresponding DTU updates.");
+
+ if (DTU) {
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->deleteBB(BB);
} else {
BB->eraseFromParent(); // Delete the old basic block.
}
@@ -1237,7 +1256,7 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar,
/// alloc size of the value when doing the comparison. E.g. an i1 value will be
/// identified as covering an n-bit fragment, if the store size of i1 is at
/// least n bits.
-static bool valueCoversEntireFragment(Type *ValTy, DbgInfoIntrinsic *DII) {
+static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) {
const DataLayout &DL = DII->getModule()->getDataLayout();
uint64_t ValueSize = DL.getTypeAllocSizeInBits(ValTy);
if (auto FragmentSize = DII->getFragmentSizeInBits())
@@ -1255,7 +1274,7 @@ static bool valueCoversEntireFragment(Type *ValTy, DbgInfoIntrinsic *DII) {
/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
+void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
StoreInst *SI, DIBuilder &Builder) {
assert(DII->isAddressOfVariable());
auto *DIVar = DII->getVariable();
@@ -1278,33 +1297,6 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
return;
}
- // If an argument is zero extended then use argument directly. The ZExt
- // may be zapped by an optimization pass in future.
- Argument *ExtendedArg = nullptr;
- if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
- ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
- if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
- ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
- if (ExtendedArg) {
- // If this DII was already describing only a fragment of a variable, ensure
- // that fragment is appropriately narrowed here.
- // But if a fragment wasn't used, describe the value as the original
- // argument (rather than the zext or sext) so that it remains described even
- // if the sext/zext is optimized away. This widens the variable description,
- // leaving it up to the consumer to know how the smaller value may be
- // represented in a larger register.
- if (auto Fragment = DIExpr->getFragmentInfo()) {
- unsigned FragmentOffset = Fragment->OffsetInBits;
- SmallVector<uint64_t, 3> Ops(DIExpr->elements_begin(),
- DIExpr->elements_end() - 3);
- Ops.push_back(dwarf::DW_OP_LLVM_fragment);
- Ops.push_back(FragmentOffset);
- const DataLayout &DL = DII->getModule()->getDataLayout();
- Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType()));
- DIExpr = Builder.createExpression(Ops);
- }
- DV = ExtendedArg;
- }
if (!LdStHasDebugValue(DIVar, DIExpr, SI))
Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, DII->getDebugLoc(),
SI);
@@ -1312,7 +1304,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
+void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
LoadInst *LI, DIBuilder &Builder) {
auto *DIVar = DII->getVariable();
auto *DIExpr = DII->getExpression();
@@ -1341,7 +1333,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
/// Inserts a llvm.dbg.value intrinsic after a phi that has an associated
/// llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
+void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
PHINode *APN, DIBuilder &Builder) {
auto *DIVar = DII->getVariable();
auto *DIExpr = DII->getExpression();
@@ -1443,7 +1435,7 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
// Map existing PHI nodes to their dbg.values.
ValueToValueMapTy DbgValueMap;
for (auto &I : *BB) {
- if (auto DbgII = dyn_cast<DbgInfoIntrinsic>(&I)) {
+ if (auto DbgII = dyn_cast<DbgVariableIntrinsic>(&I)) {
if (auto *Loc = dyn_cast_or_null<PHINode>(DbgII->getVariableLocation()))
DbgValueMap.insert({Loc, DbgII});
}
@@ -1464,7 +1456,7 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
for (auto VI : PHI->operand_values()) {
auto V = DbgValueMap.find(VI);
if (V != DbgValueMap.end()) {
- auto *DbgII = cast<DbgInfoIntrinsic>(V->second);
+ auto *DbgII = cast<DbgVariableIntrinsic>(V->second);
Instruction *NewDbgII = DbgII->clone();
NewDbgII->setOperand(0, PhiMAV);
auto InsertionPt = Parent->getFirstInsertionPt();
@@ -1478,7 +1470,7 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
/// Finds all intrinsics declaring local variables as living in the memory that
/// 'V' points to. This may include a mix of dbg.declare and
/// dbg.addr intrinsics.
-TinyPtrVector<DbgInfoIntrinsic *> llvm::FindDbgAddrUses(Value *V) {
+TinyPtrVector<DbgVariableIntrinsic *> llvm::FindDbgAddrUses(Value *V) {
// This function is hot. Check whether the value has any metadata to avoid a
// DenseMap lookup.
if (!V->isUsedByMetadata())
@@ -1490,9 +1482,9 @@ TinyPtrVector<DbgInfoIntrinsic *> llvm::FindDbgAddrUses(Value *V) {
if (!MDV)
return {};
- TinyPtrVector<DbgInfoIntrinsic *> Declares;
+ TinyPtrVector<DbgVariableIntrinsic *> Declares;
for (User *U : MDV->users()) {
- if (auto *DII = dyn_cast<DbgInfoIntrinsic>(U))
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(U))
if (DII->isAddressOfVariable())
Declares.push_back(DII);
}
@@ -1512,7 +1504,7 @@ void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
DbgValues.push_back(DVI);
}
-void llvm::findDbgUsers(SmallVectorImpl<DbgInfoIntrinsic *> &DbgUsers,
+void llvm::findDbgUsers(SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers,
Value *V) {
// This function is hot. Check whether the value has any metadata to avoid a
// DenseMap lookup.
@@ -1521,7 +1513,7 @@ void llvm::findDbgUsers(SmallVectorImpl<DbgInfoIntrinsic *> &DbgUsers,
if (auto *L = LocalAsMetadata::getIfExists(V))
if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))
for (User *U : MDV->users())
- if (DbgInfoIntrinsic *DII = dyn_cast<DbgInfoIntrinsic>(U))
+ if (DbgVariableIntrinsic *DII = dyn_cast<DbgVariableIntrinsic>(U))
DbgUsers.push_back(DII);
}
@@ -1529,7 +1521,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
Instruction *InsertBefore, DIBuilder &Builder,
bool DerefBefore, int Offset, bool DerefAfter) {
auto DbgAddrs = FindDbgAddrUses(Address);
- for (DbgInfoIntrinsic *DII : DbgAddrs) {
+ for (DbgVariableIntrinsic *DII : DbgAddrs) {
DebugLoc Loc = DII->getDebugLoc();
auto *DIVar = DII->getVariable();
auto *DIExpr = DII->getExpression();
@@ -1597,7 +1589,7 @@ static MetadataAsValue *wrapValueInMetadata(LLVMContext &C, Value *V) {
}
bool llvm::salvageDebugInfo(Instruction &I) {
- SmallVector<DbgInfoIntrinsic *, 1> DbgUsers;
+ SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
findDbgUsers(DbgUsers, &I);
if (DbgUsers.empty())
return false;
@@ -1607,7 +1599,7 @@ bool llvm::salvageDebugInfo(Instruction &I) {
auto &Ctx = I.getContext();
auto wrapMD = [&](Value *V) { return wrapValueInMetadata(Ctx, V); };
- auto doSalvage = [&](DbgInfoIntrinsic *DII, SmallVectorImpl<uint64_t> &Ops) {
+ auto doSalvage = [&](DbgVariableIntrinsic *DII, SmallVectorImpl<uint64_t> &Ops) {
auto *DIExpr = DII->getExpression();
if (!Ops.empty()) {
// Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they
@@ -1621,13 +1613,13 @@ bool llvm::salvageDebugInfo(Instruction &I) {
LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
};
- auto applyOffset = [&](DbgInfoIntrinsic *DII, uint64_t Offset) {
+ auto applyOffset = [&](DbgVariableIntrinsic *DII, uint64_t Offset) {
SmallVector<uint64_t, 8> Ops;
DIExpression::appendOffset(Ops, Offset);
doSalvage(DII, Ops);
};
- auto applyOps = [&](DbgInfoIntrinsic *DII,
+ auto applyOps = [&](DbgVariableIntrinsic *DII,
std::initializer_list<uint64_t> Opcodes) {
SmallVector<uint64_t, 8> Ops(Opcodes);
doSalvage(DII, Ops);
@@ -1726,16 +1718,16 @@ using DbgValReplacement = Optional<DIExpression *>;
/// changes are made.
static bool rewriteDebugUsers(
Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT,
- function_ref<DbgValReplacement(DbgInfoIntrinsic &DII)> RewriteExpr) {
+ function_ref<DbgValReplacement(DbgVariableIntrinsic &DII)> RewriteExpr) {
// Find debug users of From.
- SmallVector<DbgInfoIntrinsic *, 1> Users;
+ SmallVector<DbgVariableIntrinsic *, 1> Users;
findDbgUsers(Users, &From);
if (Users.empty())
return false;
// Prevent use-before-def of To.
bool Changed = false;
- SmallPtrSet<DbgInfoIntrinsic *, 1> DeleteOrSalvage;
+ SmallPtrSet<DbgVariableIntrinsic *, 1> DeleteOrSalvage;
if (isa<Instruction>(&To)) {
bool DomPointAfterFrom = From.getNextNonDebugInstruction() == &DomPoint;
@@ -1824,7 +1816,7 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
Type *FromTy = From.getType();
Type *ToTy = To.getType();
- auto Identity = [&](DbgInfoIntrinsic &DII) -> DbgValReplacement {
+ auto Identity = [&](DbgVariableIntrinsic &DII) -> DbgValReplacement {
return DII.getExpression();
};
@@ -1848,7 +1840,7 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
// The width of the result has shrunk. Use sign/zero extension to describe
// the source variable's high bits.
- auto SignOrZeroExt = [&](DbgInfoIntrinsic &DII) -> DbgValReplacement {
+ auto SignOrZeroExt = [&](DbgVariableIntrinsic &DII) -> DbgValReplacement {
DILocalVariable *Var = DII.getVariable();
// Without knowing signedness, sign/zero extension isn't possible.
@@ -1902,17 +1894,17 @@ unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
}
unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
- bool PreserveLCSSA, DeferredDominance *DDT) {
+ bool PreserveLCSSA, DomTreeUpdater *DTU) {
BasicBlock *BB = I->getParent();
std::vector <DominatorTree::UpdateType> Updates;
// Loop over all of the successors, removing BB's entry from any PHI
// nodes.
- if (DDT)
+ if (DTU)
Updates.reserve(BB->getTerminator()->getNumSuccessors());
for (BasicBlock *Successor : successors(BB)) {
Successor->removePredecessor(BB, PreserveLCSSA);
- if (DDT)
+ if (DTU)
Updates.push_back({DominatorTree::Delete, BB, Successor});
}
// Insert a call to llvm.trap right before this. This turns the undefined
@@ -1923,7 +1915,8 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
CallInst *CallTrap = CallInst::Create(TrapFn, "", I);
CallTrap->setDebugLoc(I->getDebugLoc());
}
- new UnreachableInst(I->getContext(), I);
+ auto *UI = new UnreachableInst(I->getContext(), I);
+ UI->setDebugLoc(I->getDebugLoc());
// All instructions after this are dead.
unsigned NumInstrsRemoved = 0;
@@ -1934,13 +1927,13 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
BB->getInstList().erase(BBI++);
++NumInstrsRemoved;
}
- if (DDT)
- DDT->applyUpdates(Updates);
+ if (DTU)
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
return NumInstrsRemoved;
}
/// changeToCall - Convert the specified invoke into a normal call.
-static void changeToCall(InvokeInst *II, DeferredDominance *DDT = nullptr) {
+static void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr) {
SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
SmallVector<OperandBundleDef, 1> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
@@ -1950,6 +1943,7 @@ static void changeToCall(InvokeInst *II, DeferredDominance *DDT = nullptr) {
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
NewCall->setDebugLoc(II->getDebugLoc());
+ NewCall->copyMetadata(*II);
II->replaceAllUsesWith(NewCall);
// Follow the call by a branch to the normal destination.
@@ -1961,8 +1955,8 @@ static void changeToCall(InvokeInst *II, DeferredDominance *DDT = nullptr) {
BasicBlock *UnwindDestBB = II->getUnwindDest();
UnwindDestBB->removePredecessor(BB);
II->eraseFromParent();
- if (DDT)
- DDT->deleteEdge(BB, UnwindDestBB);
+ if (DTU)
+ DTU->deleteEdgeRelaxed(BB, UnwindDestBB);
}
BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
@@ -2003,8 +1997,8 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
}
static bool markAliveBlocks(Function &F,
- SmallPtrSetImpl<BasicBlock*> &Reachable,
- DeferredDominance *DDT = nullptr) {
+ SmallPtrSetImpl<BasicBlock *> &Reachable,
+ DomTreeUpdater *DTU = nullptr) {
SmallVector<BasicBlock*, 128> Worklist;
BasicBlock *BB = &F.front();
Worklist.push_back(BB);
@@ -2029,7 +2023,7 @@ static bool markAliveBlocks(Function &F,
if (IntrinsicID == Intrinsic::assume) {
if (match(CI->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) {
// Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(CI, false, false, DDT);
+ changeToUnreachable(CI, false, false, DTU);
Changed = true;
break;
}
@@ -2046,7 +2040,7 @@ static bool markAliveBlocks(Function &F,
if (match(CI->getArgOperand(0), m_Zero()))
if (!isa<UnreachableInst>(CI->getNextNode())) {
changeToUnreachable(CI->getNextNode(), /*UseLLVMTrap=*/false,
- false, DDT);
+ false, DTU);
Changed = true;
break;
}
@@ -2054,7 +2048,7 @@ static bool markAliveBlocks(Function &F,
} else if ((isa<ConstantPointerNull>(Callee) &&
!NullPointerIsDefined(CI->getFunction())) ||
isa<UndefValue>(Callee)) {
- changeToUnreachable(CI, /*UseLLVMTrap=*/false, false, DDT);
+ changeToUnreachable(CI, /*UseLLVMTrap=*/false, false, DTU);
Changed = true;
break;
}
@@ -2064,7 +2058,7 @@ static bool markAliveBlocks(Function &F,
// though.
if (!isa<UnreachableInst>(CI->getNextNode())) {
// Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(CI->getNextNode(), false, false, DDT);
+ changeToUnreachable(CI->getNextNode(), false, false, DTU);
Changed = true;
}
break;
@@ -2083,21 +2077,21 @@ static bool markAliveBlocks(Function &F,
(isa<ConstantPointerNull>(Ptr) &&
!NullPointerIsDefined(SI->getFunction(),
SI->getPointerAddressSpace()))) {
- changeToUnreachable(SI, true, false, DDT);
+ changeToUnreachable(SI, true, false, DTU);
Changed = true;
break;
}
}
}
- TerminatorInst *Terminator = BB->getTerminator();
+ Instruction *Terminator = BB->getTerminator();
if (auto *II = dyn_cast<InvokeInst>(Terminator)) {
// Turn invokes that call 'nounwind' functions into ordinary calls.
Value *Callee = II->getCalledValue();
if ((isa<ConstantPointerNull>(Callee) &&
!NullPointerIsDefined(BB->getParent())) ||
isa<UndefValue>(Callee)) {
- changeToUnreachable(II, true, false, DDT);
+ changeToUnreachable(II, true, false, DTU);
Changed = true;
} else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) {
if (II->use_empty() && II->onlyReadsMemory()) {
@@ -2107,10 +2101,10 @@ static bool markAliveBlocks(Function &F,
BranchInst::Create(NormalDestBB, II);
UnwindDestBB->removePredecessor(II->getParent());
II->eraseFromParent();
- if (DDT)
- DDT->deleteEdge(BB, UnwindDestBB);
+ if (DTU)
+ DTU->deleteEdgeRelaxed(BB, UnwindDestBB);
} else
- changeToCall(II, DDT);
+ changeToCall(II, DTU);
Changed = true;
}
} else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Terminator)) {
@@ -2156,7 +2150,7 @@ static bool markAliveBlocks(Function &F,
}
}
- Changed |= ConstantFoldTerminator(BB, true, nullptr, DDT);
+ Changed |= ConstantFoldTerminator(BB, true, nullptr, DTU);
for (BasicBlock *Successor : successors(BB))
if (Reachable.insert(Successor).second)
Worklist.push_back(Successor);
@@ -2164,15 +2158,15 @@ static bool markAliveBlocks(Function &F,
return Changed;
}
-void llvm::removeUnwindEdge(BasicBlock *BB, DeferredDominance *DDT) {
- TerminatorInst *TI = BB->getTerminator();
+void llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) {
+ Instruction *TI = BB->getTerminator();
if (auto *II = dyn_cast<InvokeInst>(TI)) {
- changeToCall(II, DDT);
+ changeToCall(II, DTU);
return;
}
- TerminatorInst *NewTI;
+ Instruction *NewTI;
BasicBlock *UnwindDest;
if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
@@ -2196,8 +2190,8 @@ void llvm::removeUnwindEdge(BasicBlock *BB, DeferredDominance *DDT) {
UnwindDest->removePredecessor(BB);
TI->replaceAllUsesWith(NewTI);
TI->eraseFromParent();
- if (DDT)
- DDT->deleteEdge(BB, UnwindDest);
+ if (DTU)
+ DTU->deleteEdgeRelaxed(BB, UnwindDest);
}
/// removeUnreachableBlocks - Remove blocks that are not reachable, even
@@ -2205,9 +2199,10 @@ void llvm::removeUnwindEdge(BasicBlock *BB, DeferredDominance *DDT) {
/// otherwise. If `LVI` is passed, this function preserves LazyValueInfo
/// after modifying the CFG.
bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
- DeferredDominance *DDT) {
+ DomTreeUpdater *DTU,
+ MemorySSAUpdater *MSSAU) {
SmallPtrSet<BasicBlock*, 16> Reachable;
- bool Changed = markAliveBlocks(F, Reachable, DDT);
+ bool Changed = markAliveBlocks(F, Reachable, DTU);
// If there are unreachable blocks in the CFG...
if (Reachable.size() == F.size())
@@ -2216,45 +2211,68 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
assert(Reachable.size() < F.size());
NumRemoved += F.size()-Reachable.size();
- // Loop over all of the basic blocks that are not reachable, dropping all of
- // their internal references. Update DDT and LVI if available.
- std::vector <DominatorTree::UpdateType> Updates;
+ SmallPtrSet<BasicBlock *, 16> DeadBlockSet;
for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) {
auto *BB = &*I;
if (Reachable.count(BB))
continue;
+ DeadBlockSet.insert(BB);
+ }
+
+ if (MSSAU)
+ MSSAU->removeBlocks(DeadBlockSet);
+
+ // Loop over all of the basic blocks that are not reachable, dropping all of
+ // their internal references. Update DTU and LVI if available.
+ std::vector<DominatorTree::UpdateType> Updates;
+ for (auto *BB : DeadBlockSet) {
for (BasicBlock *Successor : successors(BB)) {
- if (Reachable.count(Successor))
+ if (!DeadBlockSet.count(Successor))
Successor->removePredecessor(BB);
- if (DDT)
+ if (DTU)
Updates.push_back({DominatorTree::Delete, BB, Successor});
}
if (LVI)
LVI->eraseBlock(BB);
BB->dropAllReferences();
}
-
for (Function::iterator I = ++F.begin(); I != F.end();) {
auto *BB = &*I;
if (Reachable.count(BB)) {
++I;
continue;
}
- if (DDT) {
- DDT->deleteBB(BB); // deferred deletion of BB.
+ if (DTU) {
+ // Remove the terminator of BB to clear the successor list of BB.
+ if (BB->getTerminator())
+ BB->getInstList().pop_back();
+ new UnreachableInst(BB->getContext(), BB);
+ assert(succ_empty(BB) && "The successor list of BB isn't empty before "
+ "applying corresponding DTU updates.");
++I;
} else {
I = F.getBasicBlockList().erase(I);
}
}
- if (DDT)
- DDT->applyUpdates(Updates);
+ if (DTU) {
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ bool Deleted = false;
+ for (auto *BB : DeadBlockSet) {
+ if (DTU->isBBPendingDeletion(BB))
+ --NumRemoved;
+ else
+ Deleted = true;
+ DTU->deleteBB(BB);
+ }
+ if (!Deleted)
+ return false;
+ }
return true;
}
void llvm::combineMetadata(Instruction *K, const Instruction *J,
- ArrayRef<unsigned> KnownIDs) {
+ ArrayRef<unsigned> KnownIDs, bool DoesKMove) {
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
K->dropUnknownNonDebugMetadata(KnownIDs);
K->getAllMetadataOtherThanDebugLoc(Metadata);
@@ -2279,8 +2297,20 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
case LLVMContext::MD_mem_parallel_loop_access:
K->setMetadata(Kind, MDNode::intersect(JMD, KMD));
break;
+ case LLVMContext::MD_access_group:
+ K->setMetadata(LLVMContext::MD_access_group,
+ intersectAccessGroups(K, J));
+ break;
case LLVMContext::MD_range:
- K->setMetadata(Kind, MDNode::getMostGenericRange(JMD, KMD));
+
+ // If K does move, use most generic range. Otherwise keep the range of
+ // K.
+ if (DoesKMove)
+ // FIXME: If K does move, we should drop the range info and nonnull.
+ // Currently this function is used with DoesKMove in passes
+ // doing hoisting/sinking and the current behavior of using the
+ // most generic range is correct in those cases.
+ K->setMetadata(Kind, MDNode::getMostGenericRange(JMD, KMD));
break;
case LLVMContext::MD_fpmath:
K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD));
@@ -2290,8 +2320,9 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(Kind, JMD);
break;
case LLVMContext::MD_nonnull:
- // Only set the !nonnull if it is present in both instructions.
- K->setMetadata(Kind, JMD);
+ // If K does move, keep nonull if it is present in both instructions.
+ if (DoesKMove)
+ K->setMetadata(Kind, JMD);
break;
case LLVMContext::MD_invariant_group:
// Preserve !invariant.group in K.
@@ -2318,15 +2349,49 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(LLVMContext::MD_invariant_group, JMD);
}
-void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J) {
+void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
+ bool KDominatesJ) {
unsigned KnownIDs[] = {
LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
LLVMContext::MD_noalias, LLVMContext::MD_range,
LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull,
LLVMContext::MD_invariant_group, LLVMContext::MD_align,
LLVMContext::MD_dereferenceable,
- LLVMContext::MD_dereferenceable_or_null};
- combineMetadata(K, J, KnownIDs);
+ LLVMContext::MD_dereferenceable_or_null,
+ LLVMContext::MD_access_group};
+ combineMetadata(K, J, KnownIDs, KDominatesJ);
+}
+
+void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) {
+ auto *ReplInst = dyn_cast<Instruction>(Repl);
+ if (!ReplInst)
+ return;
+
+ // Patch the replacement so that it is not more restrictive than the value
+ // being replaced.
+ // Note that if 'I' is a load being replaced by some operation,
+ // for example, by an arithmetic operation, then andIRFlags()
+ // would just erase all math flags from the original arithmetic
+ // operation, which is clearly not wanted and not needed.
+ if (!isa<LoadInst>(I))
+ ReplInst->andIRFlags(I);
+
+ // FIXME: If both the original and replacement value are part of the
+ // same control-flow region (meaning that the execution of one
+ // guarantees the execution of the other), then we can combine the
+ // noalias scopes here and do better than the general conservative
+ // answer used in combineMetadata().
+
+ // In general, GVN unifies expressions over different control-flow
+ // regions, and so we need a conservative combination of the noalias
+ // scopes.
+ static const unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_range,
+ LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
+ LLVMContext::MD_invariant_group, LLVMContext::MD_nonnull,
+ LLVMContext::MD_access_group};
+ combineMetadata(ReplInst, I, KnownIDs, false);
}
template <typename RootType, typename DominatesFn>
@@ -2454,6 +2519,54 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
}
}
+void llvm::dropDebugUsers(Instruction &I) {
+ SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
+ findDbgUsers(DbgUsers, &I);
+ for (auto *DII : DbgUsers)
+ DII->eraseFromParent();
+}
+
+void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
+ BasicBlock *BB) {
+ // Since we are moving the instructions out of its basic block, we do not
+ // retain their original debug locations (DILocations) and debug intrinsic
+ // instructions (dbg.values).
+ //
+ // Doing so would degrade the debugging experience and adversely affect the
+ // accuracy of profiling information.
+ //
+ // Currently, when hoisting the instructions, we take the following actions:
+ // - Remove their dbg.values.
+ // - Set their debug locations to the values from the insertion point.
+ //
+ // As per PR39141 (comment #8), the more fundamental reason why the dbg.values
+ // need to be deleted, is because there will not be any instructions with a
+ // DILocation in either branch left after performing the transformation. We
+ // can only insert a dbg.value after the two branches are joined again.
+ //
+ // See PR38762, PR39243 for more details.
+ //
+ // TODO: Extend llvm.dbg.value to take more than one SSA Value (PR39141) to
+ // encode predicated DIExpressions that yield different results on different
+ // code paths.
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
+ Instruction *I = &*II;
+ I->dropUnknownNonDebugMetadata();
+ if (I->isUsedByMetadata())
+ dropDebugUsers(*I);
+ if (isa<DbgVariableIntrinsic>(I)) {
+ // Remove DbgInfo Intrinsics.
+ II = I->eraseFromParent();
+ continue;
+ }
+ I->setDebugLoc(InsertPt->getDebugLoc());
+ ++II;
+ }
+ DomBlock->getInstList().splice(InsertPt->getIterator(), BB->getInstList(),
+ BB->begin(),
+ BB->getTerminator()->getIterator());
+}
+
namespace {
/// A potential constituent of a bitreverse or bswap expression. See
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 6e92e679f999..41f14a834617 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -20,13 +20,15 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -35,6 +37,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -53,6 +56,7 @@ class LoopRotate {
AssumptionCache *AC;
DominatorTree *DT;
ScalarEvolution *SE;
+ MemorySSAUpdater *MSSAU;
const SimplifyQuery &SQ;
bool RotationOnly;
bool IsUtilMode;
@@ -60,10 +64,11 @@ class LoopRotate {
public:
LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
const TargetTransformInfo *TTI, AssumptionCache *AC,
- DominatorTree *DT, ScalarEvolution *SE, const SimplifyQuery &SQ,
- bool RotationOnly, bool IsUtilMode)
+ DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode)
: MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
- SQ(SQ), RotationOnly(RotationOnly), IsUtilMode(IsUtilMode) {}
+ MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
+ IsUtilMode(IsUtilMode) {}
bool processLoop(Loop *L);
private:
@@ -268,6 +273,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
SE->forgetTopmostLoop(L);
LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
// Find new Loop header. NewHeader is a Header's one and only successor
// that is inside loop. Header's other successor is outside the
@@ -298,18 +305,18 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// For the rest of the instructions, either hoist to the OrigPreheader if
// possible or create a clone in the OldPreHeader if not.
- TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
+ Instruction *LoopEntryBranch = OrigPreheader->getTerminator();
// Record all debug intrinsics preceding LoopEntryBranch to avoid duplication.
using DbgIntrinsicHash =
std::pair<std::pair<Value *, DILocalVariable *>, DIExpression *>;
- auto makeHash = [](DbgInfoIntrinsic *D) -> DbgIntrinsicHash {
+ auto makeHash = [](DbgVariableIntrinsic *D) -> DbgIntrinsicHash {
return {{D->getVariableLocation(), D->getVariable()}, D->getExpression()};
};
SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics;
for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend();
I != E; ++I) {
- if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&*I))
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&*I))
DbgIntrinsics.insert(makeHash(DII));
else
break;
@@ -325,7 +332,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// something that might trap, but isn't safe to hoist something that reads
// memory (without proving that the loop doesn't write).
if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() &&
- !Inst->mayWriteToMemory() && !isa<TerminatorInst>(Inst) &&
+ !Inst->mayWriteToMemory() && !Inst->isTerminator() &&
!isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
Inst->moveBefore(LoopEntryBranch);
continue;
@@ -339,7 +346,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
// Avoid inserting the same intrinsic twice.
- if (auto *DII = dyn_cast<DbgInfoIntrinsic>(C))
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(C))
if (DbgIntrinsics.count(makeHash(DII))) {
C->deleteValue();
continue;
@@ -374,8 +381,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// Along with all the other instructions, we just cloned OrigHeader's
// terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
// successors by duplicating their incoming values for OrigHeader.
- TerminatorInst *TI = OrigHeader->getTerminator();
- for (BasicBlock *SuccBB : TI->successors())
+ for (BasicBlock *SuccBB : successors(OrigHeader))
for (BasicBlock::iterator BI = SuccBB->begin();
PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
@@ -385,6 +391,12 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// remove the corresponding incoming values from the PHI nodes in OrigHeader.
LoopEntryBranch->eraseFromParent();
+ // Update MemorySSA before the rewrite call below changes the 1:1
+ // instruction:cloned_instruction_or_value mapping in ValueMap.
+ if (MSSAU) {
+ ValueMap[OrigHeader] = OrigPreheader;
+ MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader, ValueMap);
+ }
SmallVector<PHINode*, 2> InsertedPHIs;
// If there were any uses of instructions in the duplicated block outside the
@@ -411,6 +423,12 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader});
Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader});
DT->applyUpdates(Updates);
+
+ if (MSSAU) {
+ MSSAU->applyUpdates(Updates, *DT);
+ if (VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+ }
}
// At this point, we've finished our major CFG changes. As part of cloning
@@ -433,7 +451,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// Split the edge to form a real preheader.
BasicBlock *NewPH = SplitCriticalEdge(
OrigPreheader, NewHeader,
- CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
NewPH->setName(NewHeader->getName() + ".lr.ph");
// Preserve canonical loop form, which means that 'Exit' should have only
@@ -452,7 +470,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
SplitLatchEdge |= L->getLoopLatch() == ExitPred;
BasicBlock *ExitSplit = SplitCriticalEdge(
ExitPred, Exit,
- CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
ExitSplit->moveBefore(Exit);
}
assert(SplitLatchEdge &&
@@ -467,16 +485,27 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// With our CFG finalized, update DomTree if it is available.
if (DT) DT->deleteEdge(OrigPreheader, Exit);
+
+ // Update MSSA too, if available.
+ if (MSSAU)
+ MSSAU->removeEdge(OrigPreheader, Exit);
}
assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
// Now that the CFG and DomTree are in a consistent state again, try to merge
// the OrigHeader block into OrigLatch. This will succeed if they are
// connected by an unconditional branch. This is just a cleanup so the
// emitted code isn't too gross in this common case.
- MergeBlockIntoPredecessor(OrigHeader, DT, LI);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU);
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump());
@@ -585,9 +614,14 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) {
<< LastExit->getName() << "\n");
// Hoist the instructions from Latch into LastExit.
+ Instruction *FirstLatchInst = &*(Latch->begin());
LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(),
Latch->begin(), Jmp->getIterator());
+ // Update MemorySSA
+ if (MSSAU)
+ MSSAU->moveAllAfterMergeBlocks(Latch, LastExit, FirstLatchInst);
+
unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
BasicBlock *Header = Jmp->getSuccessor(0);
assert(Header == L->getHeader() && "expected a backward branch");
@@ -603,6 +637,10 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) {
if (DT)
DT->eraseNode(Latch);
Latch->eraseFromParent();
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
return true;
}
@@ -635,11 +673,16 @@ bool LoopRotate::processLoop(Loop *L) {
/// The utility to convert a loop into a loop with bottom test.
bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
AssumptionCache *AC, DominatorTree *DT,
- ScalarEvolution *SE, const SimplifyQuery &SQ,
- bool RotationOnly = true,
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ const SimplifyQuery &SQ, bool RotationOnly = true,
unsigned Threshold = unsigned(-1),
bool IsUtilMode = true) {
- LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, SQ, RotationOnly, IsUtilMode);
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+ LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
+ IsUtilMode);
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
return LR.processLoop(L);
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 970494eb4704..380f4fca54d9 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -137,7 +137,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
// Split out the loop pre-header.
BasicBlock *PreheaderBB;
PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT,
- LI, PreserveLCSSA);
+ LI, nullptr, PreserveLCSSA);
if (!PreheaderBB)
return nullptr;
@@ -251,7 +251,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
SE->forgetLoop(L);
BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer",
- DT, LI, PreserveLCSSA);
+ DT, LI, nullptr, PreserveLCSSA);
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
@@ -435,7 +435,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
unsigned LoopMDKind = BEBlock->getContext().getMDKindID("llvm.loop");
MDNode *LoopMD = nullptr;
for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
- TerminatorInst *TI = BackedgeBlocks[i]->getTerminator();
+ Instruction *TI = BackedgeBlocks[i]->getTerminator();
if (!LoopMD)
LoopMD = TI->getMetadata(LoopMDKind);
TI->setMetadata(LoopMDKind, nullptr);
@@ -488,7 +488,7 @@ ReprocessLoop:
<< P->getName() << "\n");
// Zap the dead pred's terminator and replace it with unreachable.
- TerminatorInst *TI = P->getTerminator();
+ Instruction *TI = P->getTerminator();
changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA);
Changed = true;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 04b8c1417e0a..da7ed2bd1652 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -54,10 +54,10 @@ UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
static cl::opt<bool>
UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
cl::desc("Verify domtree after unrolling"),
-#ifdef NDEBUG
- cl::init(false)
-#else
+#ifdef EXPENSIVE_CHECKS
cl::init(true)
+#else
+ cl::init(false)
#endif
);
@@ -275,8 +275,7 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
// inserted code, doing constant propagation and dead code elimination as we
// go.
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- const std::vector<BasicBlock *> &NewLoopBlocks = L->getBlocks();
- for (BasicBlock *BB : NewLoopBlocks) {
+ for (BasicBlock *BB : L->getBlocks()) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
Instruction *Inst = &*I++;
@@ -330,12 +329,15 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
///
/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
/// DominatorTree if they are non-null.
+///
+/// If RemainderLoop is non-null, it will receive the remainder loop (if
+/// required and not fully unrolled).
LoopUnrollResult llvm::UnrollLoop(
Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime,
bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst,
unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder,
LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) {
+ OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
@@ -469,7 +471,7 @@ LoopUnrollResult llvm::UnrollLoop(
if (RuntimeTripCount && TripMultiple % Count != 0 &&
!UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
EpilogProfitability, UnrollRemainder, LI, SE,
- DT, AC, PreserveLCSSA)) {
+ DT, AC, PreserveLCSSA, RemainderLoop)) {
if (Force)
RuntimeTripCount = false;
else {
@@ -596,8 +598,15 @@ LoopUnrollResult llvm::UnrollLoop(
for (BasicBlock *BB : L->getBlocks())
for (Instruction &I : *BB)
if (!isa<DbgInfoIntrinsic>(&I))
- if (const DILocation *DIL = I.getDebugLoc())
- I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));
+ if (const DILocation *DIL = I.getDebugLoc()) {
+ auto NewDIL = DIL->cloneWithDuplicationFactor(Count);
+ if (NewDIL)
+ I.setDebugLoc(NewDIL.getValue());
+ else
+ LLVM_DEBUG(dbgs()
+ << "Failed to create new discriminator: "
+ << DIL->getFilename() << " Line: " << DIL->getLine());
+ }
for (unsigned It = 1; It != Count; ++It) {
std::vector<BasicBlock*> NewBlocks;
@@ -782,7 +791,7 @@ LoopUnrollResult llvm::UnrollLoop(
// there is no such latch.
NewIDom = Latches.back();
for (BasicBlock *IterLatch : Latches) {
- TerminatorInst *Term = IterLatch->getTerminator();
+ Instruction *Term = IterLatch->getTerminator();
if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
NewIDom = IterLatch;
break;
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index b919f73c3817..e26762639c13 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -72,7 +72,7 @@ static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop,
for (BasicBlock *BB : ForeBlocks) {
if (BB == SubLoopPreHeader)
continue;
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
if (!ForeBlocks.count(TI->getSuccessor(i)))
return false;
@@ -167,12 +167,14 @@ static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header,
isSafeToUnrollAndJam should be used prior to calling this to make sure the
unrolling will be valid. Checking profitablility is also advisable.
+
+ If EpilogueLoop is non-null, it receives the epilogue loop (if it was
+ necessary to create one and not fully unrolled).
*/
-LoopUnrollResult
-llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
- unsigned TripMultiple, bool UnrollRemainder,
- LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
- AssumptionCache *AC, OptimizationRemarkEmitter *ORE) {
+LoopUnrollResult llvm::UnrollAndJamLoop(
+ Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple,
+ bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) {
// When we enter here we should have already checked that it is safe
BasicBlock *Header = L->getHeader();
@@ -181,7 +183,7 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
// Don't enter the unroll code if there is nothing to do.
if (TripCount == 0 && Count < 2) {
- LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; almost nothing to do\n");
return LoopUnrollResult::Unmodified;
}
@@ -196,7 +198,8 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
if (TripMultiple == 1 || TripMultiple % Count != 0) {
if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
/*UseEpilogRemainder*/ true,
- UnrollRemainder, LI, SE, DT, AC, true)) {
+ UnrollRemainder, LI, SE, DT, AC, true,
+ EpilogueLoop)) {
LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
"generated when assuming runtime trip count\n");
return LoopUnrollResult::Unmodified;
@@ -297,8 +300,15 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
for (BasicBlock *BB : L->getBlocks())
for (Instruction &I : *BB)
if (!isa<DbgInfoIntrinsic>(&I))
- if (const DILocation *DIL = I.getDebugLoc())
- I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));
+ if (const DILocation *DIL = I.getDebugLoc()) {
+ auto NewDIL = DIL->cloneWithDuplicationFactor(Count);
+ if (NewDIL)
+ I.setDebugLoc(NewDIL.getValue());
+ else
+ LLVM_DEBUG(dbgs()
+ << "Failed to create new discriminator: "
+ << DIL->getFilename() << " Line: " << DIL->getLine());
+ }
// Copy all blocks
for (unsigned It = 1; It != Count; ++It) {
@@ -619,16 +629,28 @@ static bool checkDependencies(SmallVector<Value *, 4> &Earlier,
if (auto D = DI.depends(Src, Dst, true)) {
assert(D->isOrdered() && "Expected an output, flow or anti dep.");
- if (D->isConfused())
+ if (D->isConfused()) {
+ LLVM_DEBUG(dbgs() << " Confused dependency between:\n"
+ << " " << *Src << "\n"
+ << " " << *Dst << "\n");
return false;
+ }
if (!InnerLoop) {
- if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT)
+ if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT) {
+ LLVM_DEBUG(dbgs() << " > dependency between:\n"
+ << " " << *Src << "\n"
+ << " " << *Dst << "\n");
return false;
+ }
} else {
assert(LoopDepth + 1 <= D->getLevels());
if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT &&
- D->getDirection(LoopDepth + 1) & Dependence::DVEntry::LT)
+ D->getDirection(LoopDepth + 1) & Dependence::DVEntry::LT) {
+ LLVM_DEBUG(dbgs() << " < > dependency between:\n"
+ << " " << *Src << "\n"
+ << " " << *Dst << "\n");
return false;
+ }
}
}
}
@@ -716,38 +738,45 @@ bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
if (SubLoopLatch != SubLoopExit)
return false;
- if (Header->hasAddressTaken() || SubLoopHeader->hasAddressTaken())
+ if (Header->hasAddressTaken() || SubLoopHeader->hasAddressTaken()) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Address taken\n");
return false;
+ }
// Split blocks into Fore/SubLoop/Aft based on dominators
BasicBlockSet SubLoopBlocks;
BasicBlockSet ForeBlocks;
BasicBlockSet AftBlocks;
if (!partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks,
- AftBlocks, &DT))
+ AftBlocks, &DT)) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Incompatible loop layout\n");
return false;
+ }
// Aft blocks may need to move instructions to fore blocks, which becomes more
// difficult if there are multiple (potentially conditionally executed)
// blocks. For now we just exclude loops with multiple aft blocks.
- if (AftBlocks.size() != 1)
+ if (AftBlocks.size() != 1) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Can't currently handle "
+ "multiple blocks after the loop\n");
return false;
+ }
- // Check inner loop IV is consistent between all iterations
- const SCEV *SubLoopBECountSC = SE.getExitCount(SubLoop, SubLoopLatch);
- if (isa<SCEVCouldNotCompute>(SubLoopBECountSC) ||
- !SubLoopBECountSC->getType()->isIntegerTy())
- return false;
- ScalarEvolution::LoopDisposition LD =
- SE.getLoopDisposition(SubLoopBECountSC, L);
- if (LD != ScalarEvolution::LoopInvariant)
+ // Check inner loop backedge count is consistent on all iterations of the
+ // outer loop
+ if (!hasIterationCountInvariantInParent(SubLoop, SE)) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Inner loop iteration count is "
+ "not consistent on each iteration\n");
return false;
+ }
// Check the loop safety info for exceptions.
- LoopSafetyInfo LSI;
- computeLoopSafetyInfo(&LSI, L);
- if (LSI.MayThrow)
+ SimpleLoopSafetyInfo LSI;
+ LSI.computeLoopSafetyInfo(L);
+ if (LSI.anyBlockMayThrow()) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Something may throw\n");
return false;
+ }
// We've ruled out the easy stuff and now need to check that there are no
// interdependencies which may prevent us from moving the:
@@ -772,14 +801,19 @@ bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
}
// Keep going
return true;
- }))
+ })) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't move required "
+ "instructions after subloop to before it\n");
return false;
+ }
// Check for memory dependencies which prohibit the unrolling we are doing.
// Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check
// there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub.
- if (!checkDependencies(L, ForeBlocks, SubLoopBlocks, AftBlocks, DI))
+ if (!checkDependencies(L, ForeBlocks, SubLoopBlocks, AftBlocks, DI)) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; failed dependency check\n");
return false;
+ }
return true;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 78afe748e596..151a285af4e9 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -615,11 +615,17 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// the original loop body.
if (Iter == 0)
DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch]));
+#ifdef EXPENSIVE_CHECKS
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+#endif
}
- updateBranchWeights(InsertBot, cast<BranchInst>(VMap[LatchBR]), Iter,
+ auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
+ updateBranchWeights(InsertBot, LatchBRCopy, Iter,
PeelCount, ExitWeight);
+ // Remove Loop metadata from the latch branch instruction
+ // because it is not the Loop's latch branch anymore.
+ LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
InsertTop = InsertBot;
InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 0057b4ba7ce1..00d2fd2fdbac 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -70,6 +70,17 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
BasicBlock *PreHeader, BasicBlock *NewPreHeader,
ValueToValueMapTy &VMap, DominatorTree *DT,
LoopInfo *LI, bool PreserveLCSSA) {
+ // Loop structure should be the following:
+ // Preheader
+ // PrologHeader
+ // ...
+ // PrologLatch
+ // PrologExit
+ // NewPreheader
+ // Header
+ // ...
+ // Latch
+ // LatchExit
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
@@ -83,14 +94,21 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
for (PHINode &PN : Succ->phis()) {
// Add a new PHI node to the prolog end block and add the
// appropriate incoming values.
+ // TODO: This code assumes that the PrologExit (or the LatchExit block for
+ // prolog loop) contains only one predecessor from the loop, i.e. the
+ // PrologLatch. When supporting multiple-exiting block loops, we can have
+ // two or more blocks that have the LatchExit as the target in the
+ // original loop.
PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
PrologExit->getFirstNonPHI());
// Adding a value to the new PHI node from the original loop preheader.
// This is the value that skips all the prolog code.
if (L->contains(&PN)) {
+ // Succ is loop header.
NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
PreHeader);
} else {
+ // Succ is LatchExit.
NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader);
}
@@ -124,7 +142,7 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
PrologExitPreds.push_back(PredBB);
SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
- PreserveLCSSA);
+ nullptr, PreserveLCSSA);
}
// Create a branch around the original loop, which is taken if there are no
@@ -143,7 +161,7 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
// Split the exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
- PreserveLCSSA);
+ nullptr, PreserveLCSSA);
// Add the branch to the exit block (around the unrolled loop)
B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
InsertPt->eraseFromParent();
@@ -257,7 +275,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
assert(Exit && "Loop must have a single exit block only");
// Split the epilogue exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
- SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,
+ SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
PreserveLCSSA);
// Add the branch to the exit block (around the unrolling loop)
B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
@@ -267,7 +285,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
// Split the main loop exit to maintain canonicalization guarantees.
SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
- SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI,
+ SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
PreserveLCSSA);
}
@@ -380,6 +398,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
}
if (CreateRemainderLoop) {
Loop *NewLoop = NewLoops[L];
+ MDNode *LoopID = NewLoop->getLoopID();
assert(NewLoop && "L should have been cloned");
// Only add loop metadata if the loop is not going to be completely
@@ -387,6 +406,16 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
if (UnrollRemainder)
return NewLoop;
+ Optional<MDNode *> NewLoopID = makeFollowupLoopID(
+ LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
+ if (NewLoopID.hasValue()) {
+ NewLoop->setLoopID(NewLoopID.getValue());
+
+ // Do not setLoopAlreadyUnrolled if loop attributes have been defined
+ // explicitly.
+ return NewLoop;
+ }
+
// Add unroll disable metadata to disable future unrolling for this loop.
NewLoop->setLoopAlreadyUnrolled();
return NewLoop;
@@ -525,10 +554,10 @@ static bool canProfitablyUnrollMultiExitLoop(
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
bool AllowExpensiveTripCount,
bool UseEpilogRemainder,
- bool UnrollRemainder,
- LoopInfo *LI, ScalarEvolution *SE,
- DominatorTree *DT, AssumptionCache *AC,
- bool PreserveLCSSA) {
+ bool UnrollRemainder, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC, bool PreserveLCSSA,
+ Loop **ResultLoop) {
LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
LLVM_DEBUG(L->dump());
LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
@@ -545,13 +574,27 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
BasicBlock *Header = L->getHeader();
BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+
+ if (!LatchBR || LatchBR->isUnconditional()) {
+ // The loop-rotate pass can be helpful to avoid this in many cases.
+ LLVM_DEBUG(
+ dbgs()
+ << "Loop latch not terminated by a conditional branch.\n");
+ return false;
+ }
+
unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
- // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
- // targets of the Latch be an exit block out of the loop. This needs
- // to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
- assert(!L->contains(LatchExit) &&
- "one of the loop latch successors should be the exit block!");
+
+ if (L->contains(LatchExit)) {
+ // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
+ // targets of the Latch be an exit block out of the loop.
+ LLVM_DEBUG(
+ dbgs()
+ << "One of the loop latch successors must be the exit block.\n");
+ return false;
+ }
+
// These are exit blocks other than the target of the latch exiting block.
SmallVector<BasicBlock *, 4> OtherExits;
bool isMultiExitUnrollingEnabled =
@@ -636,8 +679,8 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
NewPreHeader->setName(PreHeader->getName() + ".new");
// Split LatchExit to create phi nodes from branch above.
SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
- NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa",
- DT, LI, PreserveLCSSA);
+ NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI,
+ nullptr, PreserveLCSSA);
// NewExit gets its DebugLoc from LatchExit, which is not part of the
// original Loop.
// Fix this by setting Loop's DebugLoc to NewExit.
@@ -762,10 +805,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// Now the loop blocks are cloned and the other exiting blocks from the
// remainder are connected to the original Loop's exit blocks. The remaining
// work is to update the phi nodes in the original loop, and take in the
- // values from the cloned region. Also update the dominator info for
- // OtherExits and their immediate successors, since we have new edges into
- // OtherExits.
- SmallPtrSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks;
+ // values from the cloned region.
for (auto *BB : OtherExits) {
for (auto &II : *BB) {
@@ -800,27 +840,30 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
"Breaks the definition of dedicated exits!");
}
#endif
- // Update the dominator info because the immediate dominator is no longer the
- // header of the original Loop. BB has edges both from L and remainder code.
- // Since the preheader determines which loop is run (L or directly jump to
- // the remainder code), we set the immediate dominator as the preheader.
- if (DT) {
- DT->changeImmediateDominator(BB, PreHeader);
- // Also update the IDom for immediate successors of BB. If the current
- // IDom is the header, update the IDom to be the preheader because that is
- // the nearest common dominator of all predecessors of SuccBB. We need to
- // check for IDom being the header because successors of exit blocks can
- // have edges from outside the loop, and we should not incorrectly update
- // the IDom in that case.
- for (BasicBlock *SuccBB: successors(BB))
- if (ImmediateSuccessorsOfExitBlocks.insert(SuccBB).second) {
- if (DT->getNode(SuccBB)->getIDom()->getBlock() == Header) {
- assert(!SuccBB->getSinglePredecessor() &&
- "BB should be the IDom then!");
- DT->changeImmediateDominator(SuccBB, PreHeader);
- }
- }
+ }
+
+ // Update the immediate dominator of the exit blocks and blocks that are
+ // reachable from the exit blocks. This is needed because we now have paths
+ // from both the original loop and the remainder code reaching the exit
+ // blocks. While the IDom of these exit blocks were from the original loop,
+ // now the IDom is the preheader (which decides whether the original loop or
+ // remainder code should run).
+ if (DT && !L->getExitingBlock()) {
+ SmallVector<BasicBlock *, 16> ChildrenToUpdate;
+ // NB! We have to examine the dom children of all loop blocks, not just
+ // those which are the IDom of the exit blocks. This is because blocks
+ // reachable from the exit blocks can have their IDom as the nearest common
+ // dominator of the exit blocks.
+ for (auto *BB : L->blocks()) {
+ auto *DomNodeBB = DT->getNode(BB);
+ for (auto *DomChild : DomNodeBB->getChildren()) {
+ auto *DomChildBB = DomChild->getBlock();
+ if (!L->contains(LI->getLoopFor(DomChildBB)))
+ ChildrenToUpdate.push_back(DomChildBB);
+ }
}
+ for (auto *BB : ChildrenToUpdate)
+ DT->changeImmediateDominator(BB, PreHeader);
}
// Loop structure should be the following:
@@ -884,6 +927,12 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// of its parent loops, so the Scalar Evolution pass needs to be run again.
SE->forgetTopmostLoop(L);
+ // Verify that the Dom Tree is correct.
+#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
+ if (DT)
+ assert(DT->verify(DominatorTree::VerificationLevel::Full));
+#endif
+
// Canonicalize to LoopSimplifyForm both original and remainder loops. We
// cannot rely on the LoopUnrollPass to do this because it only does
// canonicalization for parent/subloops and not the sibling loops.
@@ -897,16 +946,20 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
}
+ auto UnrollResult = LoopUnrollResult::Unmodified;
if (remainderLoop && UnrollRemainder) {
LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
- UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1,
- /*Force*/ false, /*AllowRuntime*/ false,
- /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
- /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
- /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC,
- /*ORE*/ nullptr, PreserveLCSSA);
+ UnrollResult =
+ UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1,
+ /*Force*/ false, /*AllowRuntime*/ false,
+ /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
+ /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
+ /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC,
+ /*ORE*/ nullptr, PreserveLCSSA);
}
+ if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
+ *ResultLoop = remainderLoop;
NumRuntimeUnrolled++;
return true;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 46af120a428b..a93d1aeb62ef 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -26,8 +26,11 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
@@ -41,1104 +44,7 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "loop-utils"
-bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
- SmallPtrSetImpl<Instruction *> &Set) {
- for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
- if (!Set.count(dyn_cast<Instruction>(*Use)))
- return false;
- return true;
-}
-
-bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) {
- switch (Kind) {
- default:
- break;
- case RK_IntegerAdd:
- case RK_IntegerMult:
- case RK_IntegerOr:
- case RK_IntegerAnd:
- case RK_IntegerXor:
- case RK_IntegerMinMax:
- return true;
- }
- return false;
-}
-
-bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) {
- return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind);
-}
-
-bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
- switch (Kind) {
- default:
- break;
- case RK_IntegerAdd:
- case RK_IntegerMult:
- case RK_FloatAdd:
- case RK_FloatMult:
- return true;
- }
- return false;
-}
-
-/// Determines if Phi may have been type-promoted. If Phi has a single user
-/// that ANDs the Phi with a type mask, return the user. RT is updated to
-/// account for the narrower bit width represented by the mask, and the AND
-/// instruction is added to CI.
-static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
- SmallPtrSetImpl<Instruction *> &Visited,
- SmallPtrSetImpl<Instruction *> &CI) {
- if (!Phi->hasOneUse())
- return Phi;
-
- const APInt *M = nullptr;
- Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
-
- // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
- // with a new integer type of the corresponding bit width.
- if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
- int32_t Bits = (*M + 1).exactLogBase2();
- if (Bits > 0) {
- RT = IntegerType::get(Phi->getContext(), Bits);
- Visited.insert(Phi);
- CI.insert(J);
- return J;
- }
- }
- return Phi;
-}
-
-/// Compute the minimal bit width needed to represent a reduction whose exit
-/// instruction is given by Exit.
-static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
- DemandedBits *DB,
- AssumptionCache *AC,
- DominatorTree *DT) {
- bool IsSigned = false;
- const DataLayout &DL = Exit->getModule()->getDataLayout();
- uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
-
- if (DB) {
- // Use the demanded bits analysis to determine the bits that are live out
- // of the exit instruction, rounding up to the nearest power of two. If the
- // use of demanded bits results in a smaller bit width, we know the value
- // must be positive (i.e., IsSigned = false), because if this were not the
- // case, the sign bit would have been demanded.
- auto Mask = DB->getDemandedBits(Exit);
- MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
- }
-
- if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
- // If demanded bits wasn't able to limit the bit width, we can try to use
- // value tracking instead. This can be the case, for example, if the value
- // may be negative.
- auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
- auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
- MaxBitWidth = NumTypeBits - NumSignBits;
- KnownBits Bits = computeKnownBits(Exit, DL);
- if (!Bits.isNonNegative()) {
- // If the value is not known to be non-negative, we set IsSigned to true,
- // meaning that we will use sext instructions instead of zext
- // instructions to restore the original type.
- IsSigned = true;
- if (!Bits.isNegative())
- // If the value is not known to be negative, we don't known what the
- // upper bit is, and therefore, we don't know what kind of extend we
- // will need. In this case, just increase the bit width by one bit and
- // use sext.
- ++MaxBitWidth;
- }
- }
- if (!isPowerOf2_64(MaxBitWidth))
- MaxBitWidth = NextPowerOf2(MaxBitWidth);
-
- return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
- IsSigned);
-}
-
-/// Collect cast instructions that can be ignored in the vectorizer's cost
-/// model, given a reduction exit value and the minimal type in which the
-/// reduction can be represented.
-static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
- Type *RecurrenceType,
- SmallPtrSetImpl<Instruction *> &Casts) {
-
- SmallVector<Instruction *, 8> Worklist;
- SmallPtrSet<Instruction *, 8> Visited;
- Worklist.push_back(Exit);
-
- while (!Worklist.empty()) {
- Instruction *Val = Worklist.pop_back_val();
- Visited.insert(Val);
- if (auto *Cast = dyn_cast<CastInst>(Val))
- if (Cast->getSrcTy() == RecurrenceType) {
- // If the source type of a cast instruction is equal to the recurrence
- // type, it will be eliminated, and should be ignored in the vectorizer
- // cost model.
- Casts.insert(Cast);
- continue;
- }
-
- // Add all operands to the work list if they are loop-varying values that
- // we haven't yet visited.
- for (Value *O : cast<User>(Val)->operands())
- if (auto *I = dyn_cast<Instruction>(O))
- if (TheLoop->contains(I) && !Visited.count(I))
- Worklist.push_back(I);
- }
-}
-
-bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
- Loop *TheLoop, bool HasFunNoNaNAttr,
- RecurrenceDescriptor &RedDes,
- DemandedBits *DB,
- AssumptionCache *AC,
- DominatorTree *DT) {
- if (Phi->getNumIncomingValues() != 2)
- return false;
-
- // Reduction variables are only found in the loop header block.
- if (Phi->getParent() != TheLoop->getHeader())
- return false;
-
- // Obtain the reduction start value from the value that comes from the loop
- // preheader.
- Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
-
- // ExitInstruction is the single value which is used outside the loop.
- // We only allow for a single reduction value to be used outside the loop.
- // This includes users of the reduction, variables (which form a cycle
- // which ends in the phi node).
- Instruction *ExitInstruction = nullptr;
- // Indicates that we found a reduction operation in our scan.
- bool FoundReduxOp = false;
-
- // We start with the PHI node and scan for all of the users of this
- // instruction. All users must be instructions that can be used as reduction
- // variables (such as ADD). We must have a single out-of-block user. The cycle
- // must include the original PHI.
- bool FoundStartPHI = false;
-
- // To recognize min/max patterns formed by a icmp select sequence, we store
- // the number of instruction we saw from the recognized min/max pattern,
- // to make sure we only see exactly the two instructions.
- unsigned NumCmpSelectPatternInst = 0;
- InstDesc ReduxDesc(false, nullptr);
-
- // Data used for determining if the recurrence has been type-promoted.
- Type *RecurrenceType = Phi->getType();
- SmallPtrSet<Instruction *, 4> CastInsts;
- Instruction *Start = Phi;
- bool IsSigned = false;
-
- SmallPtrSet<Instruction *, 8> VisitedInsts;
- SmallVector<Instruction *, 8> Worklist;
-
- // Return early if the recurrence kind does not match the type of Phi. If the
- // recurrence kind is arithmetic, we attempt to look through AND operations
- // resulting from the type promotion performed by InstCombine. Vector
- // operations are not limited to the legal integer widths, so we may be able
- // to evaluate the reduction in the narrower width.
- if (RecurrenceType->isFloatingPointTy()) {
- if (!isFloatingPointRecurrenceKind(Kind))
- return false;
- } else {
- if (!isIntegerRecurrenceKind(Kind))
- return false;
- if (isArithmeticRecurrenceKind(Kind))
- Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
- }
-
- Worklist.push_back(Start);
- VisitedInsts.insert(Start);
-
- // A value in the reduction can be used:
- // - By the reduction:
- // - Reduction operation:
- // - One use of reduction value (safe).
- // - Multiple use of reduction value (not safe).
- // - PHI:
- // - All uses of the PHI must be the reduction (safe).
- // - Otherwise, not safe.
- // - By instructions outside of the loop (safe).
- // * One value may have several outside users, but all outside
- // uses must be of the same value.
- // - By an instruction that is not part of the reduction (not safe).
- // This is either:
- // * An instruction type other than PHI or the reduction operation.
- // * A PHI in the header other than the initial PHI.
- while (!Worklist.empty()) {
- Instruction *Cur = Worklist.back();
- Worklist.pop_back();
-
- // No Users.
- // If the instruction has no users then this is a broken chain and can't be
- // a reduction variable.
- if (Cur->use_empty())
- return false;
-
- bool IsAPhi = isa<PHINode>(Cur);
-
- // A header PHI use other than the original PHI.
- if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
- return false;
-
- // Reductions of instructions such as Div, and Sub is only possible if the
- // LHS is the reduction variable.
- if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
- !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
- !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
- return false;
-
- // Any reduction instruction must be of one of the allowed kinds. We ignore
- // the starting value (the Phi or an AND instruction if the Phi has been
- // type-promoted).
- if (Cur != Start) {
- ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
- if (!ReduxDesc.isRecurrence())
- return false;
- }
-
- // A reduction operation must only have one use of the reduction value.
- if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
- hasMultipleUsesOf(Cur, VisitedInsts))
- return false;
-
- // All inputs to a PHI node must be a reduction value.
- if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
- return false;
-
- if (Kind == RK_IntegerMinMax &&
- (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
- ++NumCmpSelectPatternInst;
- if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
- ++NumCmpSelectPatternInst;
-
- // Check whether we found a reduction operator.
- FoundReduxOp |= !IsAPhi && Cur != Start;
-
- // Process users of current instruction. Push non-PHI nodes after PHI nodes
- // onto the stack. This way we are going to have seen all inputs to PHI
- // nodes once we get to them.
- SmallVector<Instruction *, 8> NonPHIs;
- SmallVector<Instruction *, 8> PHIs;
- for (User *U : Cur->users()) {
- Instruction *UI = cast<Instruction>(U);
-
- // Check if we found the exit user.
- BasicBlock *Parent = UI->getParent();
- if (!TheLoop->contains(Parent)) {
- // If we already know this instruction is used externally, move on to
- // the next user.
- if (ExitInstruction == Cur)
- continue;
-
- // Exit if you find multiple values used outside or if the header phi
- // node is being used. In this case the user uses the value of the
- // previous iteration, in which case we would loose "VF-1" iterations of
- // the reduction operation if we vectorize.
- if (ExitInstruction != nullptr || Cur == Phi)
- return false;
-
- // The instruction used by an outside user must be the last instruction
- // before we feed back to the reduction phi. Otherwise, we loose VF-1
- // operations on the value.
- if (!is_contained(Phi->operands(), Cur))
- return false;
-
- ExitInstruction = Cur;
- continue;
- }
-
- // Process instructions only once (termination). Each reduction cycle
- // value must only be used once, except by phi nodes and min/max
- // reductions which are represented as a cmp followed by a select.
- InstDesc IgnoredVal(false, nullptr);
- if (VisitedInsts.insert(UI).second) {
- if (isa<PHINode>(UI))
- PHIs.push_back(UI);
- else
- NonPHIs.push_back(UI);
- } else if (!isa<PHINode>(UI) &&
- ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
- !isa<SelectInst>(UI)) ||
- !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence()))
- return false;
-
- // Remember that we completed the cycle.
- if (UI == Phi)
- FoundStartPHI = true;
- }
- Worklist.append(PHIs.begin(), PHIs.end());
- Worklist.append(NonPHIs.begin(), NonPHIs.end());
- }
-
- // This means we have seen one but not the other instruction of the
- // pattern or more than just a select and cmp.
- if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
- NumCmpSelectPatternInst != 2)
- return false;
-
- if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
- return false;
-
- if (Start != Phi) {
- // If the starting value is not the same as the phi node, we speculatively
- // looked through an 'and' instruction when evaluating a potential
- // arithmetic reduction to determine if it may have been type-promoted.
- //
- // We now compute the minimal bit width that is required to represent the
- // reduction. If this is the same width that was indicated by the 'and', we
- // can represent the reduction in the smaller type. The 'and' instruction
- // will be eliminated since it will essentially be a cast instruction that
- // can be ignore in the cost model. If we compute a different type than we
- // did when evaluating the 'and', the 'and' will not be eliminated, and we
- // will end up with different kinds of operations in the recurrence
- // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is
- // the case.
- //
- // The vectorizer relies on InstCombine to perform the actual
- // type-shrinking. It does this by inserting instructions to truncate the
- // exit value of the reduction to the width indicated by RecurrenceType and
- // then extend this value back to the original width. If IsSigned is false,
- // a 'zext' instruction will be generated; otherwise, a 'sext' will be
- // used.
- //
- // TODO: We should not rely on InstCombine to rewrite the reduction in the
- // smaller type. We should just generate a correctly typed expression
- // to begin with.
- Type *ComputedType;
- std::tie(ComputedType, IsSigned) =
- computeRecurrenceType(ExitInstruction, DB, AC, DT);
- if (ComputedType != RecurrenceType)
- return false;
-
- // The recurrence expression will be represented in a narrower type. If
- // there are any cast instructions that will be unnecessary, collect them
- // in CastInsts. Note that the 'and' instruction was already included in
- // this list.
- //
- // TODO: A better way to represent this may be to tag in some way all the
- // instructions that are a part of the reduction. The vectorizer cost
- // model could then apply the recurrence type to these instructions,
- // without needing a white list of instructions to ignore.
- collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts);
- }
-
- // We found a reduction var if we have reached the original phi node and we
- // only have a single instruction with out-of-loop users.
-
- // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
- // is saved as part of the RecurrenceDescriptor.
-
- // Save the description of this reduction variable.
- RecurrenceDescriptor RD(
- RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
- ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
- RedDes = RD;
-
- return true;
-}
-
-/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
-/// pattern corresponding to a min(X, Y) or max(X, Y).
-RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) {
-
- assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
- "Expect a select instruction");
- Instruction *Cmp = nullptr;
- SelectInst *Select = nullptr;
-
- // We must handle the select(cmp()) as a single instruction. Advance to the
- // select.
- if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
- if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
- return InstDesc(false, I);
- return InstDesc(Select, Prev.getMinMaxKind());
- }
-
- // Only handle single use cases for now.
- if (!(Select = dyn_cast<SelectInst>(I)))
- return InstDesc(false, I);
- if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
- !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
- return InstDesc(false, I);
- if (!Cmp->hasOneUse())
- return InstDesc(false, I);
-
- Value *CmpLeft;
- Value *CmpRight;
-
- // Look for a min/max pattern.
- if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_UIntMin);
- else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_UIntMax);
- else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_SIntMax);
- else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_SIntMin);
- else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_FloatMin);
- else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_FloatMax);
- else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_FloatMin);
- else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_FloatMax);
-
- return InstDesc(false, I);
-}
-
-RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
- InstDesc &Prev, bool HasFunNoNaNAttr) {
- bool FP = I->getType()->isFloatingPointTy();
- Instruction *UAI = Prev.getUnsafeAlgebraInst();
- if (!UAI && FP && !I->isFast())
- UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
-
- switch (I->getOpcode()) {
- default:
- return InstDesc(false, I);
- case Instruction::PHI:
- return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
- case Instruction::Sub:
- case Instruction::Add:
- return InstDesc(Kind == RK_IntegerAdd, I);
- case Instruction::Mul:
- return InstDesc(Kind == RK_IntegerMult, I);
- case Instruction::And:
- return InstDesc(Kind == RK_IntegerAnd, I);
- case Instruction::Or:
- return InstDesc(Kind == RK_IntegerOr, I);
- case Instruction::Xor:
- return InstDesc(Kind == RK_IntegerXor, I);
- case Instruction::FMul:
- return InstDesc(Kind == RK_FloatMult, I, UAI);
- case Instruction::FSub:
- case Instruction::FAdd:
- return InstDesc(Kind == RK_FloatAdd, I, UAI);
- case Instruction::FCmp:
- case Instruction::ICmp:
- case Instruction::Select:
- if (Kind != RK_IntegerMinMax &&
- (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
- return InstDesc(false, I);
- return isMinMaxSelectCmpPattern(I, Prev);
- }
-}
-
-bool RecurrenceDescriptor::hasMultipleUsesOf(
- Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) {
- unsigned NumUses = 0;
- for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E;
- ++Use) {
- if (Insts.count(dyn_cast<Instruction>(*Use)))
- ++NumUses;
- if (NumUses > 1)
- return true;
- }
-
- return false;
-}
-bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
- RecurrenceDescriptor &RedDes,
- DemandedBits *DB, AssumptionCache *AC,
- DominatorTree *DT) {
-
- BasicBlock *Header = TheLoop->getHeader();
- Function &F = *Header->getParent();
- bool HasFunNoNaNAttr =
- F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
-
- if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes,
- DB, AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi
- << "\n");
- return true;
- }
- // Not a reduction of known type.
- return false;
-}
-
-bool RecurrenceDescriptor::isFirstOrderRecurrence(
- PHINode *Phi, Loop *TheLoop,
- DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
-
- // Ensure the phi node is in the loop header and has two incoming values.
- if (Phi->getParent() != TheLoop->getHeader() ||
- Phi->getNumIncomingValues() != 2)
- return false;
-
- // Ensure the loop has a preheader and a single latch block. The loop
- // vectorizer will need the latch to set up the next iteration of the loop.
- auto *Preheader = TheLoop->getLoopPreheader();
- auto *Latch = TheLoop->getLoopLatch();
- if (!Preheader || !Latch)
- return false;
-
- // Ensure the phi node's incoming blocks are the loop preheader and latch.
- if (Phi->getBasicBlockIndex(Preheader) < 0 ||
- Phi->getBasicBlockIndex(Latch) < 0)
- return false;
-
- // Get the previous value. The previous value comes from the latch edge while
- // the initial value comes form the preheader edge.
- auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
- if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
- SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
- return false;
-
- // Ensure every user of the phi node is dominated by the previous value.
- // The dominance requirement ensures the loop vectorizer will not need to
- // vectorize the initial value prior to the first iteration of the loop.
- // TODO: Consider extending this sinking to handle other kinds of instructions
- // and expressions, beyond sinking a single cast past Previous.
- if (Phi->hasOneUse()) {
- auto *I = Phi->user_back();
- if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() &&
- DT->dominates(Previous, I->user_back())) {
- if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking.
- SinkAfter[I] = Previous;
- return true;
- }
- }
-
- for (User *U : Phi->users())
- if (auto *I = dyn_cast<Instruction>(U)) {
- if (!DT->dominates(Previous, I))
- return false;
- }
-
- return true;
-}
-
-/// This function returns the identity element (or neutral element) for
-/// the operation K.
-Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
- Type *Tp) {
- switch (K) {
- case RK_IntegerXor:
- case RK_IntegerAdd:
- case RK_IntegerOr:
- // Adding, Xoring, Oring zero to a number does not change it.
- return ConstantInt::get(Tp, 0);
- case RK_IntegerMult:
- // Multiplying a number by 1 does not change it.
- return ConstantInt::get(Tp, 1);
- case RK_IntegerAnd:
- // AND-ing a number with an all-1 value does not change it.
- return ConstantInt::get(Tp, -1, true);
- case RK_FloatMult:
- // Multiplying a number by 1 does not change it.
- return ConstantFP::get(Tp, 1.0L);
- case RK_FloatAdd:
- // Adding zero to a number does not change it.
- return ConstantFP::get(Tp, 0.0L);
- default:
- llvm_unreachable("Unknown recurrence kind");
- }
-}
-
-/// This function translates the recurrence kind to an LLVM binary operator.
-unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) {
- switch (Kind) {
- case RK_IntegerAdd:
- return Instruction::Add;
- case RK_IntegerMult:
- return Instruction::Mul;
- case RK_IntegerOr:
- return Instruction::Or;
- case RK_IntegerAnd:
- return Instruction::And;
- case RK_IntegerXor:
- return Instruction::Xor;
- case RK_FloatMult:
- return Instruction::FMul;
- case RK_FloatAdd:
- return Instruction::FAdd;
- case RK_IntegerMinMax:
- return Instruction::ICmp;
- case RK_FloatMinMax:
- return Instruction::FCmp;
- default:
- llvm_unreachable("Unknown recurrence operation");
- }
-}
-
-Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
- MinMaxRecurrenceKind RK,
- Value *Left, Value *Right) {
- CmpInst::Predicate P = CmpInst::ICMP_NE;
- switch (RK) {
- default:
- llvm_unreachable("Unknown min/max recurrence kind");
- case MRK_UIntMin:
- P = CmpInst::ICMP_ULT;
- break;
- case MRK_UIntMax:
- P = CmpInst::ICMP_UGT;
- break;
- case MRK_SIntMin:
- P = CmpInst::ICMP_SLT;
- break;
- case MRK_SIntMax:
- P = CmpInst::ICMP_SGT;
- break;
- case MRK_FloatMin:
- P = CmpInst::FCMP_OLT;
- break;
- case MRK_FloatMax:
- P = CmpInst::FCMP_OGT;
- break;
- }
-
- // We only match FP sequences that are 'fast', so we can unconditionally
- // set it on any generated instructions.
- IRBuilder<>::FastMathFlagGuard FMFG(Builder);
- FastMathFlags FMF;
- FMF.setFast();
- Builder.setFastMathFlags(FMF);
-
- Value *Cmp;
- if (RK == MRK_FloatMin || RK == MRK_FloatMax)
- Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
- else
- Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
-
- Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
- return Select;
-}
-
-InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
- const SCEV *Step, BinaryOperator *BOp,
- SmallVectorImpl<Instruction *> *Casts)
- : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
- assert(IK != IK_NoInduction && "Not an induction");
-
- // Start value type should match the induction kind and the value
- // itself should not be null.
- assert(StartValue && "StartValue is null");
- assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
- "StartValue is not a pointer for pointer induction");
- assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
- "StartValue is not an integer for integer induction");
-
- // Check the Step Value. It should be non-zero integer value.
- assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
- "Step value is zero");
-
- assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
- "Step value should be constant for pointer induction");
- assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
- "StepValue is not an integer");
-
- assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
- "StepValue is not FP for FpInduction");
- assert((IK != IK_FpInduction || (InductionBinOp &&
- (InductionBinOp->getOpcode() == Instruction::FAdd ||
- InductionBinOp->getOpcode() == Instruction::FSub))) &&
- "Binary opcode should be specified for FP induction");
-
- if (Casts) {
- for (auto &Inst : *Casts) {
- RedundantCasts.push_back(Inst);
- }
- }
-}
-
-int InductionDescriptor::getConsecutiveDirection() const {
- ConstantInt *ConstStep = getConstIntStepValue();
- if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
- return ConstStep->getSExtValue();
- return 0;
-}
-
-ConstantInt *InductionDescriptor::getConstIntStepValue() const {
- if (isa<SCEVConstant>(Step))
- return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
- return nullptr;
-}
-
-Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
- ScalarEvolution *SE,
- const DataLayout& DL) const {
-
- SCEVExpander Exp(*SE, DL, "induction");
- assert(Index->getType() == Step->getType() &&
- "Index type does not match StepValue type");
- switch (IK) {
- case IK_IntInduction: {
- assert(Index->getType() == StartValue->getType() &&
- "Index type does not match StartValue type");
-
- // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution
- // and calculate (Start + Index * Step) for all cases, without
- // special handling for "isOne" and "isMinusOne".
- // But in the real life the result code getting worse. We mix SCEV
- // expressions and ADD/SUB operations and receive redundant
- // intermediate values being calculated in different ways and
- // Instcombine is unable to reduce them all.
-
- if (getConstIntStepValue() &&
- getConstIntStepValue()->isMinusOne())
- return B.CreateSub(StartValue, Index);
- if (getConstIntStepValue() &&
- getConstIntStepValue()->isOne())
- return B.CreateAdd(StartValue, Index);
- const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue),
- SE->getMulExpr(Step, SE->getSCEV(Index)));
- return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
- }
- case IK_PtrInduction: {
- assert(isa<SCEVConstant>(Step) &&
- "Expected constant step for pointer induction");
- const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
- Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
- return B.CreateGEP(nullptr, StartValue, Index);
- }
- case IK_FpInduction: {
- assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
- assert(InductionBinOp &&
- (InductionBinOp->getOpcode() == Instruction::FAdd ||
- InductionBinOp->getOpcode() == Instruction::FSub) &&
- "Original bin op should be defined for FP induction");
-
- Value *StepValue = cast<SCEVUnknown>(Step)->getValue();
-
- // Floating point operations had to be 'fast' to enable the induction.
- FastMathFlags Flags;
- Flags.setFast();
-
- Value *MulExp = B.CreateFMul(StepValue, Index);
- if (isa<Instruction>(MulExp))
- // We have to check, the MulExp may be a constant.
- cast<Instruction>(MulExp)->setFastMathFlags(Flags);
-
- Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue,
- MulExp, "induction");
- if (isa<Instruction>(BOp))
- cast<Instruction>(BOp)->setFastMathFlags(Flags);
-
- return BOp;
- }
- case IK_NoInduction:
- return nullptr;
- }
- llvm_unreachable("invalid enum");
-}
-
-bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
- ScalarEvolution *SE,
- InductionDescriptor &D) {
-
- // Here we only handle FP induction variables.
- assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
-
- if (TheLoop->getHeader() != Phi->getParent())
- return false;
-
- // The loop may have multiple entrances or multiple exits; we can analyze
- // this phi if it has a unique entry value and a unique backedge value.
- if (Phi->getNumIncomingValues() != 2)
- return false;
- Value *BEValue = nullptr, *StartValue = nullptr;
- if (TheLoop->contains(Phi->getIncomingBlock(0))) {
- BEValue = Phi->getIncomingValue(0);
- StartValue = Phi->getIncomingValue(1);
- } else {
- assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
- "Unexpected Phi node in the loop");
- BEValue = Phi->getIncomingValue(1);
- StartValue = Phi->getIncomingValue(0);
- }
-
- BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
- if (!BOp)
- return false;
-
- Value *Addend = nullptr;
- if (BOp->getOpcode() == Instruction::FAdd) {
- if (BOp->getOperand(0) == Phi)
- Addend = BOp->getOperand(1);
- else if (BOp->getOperand(1) == Phi)
- Addend = BOp->getOperand(0);
- } else if (BOp->getOpcode() == Instruction::FSub)
- if (BOp->getOperand(0) == Phi)
- Addend = BOp->getOperand(1);
-
- if (!Addend)
- return false;
-
- // The addend should be loop invariant
- if (auto *I = dyn_cast<Instruction>(Addend))
- if (TheLoop->contains(I))
- return false;
-
- // FP Step has unknown SCEV
- const SCEV *Step = SE->getUnknown(Addend);
- D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
- return true;
-}
-
-/// This function is called when we suspect that the update-chain of a phi node
-/// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts,
-/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
-/// predicate P under which the SCEV expression for the phi can be the
-/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
-/// cast instructions that are involved in the update-chain of this induction.
-/// A caller that adds the required runtime predicate can be free to drop these
-/// cast instructions, and compute the phi using \p AR (instead of some scev
-/// expression with casts).
-///
-/// For example, without a predicate the scev expression can take the following
-/// form:
-/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
-///
-/// It corresponds to the following IR sequence:
-/// %for.body:
-/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
-/// %casted_phi = "ExtTrunc i64 %x"
-/// %add = add i64 %casted_phi, %step
-///
-/// where %x is given in \p PN,
-/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
-/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
-/// several forms, for example, such as:
-/// ExtTrunc1: %casted_phi = and %x, 2^n-1
-/// or:
-/// ExtTrunc2: %t = shl %x, m
-/// %casted_phi = ashr %t, m
-///
-/// If we are able to find such sequence, we return the instructions
-/// we found, namely %casted_phi and the instructions on its use-def chain up
-/// to the phi (not including the phi).
-static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
- const SCEVUnknown *PhiScev,
- const SCEVAddRecExpr *AR,
- SmallVectorImpl<Instruction *> &CastInsts) {
-
- assert(CastInsts.empty() && "CastInsts is expected to be empty.");
- auto *PN = cast<PHINode>(PhiScev->getValue());
- assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
- const Loop *L = AR->getLoop();
-
- // Find any cast instructions that participate in the def-use chain of
- // PhiScev in the loop.
- // FORNOW/TODO: We currently expect the def-use chain to include only
- // two-operand instructions, where one of the operands is an invariant.
- // createAddRecFromPHIWithCasts() currently does not support anything more
- // involved than that, so we keep the search simple. This can be
- // extended/generalized as needed.
-
- auto getDef = [&](const Value *Val) -> Value * {
- const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
- if (!BinOp)
- return nullptr;
- Value *Op0 = BinOp->getOperand(0);
- Value *Op1 = BinOp->getOperand(1);
- Value *Def = nullptr;
- if (L->isLoopInvariant(Op0))
- Def = Op1;
- else if (L->isLoopInvariant(Op1))
- Def = Op0;
- return Def;
- };
-
- // Look for the instruction that defines the induction via the
- // loop backedge.
- BasicBlock *Latch = L->getLoopLatch();
- if (!Latch)
- return false;
- Value *Val = PN->getIncomingValueForBlock(Latch);
- if (!Val)
- return false;
-
- // Follow the def-use chain until the induction phi is reached.
- // If on the way we encounter a Value that has the same SCEV Expr as the
- // phi node, we can consider the instructions we visit from that point
- // as part of the cast-sequence that can be ignored.
- bool InCastSequence = false;
- auto *Inst = dyn_cast<Instruction>(Val);
- while (Val != PN) {
- // If we encountered a phi node other than PN, or if we left the loop,
- // we bail out.
- if (!Inst || !L->contains(Inst)) {
- return false;
- }
- auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
- if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
- InCastSequence = true;
- if (InCastSequence) {
- // Only the last instruction in the cast sequence is expected to have
- // uses outside the induction def-use chain.
- if (!CastInsts.empty())
- if (!Inst->hasOneUse())
- return false;
- CastInsts.push_back(Inst);
- }
- Val = getDef(Val);
- if (!Val)
- return false;
- Inst = dyn_cast<Instruction>(Val);
- }
-
- return InCastSequence;
-}
-
-bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
- PredicatedScalarEvolution &PSE,
- InductionDescriptor &D,
- bool Assume) {
- Type *PhiTy = Phi->getType();
-
- // Handle integer and pointer inductions variables.
- // Now we handle also FP induction but not trying to make a
- // recurrent expression from the PHI node in-place.
-
- if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() &&
- !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
- return false;
-
- if (PhiTy->isFloatingPointTy())
- return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
-
- const SCEV *PhiScev = PSE.getSCEV(Phi);
- const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
-
- // We need this expression to be an AddRecExpr.
- if (Assume && !AR)
- AR = PSE.getAsAddRec(Phi);
-
- if (!AR) {
- LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
- return false;
- }
-
- // Record any Cast instructions that participate in the induction update
- const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
- // If we started from an UnknownSCEV, and managed to build an addRecurrence
- // only after enabling Assume with PSCEV, this means we may have encountered
- // cast instructions that required adding a runtime check in order to
- // guarantee the correctness of the AddRecurence respresentation of the
- // induction.
- if (PhiScev != AR && SymbolicPhi) {
- SmallVector<Instruction *, 2> Casts;
- if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
- return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
- }
-
- return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
-}
-
-bool InductionDescriptor::isInductionPHI(
- PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
- InductionDescriptor &D, const SCEV *Expr,
- SmallVectorImpl<Instruction *> *CastsToIgnore) {
- Type *PhiTy = Phi->getType();
- // We only handle integer and pointer inductions variables.
- if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
- return false;
-
- // Check that the PHI is consecutive.
- const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
-
- if (!AR) {
- LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
- return false;
- }
-
- if (AR->getLoop() != TheLoop) {
- // FIXME: We should treat this as a uniform. Unfortunately, we
- // don't currently know how to handled uniform PHIs.
- LLVM_DEBUG(
- dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
- return false;
- }
-
- Value *StartValue =
- Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
- const SCEV *Step = AR->getStepRecurrence(*SE);
- // Calculate the pointer stride and check if it is consecutive.
- // The stride may be a constant or a loop invariant integer value.
- const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
- if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
- return false;
-
- if (PhiTy->isIntegerTy()) {
- D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/ nullptr,
- CastsToIgnore);
- return true;
- }
-
- assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
- // Pointer induction should be a constant.
- if (!ConstStep)
- return false;
-
- ConstantInt *CV = ConstStep->getValue();
- Type *PointerElementType = PhiTy->getPointerElementType();
- // The pointer stride cannot be determined if the pointer element type is not
- // sized.
- if (!PointerElementType->isSized())
- return false;
-
- const DataLayout &DL = Phi->getModule()->getDataLayout();
- int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
- if (!Size)
- return false;
-
- int64_t CVSize = CV->getSExtValue();
- if (CVSize % Size)
- return false;
- auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size,
- true /* signed */);
- D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
- return true;
-}
+static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
bool PreserveLCSSA) {
@@ -1173,7 +79,7 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
return false;
auto *NewExitBB = SplitBlockPredecessors(
- BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA);
+ BB, InLoopPredecessors, ".loopexit", DT, LI, nullptr, PreserveLCSSA);
if (!NewExitBB)
LLVM_DEBUG(
@@ -1286,37 +192,231 @@ void llvm::initializeLoopPassPass(PassRegistry &Registry) {
/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
/// operand or null otherwise. If the string metadata is not found return
/// Optional's not-a-value.
-Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
+Optional<const MDOperand *> llvm::findStringMetadataForLoop(const Loop *TheLoop,
StringRef Name) {
- MDNode *LoopID = TheLoop->getLoopID();
- // Return none if LoopID is false.
- if (!LoopID)
+ MDNode *MD = findOptionMDForLoop(TheLoop, Name);
+ if (!MD)
return None;
+ switch (MD->getNumOperands()) {
+ case 1:
+ return nullptr;
+ case 2:
+ return &MD->getOperand(1);
+ default:
+ llvm_unreachable("loop metadata has 0 or 1 operand");
+ }
+}
- // First operand should refer to the loop id itself.
- assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
- assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
+ StringRef Name) {
+ MDNode *MD = findOptionMDForLoop(TheLoop, Name);
+ if (!MD)
+ return None;
+ switch (MD->getNumOperands()) {
+ case 1:
+ // When the value is absent it is interpreted as 'attribute set'.
+ return true;
+ case 2:
+ return mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get());
+ }
+ llvm_unreachable("unexpected number of options");
+}
- // Iterate over LoopID operands and look for MDString Metadata
- for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
- MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
- if (!MD)
- continue;
- MDString *S = dyn_cast<MDString>(MD->getOperand(0));
- if (!S)
+static bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
+ return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false);
+}
+
+llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop,
+ StringRef Name) {
+ const MDOperand *AttrMD =
+ findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr);
+ if (!AttrMD)
+ return None;
+
+ ConstantInt *IntMD = mdconst::extract_or_null<ConstantInt>(AttrMD->get());
+ if (!IntMD)
+ return None;
+
+ return IntMD->getSExtValue();
+}
+
+Optional<MDNode *> llvm::makeFollowupLoopID(
+ MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
+ const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
+ if (!OrigLoopID) {
+ if (AlwaysNew)
+ return nullptr;
+ return None;
+ }
+
+ assert(OrigLoopID->getOperand(0) == OrigLoopID);
+
+ bool InheritAllAttrs = !InheritOptionsExceptPrefix;
+ bool InheritSomeAttrs =
+ InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0';
+ SmallVector<Metadata *, 8> MDs;
+ MDs.push_back(nullptr);
+
+ bool Changed = false;
+ if (InheritAllAttrs || InheritSomeAttrs) {
+ for (const MDOperand &Existing : drop_begin(OrigLoopID->operands(), 1)) {
+ MDNode *Op = cast<MDNode>(Existing.get());
+
+ auto InheritThisAttribute = [InheritSomeAttrs,
+ InheritOptionsExceptPrefix](MDNode *Op) {
+ if (!InheritSomeAttrs)
+ return false;
+
+ // Skip malformatted attribute metadata nodes.
+ if (Op->getNumOperands() == 0)
+ return true;
+ Metadata *NameMD = Op->getOperand(0).get();
+ if (!isa<MDString>(NameMD))
+ return true;
+ StringRef AttrName = cast<MDString>(NameMD)->getString();
+
+ // Do not inherit excluded attributes.
+ return !AttrName.startswith(InheritOptionsExceptPrefix);
+ };
+
+ if (InheritThisAttribute(Op))
+ MDs.push_back(Op);
+ else
+ Changed = true;
+ }
+ } else {
+ // Modified if we dropped at least one attribute.
+ Changed = OrigLoopID->getNumOperands() > 1;
+ }
+
+ bool HasAnyFollowup = false;
+ for (StringRef OptionName : FollowupOptions) {
+ MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName);
+ if (!FollowupNode)
continue;
- // Return true if MDString holds expected MetaData.
- if (Name.equals(S->getString()))
- switch (MD->getNumOperands()) {
- case 1:
- return nullptr;
- case 2:
- return &MD->getOperand(1);
- default:
- llvm_unreachable("loop metadata has 0 or 1 operand");
- }
+
+ HasAnyFollowup = true;
+ for (const MDOperand &Option : drop_begin(FollowupNode->operands(), 1)) {
+ MDs.push_back(Option.get());
+ Changed = true;
+ }
}
- return None;
+
+ // Attributes of the followup loop not specified explicity, so signal to the
+ // transformation pass to add suitable attributes.
+ if (!AlwaysNew && !HasAnyFollowup)
+ return None;
+
+ // If no attributes were added or remove, the previous loop Id can be reused.
+ if (!AlwaysNew && !Changed)
+ return OrigLoopID;
+
+ // No attributes is equivalent to having no !llvm.loop metadata at all.
+ if (MDs.size() == 1)
+ return nullptr;
+
+ // Build the new loop ID.
+ MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs);
+ FollowupLoopID->replaceOperandWith(0, FollowupLoopID);
+ return FollowupLoopID;
+}
+
+bool llvm::hasDisableAllTransformsHint(const Loop *L) {
+ return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced);
+}
+
+TransformationMode llvm::hasUnrollTransformation(Loop *L) {
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
+ return TM_SuppressedByUser;
+
+ Optional<int> Count =
+ getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
+ if (Count.hasValue())
+ return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
+
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
+ return TM_ForcedByUser;
+
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full"))
+ return TM_ForcedByUser;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
+}
+
+TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) {
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
+ return TM_SuppressedByUser;
+
+ Optional<int> Count =
+ getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
+ if (Count.hasValue())
+ return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
+
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
+ return TM_ForcedByUser;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
+}
+
+TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
+ Optional<bool> Enable =
+ getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
+
+ if (Enable == false)
+ return TM_SuppressedByUser;
+
+ Optional<int> VectorizeWidth =
+ getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
+ Optional<int> InterleaveCount =
+ getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
+
+ if (Enable == true) {
+ // 'Forcing' vector width and interleave count to one effectively disables
+ // this tranformation.
+ if (VectorizeWidth == 1 && InterleaveCount == 1)
+ return TM_SuppressedByUser;
+ return TM_ForcedByUser;
+ }
+
+ if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
+ return TM_Disable;
+
+ if (VectorizeWidth == 1 && InterleaveCount == 1)
+ return TM_Disable;
+
+ if (VectorizeWidth > 1 || InterleaveCount > 1)
+ return TM_Enable;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
+}
+
+TransformationMode llvm::hasDistributeTransformation(Loop *L) {
+ if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
+ return TM_ForcedByUser;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
+}
+
+TransformationMode llvm::hasLICMVersioningTransformation(Loop *L) {
+ if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
+ return TM_SuppressedByUser;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
}
/// Does a BFS from a given node to all of its children inside a given loop.
@@ -1425,14 +525,19 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
// Remove the old branch.
Preheader->getTerminator()->eraseFromParent();
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
if (DT) {
// Update the dominator tree by informing it about the new edge from the
// preheader to the exit.
- DT->insertEdge(Preheader, ExitBlock);
+ DTU.insertEdge(Preheader, ExitBlock);
// Inform the dominator tree about the removed edge.
- DT->deleteEdge(Preheader, L->getHeader());
+ DTU.deleteEdge(Preheader, L->getHeader());
}
+ // Use a map to unique and a vector to guarantee deterministic ordering.
+ llvm::SmallDenseSet<std::pair<DIVariable *, DIExpression *>, 4> DeadDebugSet;
+ llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst;
+
// Given LCSSA form is satisfied, we should not have users of instructions
// within the dead loop outside of the loop. However, LCSSA doesn't take
// unreachable uses into account. We handle them here.
@@ -1457,8 +562,27 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
"Unexpected user in reachable block");
U.set(Undef);
}
+ auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
+ if (!DVI)
+ continue;
+ auto Key = DeadDebugSet.find({DVI->getVariable(), DVI->getExpression()});
+ if (Key != DeadDebugSet.end())
+ continue;
+ DeadDebugSet.insert({DVI->getVariable(), DVI->getExpression()});
+ DeadDebugInst.push_back(DVI);
}
+ // After the loop has been deleted all the values defined and modified
+ // inside the loop are going to be unavailable.
+ // Since debug values in the loop have been deleted, inserting an undef
+ // dbg.value truncates the range of any dbg.value before the loop where the
+ // loop used to be. This is particularly important for constant values.
+ DIBuilder DIB(*ExitBlock->getModule());
+ for (auto *DVI : DeadDebugInst)
+ DIB.insertDbgValueIntrinsic(
+ UndefValue::get(Builder.getInt32Ty()), DVI->getVariable(),
+ DVI->getExpression(), DVI->getDebugLoc(), ExitBlock->getFirstNonPHI());
+
// Remove the block from the reference counting scheme, so that we can
// delete it freely later.
for (auto *Block : L->blocks())
@@ -1519,6 +643,28 @@ Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
return (FalseVal + (TrueVal / 2)) / TrueVal;
}
+bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
+ ScalarEvolution &SE) {
+ Loop *OuterL = InnerLoop->getParentLoop();
+ if (!OuterL)
+ return true;
+
+ // Get the backedge taken count for the inner loop
+ BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
+ const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);
+ if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
+ !InnerLoopBECountSC->getType()->isIntegerTy())
+ return false;
+
+ // Get whether count is invariant to the outer loop
+ ScalarEvolution::LoopDisposition LD =
+ SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
+ if (LD != ScalarEvolution::LoopInvariant)
+ return false;
+
+ return true;
+}
+
/// Adds a 'fast' flag to floating point operations.
static Value *addFastMathFlag(Value *V) {
if (isa<FPMathOperator>(V)) {
@@ -1529,6 +675,51 @@ static Value *addFastMathFlag(Value *V) {
return V;
}
+Value *llvm::createMinMaxOp(IRBuilder<> &Builder,
+ RecurrenceDescriptor::MinMaxRecurrenceKind RK,
+ Value *Left, Value *Right) {
+ CmpInst::Predicate P = CmpInst::ICMP_NE;
+ switch (RK) {
+ default:
+ llvm_unreachable("Unknown min/max recurrence kind");
+ case RecurrenceDescriptor::MRK_UIntMin:
+ P = CmpInst::ICMP_ULT;
+ break;
+ case RecurrenceDescriptor::MRK_UIntMax:
+ P = CmpInst::ICMP_UGT;
+ break;
+ case RecurrenceDescriptor::MRK_SIntMin:
+ P = CmpInst::ICMP_SLT;
+ break;
+ case RecurrenceDescriptor::MRK_SIntMax:
+ P = CmpInst::ICMP_SGT;
+ break;
+ case RecurrenceDescriptor::MRK_FloatMin:
+ P = CmpInst::FCMP_OLT;
+ break;
+ case RecurrenceDescriptor::MRK_FloatMax:
+ P = CmpInst::FCMP_OGT;
+ break;
+ }
+
+ // We only match FP sequences that are 'fast', so we can unconditionally
+ // set it on any generated instructions.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ FastMathFlags FMF;
+ FMF.setFast();
+ Builder.setFastMathFlags(FMF);
+
+ Value *Cmp;
+ if (RK == RecurrenceDescriptor::MRK_FloatMin ||
+ RK == RecurrenceDescriptor::MRK_FloatMax)
+ Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
+ else
+ Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
+
+ Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
+ return Select;
+}
+
// Helper to generate an ordered reduction.
Value *
llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src,
@@ -1550,8 +741,7 @@ llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src,
} else {
assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
"Invalid min/max");
- Result = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, Result,
- Ext);
+ Result = createMinMaxOp(Builder, MinMaxKind, Result, Ext);
}
if (!RedOps.empty())
@@ -1594,8 +784,7 @@ llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
} else {
assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
"Invalid min/max");
- TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec,
- Shuf);
+ TmpVec = createMinMaxOp(Builder, MinMaxKind, TmpVec, Shuf);
}
if (!RedOps.empty())
propagateIRFlags(TmpVec, RedOps);
@@ -1613,7 +802,7 @@ Value *llvm::createSimpleTargetReduction(
assert(isa<VectorType>(Src->getType()) && "Type must be a vector");
Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
- std::function<Value*()> BuildFunc;
+ std::function<Value *()> BuildFunc;
using RD = RecurrenceDescriptor;
RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
// TODO: Support creating ordered reductions.
@@ -1739,3 +928,39 @@ void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
VecOp->andIRFlags(V);
}
}
+
+bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L,
+ ScalarEvolution &SE) {
+ const SCEV *Zero = SE.getZero(S->getType());
+ return SE.isAvailableAtLoopEntry(S, L) &&
+ SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero);
+}
+
+bool llvm::isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
+ ScalarEvolution &SE) {
+ const SCEV *Zero = SE.getZero(S->getType());
+ return SE.isAvailableAtLoopEntry(S, L) &&
+ SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero);
+}
+
+bool llvm::cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
+ bool Signed) {
+ unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
+ APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) :
+ APInt::getMinValue(BitWidth);
+ auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ return SE.isAvailableAtLoopEntry(S, L) &&
+ SE.isLoopEntryGuardedByCond(L, Predicate, S,
+ SE.getConstant(Min));
+}
+
+bool llvm::cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
+ bool Signed) {
+ unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
+ APInt Max = Signed ? APInt::getSignedMaxValue(BitWidth) :
+ APInt::getMaxValue(BitWidth);
+ auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ return SE.isAvailableAtLoopEntry(S, L) &&
+ SE.isLoopEntryGuardedByCond(L, Predicate, S,
+ SE.getConstant(Max));
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 03006ef3a2d3..661b4fa5bcb7 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -301,7 +301,7 @@ static void createMemMoveLoop(Instruction *InsertBefore,
// the appropriate conditional branches when the loop is built.
ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
SrcAddr, DstAddr, "compare_src_dst");
- TerminatorInst *ThenTerm, *ElseTerm;
+ Instruction *ThenTerm, *ElseTerm;
SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm,
&ElseTerm);
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index e99ecfef19cd..d019a44fc705 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -372,7 +372,7 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
Cases.push_back(CaseRange(Case.getCaseValue(), Case.getCaseValue(),
Case.getCaseSuccessor()));
- llvm::sort(Cases.begin(), Cases.end(), CaseCmp());
+ llvm::sort(Cases, CaseCmp());
// Merge case into clusters
if (Cases.size() >= 2) {
diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index ba4b7f3cc263..ae5e72ea4d30 100644
--- a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -174,6 +174,49 @@ std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
return std::make_pair(Ctor, InitFunction);
}
+std::pair<Function *, Function *>
+llvm::getOrCreateSanitizerCtorAndInitFunctions(
+ Module &M, StringRef CtorName, StringRef InitName,
+ ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
+ function_ref<void(Function *, Function *)> FunctionsCreatedCallback,
+ StringRef VersionCheckName) {
+ assert(!CtorName.empty() && "Expected ctor function name");
+
+ if (Function *Ctor = M.getFunction(CtorName))
+ // FIXME: Sink this logic into the module, similar to the handling of
+ // globals. This will make moving to a concurrent model much easier.
+ if (Ctor->arg_size() == 0 ||
+ Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
+ return {Ctor, declareSanitizerInitFunction(M, InitName, InitArgTypes)};
+
+ Function *Ctor, *InitFunction;
+ std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
+ M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName);
+ FunctionsCreatedCallback(Ctor, InitFunction);
+ return std::make_pair(Ctor, InitFunction);
+}
+
+Function *llvm::getOrCreateInitFunction(Module &M, StringRef Name) {
+ assert(!Name.empty() && "Expected init function name");
+ if (Function *F = M.getFunction(Name)) {
+ if (F->arg_size() != 0 ||
+ F->getReturnType() != Type::getVoidTy(M.getContext())) {
+ std::string Err;
+ raw_string_ostream Stream(Err);
+ Stream << "Sanitizer interface function defined with wrong type: " << *F;
+ report_fatal_error(Err);
+ }
+ return F;
+ }
+ Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ Name, AttributeList(), Type::getVoidTy(M.getContext())));
+ F->setLinkage(Function::ExternalLinkage);
+
+ appendToGlobalCtors(M, F, 0);
+
+ return F;
+}
+
void llvm::filterDeadComdatFunctions(
Module &M, SmallVectorImpl<Function *> &DeadComdatFunctions) {
// Build a map from the comdat to the number of entries in that comdat we
diff --git a/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp
deleted file mode 100644
index 6d0b96f6aa8a..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-//===-- OrderedInstructions.cpp - Instruction dominance function ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines utility to check dominance relation of 2 instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/OrderedInstructions.h"
-using namespace llvm;
-
-bool OrderedInstructions::localDominates(const Instruction *InstA,
- const Instruction *InstB) const {
- assert(InstA->getParent() == InstB->getParent() &&
- "Instructions must be in the same basic block");
-
- const BasicBlock *IBB = InstA->getParent();
- auto OBB = OBBMap.find(IBB);
- if (OBB == OBBMap.end())
- OBB = OBBMap.insert({IBB, make_unique<OrderedBasicBlock>(IBB)}).first;
- return OBB->second->dominates(InstA, InstB);
-}
-
-/// Given 2 instructions, use OrderedBasicBlock to check for dominance relation
-/// if the instructions are in the same basic block, Otherwise, use dominator
-/// tree.
-bool OrderedInstructions::dominates(const Instruction *InstA,
- const Instruction *InstB) const {
- // Use ordered basic block to do dominance check in case the 2 instructions
- // are in the same basic block.
- if (InstA->getParent() == InstB->getParent())
- return localDominates(InstA, InstB);
- return DT->dominates(InstA->getParent(), InstB->getParent());
-}
-
-bool OrderedInstructions::dfsBefore(const Instruction *InstA,
- const Instruction *InstB) const {
- // Use ordered basic block in case the 2 instructions are in the same basic
- // block.
- if (InstA->getParent() == InstB->getParent())
- return localDominates(InstA, InstB);
-
- DomTreeNode *DA = DT->getNode(InstA->getParent());
- DomTreeNode *DB = DT->getNode(InstB->getParent());
- return DA->getDFSNumIn() < DB->getDFSNumIn();
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index 2923977b791a..585ce6b4c118 100644
--- a/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -35,7 +35,6 @@
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/OrderedInstructions.h"
#include <algorithm>
#define DEBUG_TYPE "predicateinfo"
using namespace llvm;
@@ -523,7 +522,7 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
if (isa<PredicateWithEdge>(ValInfo)) {
IRBuilder<> B(getBranchTerminator(ValInfo));
Function *IF = getCopyDeclaration(F.getParent(), Op->getType());
- if (IF->user_begin() == IF->user_end())
+ if (empty(IF->users()))
CreatedDeclarations.insert(IF);
CallInst *PIC =
B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
@@ -535,7 +534,7 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
"Should not have gotten here without it being an assume");
IRBuilder<> B(PAssume->AssumeInst);
Function *IF = getCopyDeclaration(F.getParent(), Op->getType());
- if (IF->user_begin() == IF->user_end())
+ if (empty(IF->users()))
CreatedDeclarations.insert(IF);
CallInst *PIC = B.CreateCall(IF, Op);
PredicateMap.insert({PIC, ValInfo});
@@ -570,7 +569,7 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
auto Comparator = [&](const Value *A, const Value *B) {
return valueComesBefore(OI, A, B);
};
- llvm::sort(OpsToRename.begin(), OpsToRename.end(), Comparator);
+ llvm::sort(OpsToRename, Comparator);
ValueDFS_Compare Compare(OI);
// Compute liveness, and rename in O(uses) per Op.
for (auto *Op : OpsToRename) {
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 86e15bbd7f22..91e4f4254b3e 100644
--- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -82,8 +82,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
if (SI->isVolatile())
return false;
} else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
- if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end)
+ if (!II->isLifetimeStartOrEnd())
return false;
} else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
@@ -116,7 +115,7 @@ struct AllocaInfo {
bool OnlyUsedInOneBlock;
Value *AllocaPointerVal;
- TinyPtrVector<DbgInfoIntrinsic *> DbgDeclares;
+ TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares;
void clear() {
DefiningBlocks.clear();
@@ -263,7 +262,7 @@ struct PromoteMem2Reg {
/// For each alloca, we keep track of the dbg.declare intrinsic that
/// describes it, if any, so that we can convert it to a dbg.value
/// intrinsic if the alloca gets promoted.
- SmallVector<TinyPtrVector<DbgInfoIntrinsic *>, 8> AllocaDbgDeclares;
+ SmallVector<TinyPtrVector<DbgVariableIntrinsic *>, 8> AllocaDbgDeclares;
/// The set of basic blocks the renamer has already visited.
SmallPtrSet<BasicBlock *, 16> Visited;
@@ -426,7 +425,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
// Record debuginfo for the store and remove the declaration's
// debuginfo.
- for (DbgInfoIntrinsic *DII : Info.DbgDeclares) {
+ for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB);
DII->eraseFromParent();
@@ -477,7 +476,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
// Sort the stores by their index, making it efficient to do a lookup with a
// binary search.
- llvm::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first());
+ llvm::sort(StoresByIndex, less_first());
// Walk all of the loads from this alloca, replacing them with the nearest
// store above them, if any.
@@ -527,7 +526,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
while (!AI->use_empty()) {
StoreInst *SI = cast<StoreInst>(AI->user_back());
// Record debuginfo for the store before removing it.
- for (DbgInfoIntrinsic *DII : Info.DbgDeclares) {
+ for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
ConvertDebugDeclareToDebugValue(DII, SI, DIB);
}
@@ -539,7 +538,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
LBI.deleteValue(AI);
// The alloca's debuginfo can be removed as well.
- for (DbgInfoIntrinsic *DII : Info.DbgDeclares) {
+ for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
DII->eraseFromParent();
LBI.deleteValue(DII);
}
@@ -638,10 +637,9 @@ void PromoteMem2Reg::run() {
SmallVector<BasicBlock *, 32> PHIBlocks;
IDF.calculate(PHIBlocks);
if (PHIBlocks.size() > 1)
- llvm::sort(PHIBlocks.begin(), PHIBlocks.end(),
- [this](BasicBlock *A, BasicBlock *B) {
- return BBNumbers.lookup(A) < BBNumbers.lookup(B);
- });
+ llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) {
+ return BBNumbers.lookup(A) < BBNumbers.lookup(B);
+ });
unsigned CurrentVersion = 0;
for (BasicBlock *BB : PHIBlocks)
@@ -752,14 +750,18 @@ void PromoteMem2Reg::run() {
// Ok, now we know that all of the PHI nodes are missing entries for some
// basic blocks. Start by sorting the incoming predecessors for efficient
// access.
- llvm::sort(Preds.begin(), Preds.end());
+ auto CompareBBNumbers = [this](BasicBlock *A, BasicBlock *B) {
+ return BBNumbers.lookup(A) < BBNumbers.lookup(B);
+ };
+ llvm::sort(Preds, CompareBBNumbers);
// Now we loop through all BB's which have entries in SomePHI and remove
// them from the Preds list.
for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
// Do a log(n) search of the Preds list for the entry we want.
SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
- Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i));
+ Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i),
+ CompareBBNumbers);
assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
"PHI node has entry for a block which is not a predecessor!");
@@ -932,7 +934,7 @@ NextIteration:
// The currently active variable for this block is now the PHI.
IncomingVals[AllocaNo] = APN;
- for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[AllocaNo])
+ for (DbgVariableIntrinsic *DII : AllocaDbgDeclares[AllocaNo])
ConvertDebugDeclareToDebugValue(DII, APN, DIB);
// Get the next phi node.
@@ -951,7 +953,7 @@ NextIteration:
if (!Visited.insert(BB).second)
return;
- for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
+ for (BasicBlock::iterator II = BB->begin(); !II->isTerminator();) {
Instruction *I = &*II++; // get the instruction, increment iterator
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
@@ -992,7 +994,7 @@ NextIteration:
// Record debuginfo for the store before removing it.
IncomingLocs[AllocaNo] = SI->getDebugLoc();
- for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[ai->second])
+ for (DbgVariableIntrinsic *DII : AllocaDbgDeclares[ai->second])
ConvertDebugDeclareToDebugValue(DII, SI, DIB);
BB->getInstList().erase(SI);
}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index c87b5c16ffce..03b73954321d 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -173,14 +173,15 @@ class SimplifyCFGOpt {
const DataLayout &DL;
SmallPtrSetImpl<BasicBlock *> *LoopHeaders;
const SimplifyCFGOptions &Options;
+ bool Resimplify;
- Value *isValueEqualityComparison(TerminatorInst *TI);
+ Value *isValueEqualityComparison(Instruction *TI);
BasicBlock *GetValueEqualityComparisonCases(
- TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases);
- bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
+ Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
+ bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
BasicBlock *Pred,
IRBuilder<> &Builder);
- bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
+ bool FoldValueComparisonIntoPredecessors(Instruction *TI,
IRBuilder<> &Builder);
bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
@@ -194,6 +195,9 @@ class SimplifyCFGOpt {
bool SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
bool SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
+ bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
+ IRBuilder<> &Builder);
+
public:
SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
SmallPtrSetImpl<BasicBlock *> *LoopHeaders,
@@ -201,6 +205,13 @@ public:
: TTI(TTI), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {}
bool run(BasicBlock *BB);
+ bool simplifyOnce(BasicBlock *BB);
+
+ // Helper to set Resimplify and return change indication.
+ bool requestResimplify() {
+ Resimplify = true;
+ return true;
+ }
};
} // end anonymous namespace
@@ -208,7 +219,7 @@ public:
/// Return true if it is safe to merge these two
/// terminator instructions together.
static bool
-SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2,
+SafeToMergeTerminators(Instruction *SI1, Instruction *SI2,
SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
if (SI1 == SI2)
return false; // Can't merge with self!
@@ -315,7 +326,7 @@ static unsigned ComputeSpeculationCost(const User *I,
/// V plus its non-dominating operands. If that cost is greater than
/// CostRemaining, false is returned and CostRemaining is undefined.
static bool DominatesMergePoint(Value *V, BasicBlock *BB,
- SmallPtrSetImpl<Instruction *> *AggressiveInsts,
+ SmallPtrSetImpl<Instruction *> &AggressiveInsts,
unsigned &CostRemaining,
const TargetTransformInfo &TTI,
unsigned Depth = 0) {
@@ -349,13 +360,8 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
return true;
- // If we aren't allowing aggressive promotion anymore, then don't consider
- // instructions in the 'if region'.
- if (!AggressiveInsts)
- return false;
-
// If we have seen this instruction before, don't count it again.
- if (AggressiveInsts->count(I))
+ if (AggressiveInsts.count(I))
return true;
// Okay, it looks like the instruction IS in the "condition". Check to
@@ -373,7 +379,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// is expected to be undone in CodeGenPrepare if the speculation has not
// enabled further IR optimizations.
if (Cost > CostRemaining &&
- (!SpeculateOneExpensiveInst || !AggressiveInsts->empty() || Depth > 0))
+ (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0))
return false;
// Avoid unsigned wrap.
@@ -386,7 +392,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
Depth + 1))
return false;
// Okay, it's safe to do this! Remember this instruction.
- AggressiveInsts->insert(I);
+ AggressiveInsts.insert(I);
return true;
}
@@ -664,7 +670,7 @@ private:
} // end anonymous namespace
-static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
+static void EraseTerminatorAndDCECond(Instruction *TI) {
Instruction *Cond = nullptr;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Cond = dyn_cast<Instruction>(SI->getCondition());
@@ -682,12 +688,12 @@ static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
/// Return true if the specified terminator checks
/// to see if a value is equal to constant integer value.
-Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
+Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
Value *CV = nullptr;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
// Do not permit merging of large switch instructions into their
// predecessors unless there is only one predecessor.
- if (SI->getNumSuccessors() * pred_size(SI->getParent()) <= 128)
+ if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
CV = SI->getCondition();
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
if (BI->isConditional() && BI->getCondition()->hasOneUse())
@@ -710,7 +716,7 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
/// Given a value comparison instruction,
/// decode all of the 'cases' that it represents and return the 'default' block.
BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
- TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
+ Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Cases.reserve(SI->getNumCases());
for (auto Case : SI->cases())
@@ -800,7 +806,7 @@ static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
/// determines the outcome of this comparison. If so, simplify TI. This does a
/// very limited form of jump threading.
bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
- TerminatorInst *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
+ Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
if (!PredVal)
return false; // Not a value comparison in predecessor.
@@ -848,7 +854,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
<< "Through successor TI: " << *TI << "Leaving: " << *NI
<< "\n");
- EraseTerminatorInstAndDCECond(TI);
+ EraseTerminatorAndDCECond(TI);
return true;
}
@@ -930,7 +936,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
<< "Through successor TI: " << *TI << "Leaving: " << *NI
<< "\n");
- EraseTerminatorInstAndDCECond(TI);
+ EraseTerminatorAndDCECond(TI);
return true;
}
@@ -965,10 +971,10 @@ static inline bool HasBranchWeights(const Instruction *I) {
return false;
}
-/// Get Weights of a given TerminatorInst, the default weight is at the front
+/// Get Weights of a given terminator, the default weight is at the front
/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
/// metadata.
-static void GetBranchWeights(TerminatorInst *TI,
+static void GetBranchWeights(Instruction *TI,
SmallVectorImpl<uint64_t> &Weights) {
MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
assert(MD);
@@ -1002,7 +1008,7 @@ static void FitWeights(MutableArrayRef<uint64_t> Weights) {
/// (either a switch or a branch on "X == c").
/// See if any of the predecessors of the terminator block are value comparisons
/// on the same value. If so, and if safe to do so, fold them together.
-bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
+bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
IRBuilder<> &Builder) {
BasicBlock *BB = TI->getParent();
Value *CV = isValueEqualityComparison(TI); // CondVal
@@ -1014,7 +1020,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
BasicBlock *Pred = Preds.pop_back_val();
// See if the predecessor is a comparison with the same value.
- TerminatorInst *PTI = Pred->getTerminator();
+ Instruction *PTI = Pred->getTerminator();
Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
if (PCV == CV && TI != PTI) {
@@ -1191,7 +1197,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
setBranchWeights(NewSI, MDWeights);
}
- EraseTerminatorInstAndDCECond(PTI);
+ EraseTerminatorAndDCECond(PTI);
// Okay, last check. If BB is still a successor of PSI, then we must
// have an infinite loop case. If so, add an infinitely looping block
@@ -1270,7 +1276,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
do {
// If we are hoisting the terminator instruction, don't move one (making a
// broken BB), instead clone it, and remove BI.
- if (isa<TerminatorInst>(I1))
+ if (I1->isTerminator())
goto HoistTerminator;
// If we're going to hoist a call, make sure that the two instructions we're
@@ -1315,8 +1321,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
LLVMContext::MD_align,
LLVMContext::MD_dereferenceable,
LLVMContext::MD_dereferenceable_or_null,
- LLVMContext::MD_mem_parallel_loop_access};
- combineMetadata(I1, I2, KnownIDs);
+ LLVMContext::MD_mem_parallel_loop_access,
+ LLVMContext::MD_access_group};
+ combineMetadata(I1, I2, KnownIDs, true);
// I1 and I2 are being combined into a single instruction. Its debug
// location is the merged locations of the original instructions.
@@ -1375,7 +1382,13 @@ HoistTerminator:
NT->takeName(I1);
}
+ // Ensure terminator gets a debug location, even an unknown one, in case
+ // it involves inlinable calls.
+ NT->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
+
+ // PHIs created below will adopt NT's merged DebugLoc.
IRBuilder<NoFolder> Builder(NT);
+
// Hoisting one of the terminators from our successor is a great thing.
// Unfortunately, the successors of the if/else blocks may have PHI nodes in
// them. If they do, all PHI entries for BB1/BB2 must agree for all PHI
@@ -1407,7 +1420,7 @@ HoistTerminator:
for (BasicBlock *Succ : successors(BB1))
AddPredecessorToBlock(Succ, BIParent, BB1);
- EraseTerminatorInstAndDCECond(BI);
+ EraseTerminatorAndDCECond(BI);
return true;
}
@@ -1582,7 +1595,7 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
// However, as N-way merge for CallInst is rare, so we use simplified API
// instead of using complex API for N-way merge.
I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
- combineMetadataForCSE(I0, I);
+ combineMetadataForCSE(I0, I, true);
I0->andIRFlags(I);
}
@@ -1940,11 +1953,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
}
assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
- // Keep a count of how many times instructions are used within CondBB when
- // they are candidates for sinking into CondBB. Specifically:
+ // Keep a count of how many times instructions are used within ThenBB when
+ // they are candidates for sinking into ThenBB. Specifically:
// - They are defined in BB, and
// - They have no side effects, and
- // - All of their uses are in CondBB.
+ // - All of their uses are in ThenBB.
SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
@@ -1994,14 +2007,14 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
}
}
- // Consider any sink candidates which are only used in CondBB as costs for
+ // Consider any sink candidates which are only used in ThenBB as costs for
// speculation. Note, while we iterate over a DenseMap here, we are summing
// and so iteration order isn't significant.
for (SmallDenseMap<Instruction *, unsigned, 4>::iterator
I = SinkCandidateUseCounts.begin(),
E = SinkCandidateUseCounts.end();
I != E; ++I)
- if (I->first->getNumUses() == I->second) {
+ if (I->first->hasNUses(I->second)) {
++SpeculationCost;
if (SpeculationCost > 1)
return false;
@@ -2241,7 +2254,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
// Loop over all of the edges from PredBB to BB, changing them to branch
// to EdgeBB instead.
- TerminatorInst *PredBBTI = PredBB->getTerminator();
+ Instruction *PredBBTI = PredBB->getTerminator();
for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
if (PredBBTI->getSuccessor(i) == BB) {
BB->removePredecessor(PredBB);
@@ -2249,7 +2262,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
}
// Recurse, simplifying any other constants.
- return FoldCondBranchOnPHI(BI, DL, AC) | true;
+ return FoldCondBranchOnPHI(BI, DL, AC) || true;
}
return false;
@@ -2304,9 +2317,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
continue;
}
- if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
+ if (!DominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
MaxCostVal0, TTI) ||
- !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
+ !DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
MaxCostVal1, TTI))
return false;
}
@@ -2336,8 +2349,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
IfBlock1 = nullptr;
} else {
DomBlock = *pred_begin(IfBlock1);
- for (BasicBlock::iterator I = IfBlock1->begin(); !isa<TerminatorInst>(I);
- ++I)
+ for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); ++I)
if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control flow, so
@@ -2350,8 +2362,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
IfBlock2 = nullptr;
} else {
DomBlock = *pred_begin(IfBlock2);
- for (BasicBlock::iterator I = IfBlock2->begin(); !isa<TerminatorInst>(I);
- ++I)
+ for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); ++I)
if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control flow, so
@@ -2371,20 +2382,10 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// Move all 'aggressive' instructions, which are defined in the
// conditional parts of the if's up to the dominating block.
- if (IfBlock1) {
- for (auto &I : *IfBlock1)
- I.dropUnknownNonDebugMetadata();
- DomBlock->getInstList().splice(InsertPt->getIterator(),
- IfBlock1->getInstList(), IfBlock1->begin(),
- IfBlock1->getTerminator()->getIterator());
- }
- if (IfBlock2) {
- for (auto &I : *IfBlock2)
- I.dropUnknownNonDebugMetadata();
- DomBlock->getInstList().splice(InsertPt->getIterator(),
- IfBlock2->getInstList(), IfBlock2->begin(),
- IfBlock2->getTerminator()->getIterator());
- }
+ if (IfBlock1)
+ hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock1);
+ if (IfBlock2)
+ hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock2);
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
// Change the PHI node into a select instruction.
@@ -2400,7 +2401,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
// has been flattened. Change DomBlock to jump directly to our new block to
// avoid other simplifycfg's kicking in on the diamond.
- TerminatorInst *OldTI = DomBlock->getTerminator();
+ Instruction *OldTI = DomBlock->getTerminator();
Builder.SetInsertPoint(OldTI);
Builder.CreateBr(BB);
OldTI->eraseFromParent();
@@ -2434,7 +2435,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
TrueSucc->removePredecessor(BI->getParent());
FalseSucc->removePredecessor(BI->getParent());
Builder.CreateRetVoid();
- EraseTerminatorInstAndDCECond(BI);
+ EraseTerminatorAndDCECond(BI);
return true;
}
@@ -2490,7 +2491,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
<< "\n " << *BI << "NewRet = " << *RI << "TRUEBLOCK: "
<< *TrueSucc << "FALSEBLOCK: " << *FalseSucc);
- EraseTerminatorInstAndDCECond(BI);
+ EraseTerminatorAndDCECond(BI);
return true;
}
@@ -2541,6 +2542,8 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
BasicBlock *BB = BI->getParent();
+ const unsigned PredCount = pred_size(BB);
+
Instruction *Cond = nullptr;
if (BI->isConditional())
Cond = dyn_cast<Instruction>(BI->getCondition());
@@ -2590,7 +2593,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// too many instructions and these involved instructions can be executed
// unconditionally. We denote all involved instructions except the condition
// as "bonus instructions", and only allow this transformation when the
- // number of the bonus instructions does not exceed a certain threshold.
+ // number of the bonus instructions we'll need to create when cloning into
+ // each predecessor does not exceed a certain threshold.
unsigned NumBonusInsts = 0;
for (auto I = BB->begin(); Cond != &*I; ++I) {
// Ignore dbg intrinsics.
@@ -2605,7 +2609,10 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// I is used in the same BB. Since BI uses Cond and doesn't have more slots
// to use any other instruction, User must be an instruction between next(I)
// and Cond.
- ++NumBonusInsts;
+
+ // Account for the cost of duplicating this instruction into each
+ // predecessor.
+ NumBonusInsts += PredCount;
// Early exits once we reach the limit.
if (NumBonusInsts > BonusInstThreshold)
return false;
@@ -2711,16 +2718,16 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// Clone Cond into the predecessor basic block, and or/and the
// two conditions together.
- Instruction *New = Cond->clone();
- RemapInstruction(New, VMap,
+ Instruction *CondInPred = Cond->clone();
+ RemapInstruction(CondInPred, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- PredBlock->getInstList().insert(PBI->getIterator(), New);
- New->takeName(Cond);
- Cond->setName(New->getName() + ".old");
+ PredBlock->getInstList().insert(PBI->getIterator(), CondInPred);
+ CondInPred->takeName(Cond);
+ Cond->setName(CondInPred->getName() + ".old");
if (BI->isConditional()) {
Instruction *NewCond = cast<Instruction>(
- Builder.CreateBinOp(Opc, PBI->getCondition(), New, "or.cond"));
+ Builder.CreateBinOp(Opc, PBI->getCondition(), CondInPred, "or.cond"));
PBI->setCondition(NewCond);
uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
@@ -2784,7 +2791,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
Instruction *NotCond = cast<Instruction>(
Builder.CreateNot(PBI->getCondition(), "not.cond"));
MergedCond = cast<Instruction>(
- Builder.CreateBinOp(Instruction::And, NotCond, New, "and.cond"));
+ Builder.CreateBinOp(Instruction::And, NotCond, CondInPred,
+ "and.cond"));
if (PBI_C->isOne())
MergedCond = cast<Instruction>(Builder.CreateBinOp(
Instruction::Or, PBI->getCondition(), MergedCond, "or.cond"));
@@ -2793,7 +2801,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond)
// is false: PBI_Cond and BI_Value
MergedCond = cast<Instruction>(Builder.CreateBinOp(
- Instruction::And, PBI->getCondition(), New, "and.cond"));
+ Instruction::And, PBI->getCondition(), CondInPred, "and.cond"));
if (PBI_C->isOne()) {
Instruction *NotCond = cast<Instruction>(
Builder.CreateNot(PBI->getCondition(), "not.cond"));
@@ -2807,7 +2815,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
}
// Change PBI from Conditional to Unconditional.
BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI);
- EraseTerminatorInstAndDCECond(PBI);
+ EraseTerminatorAndDCECond(PBI);
PBI = New_PBI;
}
@@ -2873,7 +2881,7 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
if (!AlternativeV)
break;
- assert(pred_size(Succ) == 2);
+ assert(Succ->hasNPredecessors(2));
auto PredI = pred_begin(Succ);
BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
@@ -2922,7 +2930,7 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
isa<StoreInst>(I))
++N;
// Free instructions.
- else if (isa<TerminatorInst>(I) || IsaBitcastOfPointerType(I))
+ else if (I.isTerminator() || IsaBitcastOfPointerType(I))
continue;
else
return false;
@@ -3402,7 +3410,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// Takes care of updating the successors and removing the old terminator.
// Also makes sure not to introduce new successors by assuming that edges to
// non-successor TrueBBs and FalseBBs aren't reachable.
-static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
+static bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
BasicBlock *TrueBB, BasicBlock *FalseBB,
uint32_t TrueWeight,
uint32_t FalseWeight) {
@@ -3414,7 +3422,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
// Then remove the rest.
- for (BasicBlock *Succ : OldTerm->successors()) {
+ for (BasicBlock *Succ : successors(OldTerm)) {
// Make sure only to keep exactly one copy of each edge.
if (Succ == KeepEdge1)
KeepEdge1 = nullptr;
@@ -3457,7 +3465,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
Builder.CreateBr(FalseBB);
}
- EraseTerminatorInstAndDCECond(OldTerm);
+ EraseTerminatorAndDCECond(OldTerm);
return true;
}
@@ -3534,9 +3542,8 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
///
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
-static bool tryToSimplifyUncondBranchWithICmpInIt(
- ICmpInst *ICI, IRBuilder<> &Builder, const DataLayout &DL,
- const TargetTransformInfo &TTI, const SimplifyCFGOptions &Options) {
+bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
+ ICmpInst *ICI, IRBuilder<> &Builder) {
BasicBlock *BB = ICI->getParent();
// If the block has any PHIs in it or the icmp has multiple uses, it is too
@@ -3571,7 +3578,7 @@ static bool tryToSimplifyUncondBranchWithICmpInIt(
ICI->eraseFromParent();
}
// BB is now empty, so it is likely to simplify away.
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
// Ok, the block is reachable from the default dest. If the constant we're
@@ -3587,7 +3594,7 @@ static bool tryToSimplifyUncondBranchWithICmpInIt(
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
// BB is now empty, so it is likely to simplify away.
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
// The use of the icmp has to be in the 'end' block, by the only PHI node in
@@ -3701,7 +3708,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
BasicBlock *NewBB =
BB->splitBasicBlock(BI->getIterator(), "switch.early.test");
// Remove the uncond branch added to the old block.
- TerminatorInst *OldTI = BB->getTerminator();
+ Instruction *OldTI = BB->getTerminator();
Builder.SetInsertPoint(OldTI);
if (TrueWhenEqual)
@@ -3745,7 +3752,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
}
// Erase the old branch instruction.
- EraseTerminatorInstAndDCECond(BI);
+ EraseTerminatorAndDCECond(BI);
LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
return true;
@@ -3861,9 +3868,9 @@ bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) {
}
// The landingpad is now unreachable. Zap it.
- BB->eraseFromParent();
if (LoopHeaders)
LoopHeaders->erase(BB);
+ BB->eraseFromParent();
return true;
}
@@ -3993,7 +4000,7 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI) {
if (UnwindDest == nullptr) {
removeUnwindEdge(PredBB);
} else {
- TerminatorInst *TI = PredBB->getTerminator();
+ Instruction *TI = PredBB->getTerminator();
TI->replaceUsesOfWith(BB, UnwindDest);
}
}
@@ -4062,7 +4069,7 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
SmallVector<BranchInst *, 8> CondBranchPreds;
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *P = *PI;
- TerminatorInst *PTI = P->getTerminator();
+ Instruction *PTI = P->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
if (BI->isUnconditional())
UncondBranchPreds.push_back(P);
@@ -4083,9 +4090,9 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
// If we eliminated all predecessors of the block, delete the block now.
if (pred_empty(BB)) {
// We know there are no successors, so just nuke the block.
- BB->eraseFromParent();
if (LoopHeaders)
LoopHeaders->erase(BB);
+ BB->eraseFromParent();
}
return true;
@@ -4167,7 +4174,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
SmallVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
- TerminatorInst *TI = Preds[i]->getTerminator();
+ Instruction *TI = Preds[i]->getTerminator();
IRBuilder<> Builder(TI);
if (auto *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isUnconditional()) {
@@ -4179,10 +4186,10 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
} else {
if (BI->getSuccessor(0) == BB) {
Builder.CreateBr(BI->getSuccessor(1));
- EraseTerminatorInstAndDCECond(BI);
+ EraseTerminatorAndDCECond(BI);
} else if (BI->getSuccessor(1) == BB) {
Builder.CreateBr(BI->getSuccessor(0));
- EraseTerminatorInstAndDCECond(BI);
+ EraseTerminatorAndDCECond(BI);
Changed = true;
}
}
@@ -4245,9 +4252,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
// If this block is now dead, remove it.
if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
// We know there are no successors, so just nuke the block.
- BB->eraseFromParent();
if (LoopHeaders)
LoopHeaders->erase(BB);
+ BB->eraseFromParent();
return true;
}
@@ -4424,7 +4431,7 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
SplitBlock(&*NewDefault, &NewDefault->front());
auto *OldTI = NewDefault->getTerminator();
new UnreachableInst(SI->getContext(), OldTI);
- EraseTerminatorInstAndDCECond(OldTI);
+ EraseTerminatorAndDCECond(OldTI);
return true;
}
@@ -4635,12 +4642,12 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
SmallDenseMap<Value *, Constant *> ConstantPool;
ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
for (Instruction &I :CaseDest->instructionsWithoutDebug()) {
- if (TerminatorInst *T = dyn_cast<TerminatorInst>(&I)) {
+ if (I.isTerminator()) {
// If the terminator is a simple branch, continue to the next block.
- if (T->getNumSuccessors() != 1 || T->isExceptional())
+ if (I.getNumSuccessors() != 1 || I.isExceptionalTerminator())
return false;
Pred = CaseDest;
- CaseDest = T->getSuccessor(0);
+ CaseDest = I.getSuccessor(0);
} else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {
// Instruction is side-effect free and constant.
@@ -5031,6 +5038,9 @@ SwitchLookupTable::SwitchLookupTable(
GlobalVariable::PrivateLinkage, Initializer,
"switch.table." + FuncName);
Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ // Set the alignment to that of an array items. We will be only loading one
+ // value out of it.
+ Array->setAlignment(DL.getPrefTypeAlignment(ValueType));
Kind = ArrayKind;
}
@@ -5257,7 +5267,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Figure out the corresponding result for each case value and phi node in the
// common destination, as well as the min and max case values.
- assert(SI->case_begin() != SI->case_end());
+ assert(!empty(SI->cases()));
SwitchInst::CaseIt CI = SI->case_begin();
ConstantInt *MinCaseVal = CI->getCaseValue();
ConstantInt *MaxCaseVal = CI->getCaseValue();
@@ -5509,7 +5519,7 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
SmallVector<int64_t,4> Values;
for (auto &C : SI->cases())
Values.push_back(C.getCaseValue()->getValue().getSExtValue());
- llvm::sort(Values.begin(), Values.end());
+ llvm::sort(Values);
// If the switch is already dense, there's nothing useful to do here.
if (isSwitchDense(Values))
@@ -5583,33 +5593,33 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// see if that predecessor totally determines the outcome of this switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
Value *Cond = SI->getCondition();
if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
if (SimplifySwitchOnSelect(SI, Select))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// If the block only contains the switch, see if we can fold the block
// away into any preds.
if (SI == &*BB->instructionsWithoutDebug().begin())
if (FoldValueComparisonIntoPredecessors(SI, Builder))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
// Try to transform the switch into an icmp and a branch.
if (TurnSwitchRangeIntoICmp(SI, Builder))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// Remove unreachable cases.
if (eliminateDeadSwitchCases(SI, Options.AC, DL))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
if (switchToSelect(SI, Builder, DL, TTI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// The conversion from switch to lookup tables results in difficult-to-analyze
// code and makes pruning branches much harder. This is a problem if the
@@ -5618,10 +5628,10 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// optimisation pipeline.
if (Options.ConvertSwitchToLookupTable &&
SwitchToLookupTable(SI, Builder, DL, TTI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
if (ReduceSwitchRange(SI, Builder, DL, TTI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
return false;
}
@@ -5646,20 +5656,20 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
if (IBI->getNumDestinations() == 0) {
// If the indirectbr has no successors, change it to unreachable.
new UnreachableInst(IBI->getContext(), IBI);
- EraseTerminatorInstAndDCECond(IBI);
+ EraseTerminatorAndDCECond(IBI);
return true;
}
if (IBI->getNumDestinations() == 1) {
// If the indirectbr has one successor, change it to a direct branch.
BranchInst::Create(IBI->getDestination(0), IBI);
- EraseTerminatorInstAndDCECond(IBI);
+ EraseTerminatorAndDCECond(IBI);
return true;
}
if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
if (SimplifyIndirectBrOnSelect(IBI, SI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
return Changed;
}
@@ -5755,7 +5765,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
// backedge, so we can eliminate BB.
bool NeedCanonicalLoop =
Options.NeedCanonicalLoop &&
- (LoopHeaders && pred_size(BB) > 1 &&
+ (LoopHeaders && BB->hasNPredecessorsOrMore(2) &&
(LoopHeaders->count(BB) || LoopHeaders->count(Succ)));
BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
@@ -5769,7 +5779,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
for (++I; isa<DbgInfoIntrinsic>(I); ++I)
;
if (I->isTerminator() &&
- tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI, Options))
+ tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
return true;
}
@@ -5787,7 +5797,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
return false;
}
@@ -5815,18 +5825,18 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// This block must be empty, except for the setcond inst, if it exists.
// Ignore dbg intrinsics.
auto I = BB->instructionsWithoutDebug().begin();
if (&*I == BI) {
if (FoldValueComparisonIntoPredecessors(BI, Builder))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
} else if (&*I == cast<Instruction>(BI->getCondition())) {
++I;
if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
}
@@ -5834,35 +5844,24 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (SimplifyBranchOnICmpChain(BI, Builder, DL))
return true;
- // If this basic block has a single dominating predecessor block and the
- // dominating block's condition implies BI's condition, we know the direction
- // of the BI branch.
- if (BasicBlock *Dom = BB->getSinglePredecessor()) {
- auto *PBI = dyn_cast_or_null<BranchInst>(Dom->getTerminator());
- if (PBI && PBI->isConditional() &&
- PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
- assert(PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB);
- bool CondIsTrue = PBI->getSuccessor(0) == BB;
- Optional<bool> Implication = isImpliedCondition(
- PBI->getCondition(), BI->getCondition(), DL, CondIsTrue);
- if (Implication) {
- // Turn this into a branch on constant.
- auto *OldCond = BI->getCondition();
- ConstantInt *CI = *Implication
- ? ConstantInt::getTrue(BB->getContext())
- : ConstantInt::getFalse(BB->getContext());
- BI->setCondition(CI);
- RecursivelyDeleteTriviallyDeadInstructions(OldCond);
- return simplifyCFG(BB, TTI, Options) | true;
- }
- }
+ // If this basic block has dominating predecessor blocks and the dominating
+ // blocks' conditions imply BI's condition, we know the direction of BI.
+ Optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
+ if (Imp) {
+ // Turn this into a branch on constant.
+ auto *OldCond = BI->getCondition();
+ ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
+ : ConstantInt::getFalse(BB->getContext());
+ BI->setCondition(TorF);
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ return requestResimplify();
}
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// We have a conditional branch to two blocks that are only reachable
// from BI. We know that the condbr dominates the two blocks, so see if
@@ -5871,24 +5870,24 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BI->getSuccessor(0)->getSinglePredecessor()) {
if (BI->getSuccessor(1)->getSinglePredecessor()) {
if (HoistThenElseCodeToIf(BI, TTI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
// execute Successor #0 if it branches to Successor #1.
- TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator();
+ Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
if (Succ0TI->getNumSuccessors() == 1 &&
Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
} else if (BI->getSuccessor(1)->getSinglePredecessor()) {
// If Successor #0 has multiple preds, we may be able to conditionally
// execute Successor #1 if it branches to Successor #0.
- TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator();
+ Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
if (Succ1TI->getNumSuccessors() == 1 &&
Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
// If this is a branch on a phi node in the current block, thread control
@@ -5896,14 +5895,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
if (PN->getParent() == BI->getParent())
if (FoldCondBranchOnPHI(BI, DL, Options.AC))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// Scan predecessor blocks for conditional branches.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
if (PBI != BI && PBI->isConditional())
if (SimplifyCondBranchToCondBranch(PBI, BI, DL))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// Look for diamond patterns.
if (MergeCondStores)
@@ -5911,7 +5910,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
if (PBI != BI && PBI->isConditional())
if (mergeConditionalStores(PBI, BI, DL))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
return false;
}
@@ -5974,7 +5973,7 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
for (PHINode &PHI : BB->phis())
for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
- TerminatorInst *T = PHI.getIncomingBlock(i)->getTerminator();
+ Instruction *T = PHI.getIncomingBlock(i)->getTerminator();
IRBuilder<> Builder(T);
if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
BB->removePredecessor(PHI.getIncomingBlock(i));
@@ -5994,7 +5993,7 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
return false;
}
-bool SimplifyCFGOpt::run(BasicBlock *BB) {
+bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
bool Changed = false;
assert(BB && BB->getParent() && "Block not embedded in function!");
@@ -6068,6 +6067,21 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
return Changed;
}
+bool SimplifyCFGOpt::run(BasicBlock *BB) {
+ bool Changed = false;
+
+ // Repeated simplify BB as long as resimplification is requested.
+ do {
+ Resimplify = false;
+
+ // Perform one round of simplifcation. Resimplify flag will be set if
+ // another iteration is requested.
+ Changed |= simplifyOnce(BB);
+ } while (Resimplify);
+
+ return Changed;
+}
+
bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
const SimplifyCFGOptions &Options,
SmallPtrSetImpl<BasicBlock *> *LoopHeaders) {
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 65b23f4d94a1..7faf291e73d9 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -106,8 +106,9 @@ namespace {
/// Otherwise return null.
Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) {
Value *IVSrc = nullptr;
- unsigned OperIdx = 0;
+ const unsigned OperIdx = 0;
const SCEV *FoldedExpr = nullptr;
+ bool MustDropExactFlag = false;
switch (UseInst->getOpcode()) {
default:
return nullptr;
@@ -140,6 +141,11 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
APInt::getOneBitSet(BitWidth, D->getZExtValue()));
}
FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D));
+ // We might have 'exact' flag set at this point which will no longer be
+ // correct after we make the replacement.
+ if (UseInst->isExact() &&
+ SE->getSCEV(IVSrc) != SE->getMulExpr(FoldedExpr, SE->getSCEV(D)))
+ MustDropExactFlag = true;
}
// We have something that might fold it's operand. Compare SCEVs.
if (!SE->isSCEVable(UseInst->getType()))
@@ -155,6 +161,9 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
UseInst->setOperand(OperIdx, IVSrc);
assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");
+ if (MustDropExactFlag)
+ UseInst->dropPoisonGeneratingFlags();
+
++NumElimOperand;
Changed = true;
if (IVOperand->use_empty())
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 15e035874002..1bb26caa2af2 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Triple.h"
@@ -22,6 +23,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -150,6 +152,32 @@ static bool isLocallyOpenedFile(Value *File, CallInst *CI, IRBuilder<> &B,
return true;
}
+static bool isOnlyUsedInComparisonWithZero(Value *V) {
+ for (User *U : V->users()) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
+ if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
+static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
+ const DataLayout &DL) {
+ if (!isOnlyUsedInComparisonWithZero(CI))
+ return false;
+
+ if (!isDereferenceableAndAlignedPointer(Str, 1, APInt(64, Len), DL))
+ return false;
+
+ if (CI->getFunction()->hasFnAttribute(Attribute::SanitizeMemory))
+ return false;
+
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// String and Memory Library Call Optimizations
//===----------------------------------------------------------------------===//
@@ -322,6 +350,21 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
B, DL, TLI);
}
+ // strcmp to memcmp
+ if (!HasStr1 && HasStr2) {
+ if (canTransformToMemCmp(CI, Str1P, Len2, DL))
+ return emitMemCmp(
+ Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL,
+ TLI);
+ } else if (HasStr1 && !HasStr2) {
+ if (canTransformToMemCmp(CI, Str2P, Len1, DL))
+ return emitMemCmp(
+ Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL,
+ TLI);
+ }
+
return nullptr;
}
@@ -361,6 +404,26 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+ uint64_t Len1 = GetStringLength(Str1P);
+ uint64_t Len2 = GetStringLength(Str2P);
+
+ // strncmp to memcmp
+ if (!HasStr1 && HasStr2) {
+ Len2 = std::min(Len2, Length);
+ if (canTransformToMemCmp(CI, Str1P, Len2, DL))
+ return emitMemCmp(
+ Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL,
+ TLI);
+ } else if (HasStr1 && !HasStr2) {
+ Len1 = std::min(Len1, Length);
+ if (canTransformToMemCmp(CI, Str2P, Len1, DL))
+ return emitMemCmp(
+ Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL,
+ TLI);
+ }
+
return nullptr;
}
@@ -735,8 +798,11 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
Bitfield.setBit((unsigned char)C);
Value *BitfieldC = B.getInt(Bitfield);
- // First check that the bit field access is within bounds.
+ // Adjust width of "C" to the bitfield width, then mask off the high bits.
Value *C = B.CreateZExtOrTrunc(CI->getArgOperand(1), BitfieldC->getType());
+ C = B.CreateAnd(C, B.getIntN(Width, 0xFF));
+
+ // First check that the bit field access is within bounds.
Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width),
"memchr.bounds");
@@ -860,8 +926,7 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
}
/// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n).
-static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
- const TargetLibraryInfo &TLI) {
+Value *LibCallSimplifier::foldMallocMemset(CallInst *Memset, IRBuilder<> &B) {
// This has to be a memset of zeros (bzero).
auto *FillValue = dyn_cast<ConstantInt>(Memset->getArgOperand(1));
if (!FillValue || FillValue->getZExtValue() != 0)
@@ -881,7 +946,7 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
return nullptr;
LibFunc Func;
- if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
+ if (!TLI->getLibFunc(*InnerCallee, Func) || !TLI->has(Func) ||
Func != LibFunc_malloc)
return nullptr;
@@ -896,18 +961,18 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext());
Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1),
Malloc->getArgOperand(0), Malloc->getAttributes(),
- B, TLI);
+ B, *TLI);
if (!Calloc)
return nullptr;
Malloc->replaceAllUsesWith(Calloc);
- Malloc->eraseFromParent();
+ eraseFromParent(Malloc);
return Calloc;
}
Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
- if (auto *Calloc = foldMallocMemset(CI, B, *TLI))
+ if (auto *Calloc = foldMallocMemset(CI, B))
return Calloc;
// memset(p, v, n) -> llvm.memset(align 1 p, v, n)
@@ -927,6 +992,20 @@ Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilder<> &B) {
// Math Library Optimizations
//===----------------------------------------------------------------------===//
+// Replace a libcall \p CI with a call to intrinsic \p IID
+static Value *replaceUnaryCall(CallInst *CI, IRBuilder<> &B, Intrinsic::ID IID) {
+ // Propagate fast-math flags from the existing call to the new call.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
+
+ Module *M = CI->getModule();
+ Value *V = CI->getArgOperand(0);
+ Function *F = Intrinsic::getDeclaration(M, IID, CI->getType());
+ CallInst *NewCall = B.CreateCall(F, V);
+ NewCall->takeName(CI);
+ return NewCall;
+}
+
/// Return a variant of Val with float type.
/// Currently this works in two cases: If Val is an FPExtension of a float
/// value to something bigger, simply return the operand.
@@ -949,104 +1028,75 @@ static Value *valueHasFloatPrecision(Value *Val) {
return nullptr;
}
-/// Shrink double -> float for unary functions like 'floor'.
-static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
- bool CheckRetType) {
- Function *Callee = CI->getCalledFunction();
- // We know this libcall has a valid prototype, but we don't know which.
+/// Shrink double -> float functions.
+static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
+ bool isBinary, bool isPrecise = false) {
if (!CI->getType()->isDoubleTy())
return nullptr;
- if (CheckRetType) {
- // Check if all the uses for function like 'sin' are converted to float.
+ // If not all the uses of the function are converted to float, then bail out.
+ // This matters if the precision of the result is more important than the
+ // precision of the arguments.
+ if (isPrecise)
for (User *U : CI->users()) {
FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
if (!Cast || !Cast->getType()->isFloatTy())
return nullptr;
}
- }
- // If this is something like 'floor((double)floatval)', convert to floorf.
- Value *V = valueHasFloatPrecision(CI->getArgOperand(0));
- if (V == nullptr)
+ // If this is something like 'g((double) float)', convert to 'gf(float)'.
+ Value *V[2];
+ V[0] = valueHasFloatPrecision(CI->getArgOperand(0));
+ V[1] = isBinary ? valueHasFloatPrecision(CI->getArgOperand(1)) : nullptr;
+ if (!V[0] || (isBinary && !V[1]))
return nullptr;
// If call isn't an intrinsic, check that it isn't within a function with the
- // same name as the float version of this call.
+ // same name as the float version of this call, otherwise the result is an
+ // infinite loop. For example, from MinGW-w64:
//
- // e.g. inline float expf(float val) { return (float) exp((double) val); }
- //
- // A similar such definition exists in the MinGW-w64 math.h header file which
- // when compiled with -O2 -ffast-math causes the generation of infinite loops
- // where expf is called.
- if (!Callee->isIntrinsic()) {
- const Function *F = CI->getFunction();
- StringRef FName = F->getName();
- StringRef CalleeName = Callee->getName();
- if ((FName.size() == (CalleeName.size() + 1)) &&
- (FName.back() == 'f') &&
- FName.startswith(CalleeName))
+ // float expf(float val) { return (float) exp((double) val); }
+ Function *CalleeFn = CI->getCalledFunction();
+ StringRef CalleeNm = CalleeFn->getName();
+ AttributeList CalleeAt = CalleeFn->getAttributes();
+ if (CalleeFn && !CalleeFn->isIntrinsic()) {
+ const Function *Fn = CI->getFunction();
+ StringRef FnName = Fn->getName();
+ if (FnName.back() == 'f' &&
+ FnName.size() == (CalleeNm.size() + 1) &&
+ FnName.startswith(CalleeNm))
return nullptr;
}
- // Propagate fast-math flags from the existing call to the new call.
+ // Propagate the math semantics from the current function to the new function.
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(CI->getFastMathFlags());
- // floor((double)floatval) -> (double)floorf(floatval)
- if (Callee->isIntrinsic()) {
+ // g((double) float) -> (double) gf(float)
+ Value *R;
+ if (CalleeFn->isIntrinsic()) {
Module *M = CI->getModule();
- Intrinsic::ID IID = Callee->getIntrinsicID();
- Function *F = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
- V = B.CreateCall(F, V);
- } else {
- // The call is a library call rather than an intrinsic.
- V = emitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
+ Intrinsic::ID IID = CalleeFn->getIntrinsicID();
+ Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
+ R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]);
}
+ else
+ R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], CalleeNm, B, CalleeAt)
+ : emitUnaryFloatFnCall(V[0], CalleeNm, B, CalleeAt);
- return B.CreateFPExt(V, B.getDoubleTy());
+ return B.CreateFPExt(R, B.getDoubleTy());
}
-// Replace a libcall \p CI with a call to intrinsic \p IID
-static Value *replaceUnaryCall(CallInst *CI, IRBuilder<> &B, Intrinsic::ID IID) {
- // Propagate fast-math flags from the existing call to the new call.
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(CI->getFastMathFlags());
-
- Module *M = CI->getModule();
- Value *V = CI->getArgOperand(0);
- Function *F = Intrinsic::getDeclaration(M, IID, CI->getType());
- CallInst *NewCall = B.CreateCall(F, V);
- NewCall->takeName(CI);
- return NewCall;
+/// Shrink double -> float for unary functions.
+static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
+ bool isPrecise = false) {
+ return optimizeDoubleFP(CI, B, false, isPrecise);
}
-/// Shrink double -> float for binary functions like 'fmin/fmax'.
-static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- // We know this libcall has a valid prototype, but we don't know which.
- if (!CI->getType()->isDoubleTy())
- return nullptr;
-
- // If this is something like 'fmin((double)floatval1, (double)floatval2)',
- // or fmin(1.0, (double)floatval), then we convert it to fminf.
- Value *V1 = valueHasFloatPrecision(CI->getArgOperand(0));
- if (V1 == nullptr)
- return nullptr;
- Value *V2 = valueHasFloatPrecision(CI->getArgOperand(1));
- if (V2 == nullptr)
- return nullptr;
-
- // Propagate fast-math flags from the existing call to the new call.
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(CI->getFastMathFlags());
-
- // fmin((double)floatval1, (double)floatval2)
- // -> (double)fminf(floatval1, floatval2)
- // TODO: Handle intrinsics in the same way as in optimizeUnaryDoubleFP().
- Value *V = emitBinaryFloatFnCall(V1, V2, Callee->getName(), B,
- Callee->getAttributes());
- return B.CreateFPExt(V, B.getDoubleTy());
+/// Shrink double -> float for binary functions.
+static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B,
+ bool isPrecise = false) {
+ return optimizeDoubleFP(CI, B, true, isPrecise);
}
// cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z)))
@@ -1078,20 +1128,39 @@ Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilder<> &B) {
return B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs");
}
-Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- Value *Ret = nullptr;
- StringRef Name = Callee->getName();
- if (UnsafeFPShrink && Name == "cos" && hasFloatVersion(Name))
- Ret = optimizeUnaryDoubleFP(CI, B, true);
-
- // cos(-x) -> cos(x)
- Value *Op1 = CI->getArgOperand(0);
- if (BinaryOperator::isFNeg(Op1)) {
- BinaryOperator *BinExpr = cast<BinaryOperator>(Op1);
- return B.CreateCall(Callee, BinExpr->getOperand(1), "cos");
+static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func,
+ IRBuilder<> &B) {
+ if (!isa<FPMathOperator>(Call))
+ return nullptr;
+
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(Call->getFastMathFlags());
+
+ // TODO: Can this be shared to also handle LLVM intrinsics?
+ Value *X;
+ switch (Func) {
+ case LibFunc_sin:
+ case LibFunc_sinf:
+ case LibFunc_sinl:
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ case LibFunc_tanl:
+ // sin(-X) --> -sin(X)
+ // tan(-X) --> -tan(X)
+ if (match(Call->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X)))))
+ return B.CreateFNeg(B.CreateCall(Call->getCalledFunction(), X));
+ break;
+ case LibFunc_cos:
+ case LibFunc_cosf:
+ case LibFunc_cosl:
+ // cos(-X) --> cos(X)
+ if (match(Call->getArgOperand(0), m_FNeg(m_Value(X))))
+ return B.CreateCall(Call->getCalledFunction(), X, "cos");
+ break;
+ default:
+ break;
}
- return Ret;
+ return nullptr;
}
static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
@@ -1119,37 +1188,175 @@ static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
return InnerChain[Exp];
}
-/// Use square root in place of pow(x, +/-0.5).
-Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
- // TODO: There is some subset of 'fast' under which these transforms should
- // be allowed.
- if (!Pow->isFast())
- return nullptr;
-
- Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+/// Use exp{,2}(x * y) for pow(exp{,2}(x), y);
+/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x).
+Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
+ Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+ AttributeList Attrs = Pow->getCalledFunction()->getAttributes();
+ Module *Mod = Pow->getModule();
Type *Ty = Pow->getType();
+ bool Ignored;
- const APFloat *ExpoF;
- if (!match(Expo, m_APFloat(ExpoF)) ||
- (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))
+ // Evaluate special cases related to a nested function as the base.
+
+ // pow(exp(x), y) -> exp(x * y)
+ // pow(exp2(x), y) -> exp2(x * y)
+ // If exp{,2}() is used only once, it is better to fold two transcendental
+ // math functions into one. If used again, exp{,2}() would still have to be
+ // called with the original argument, then keep both original transcendental
+ // functions. However, this transformation is only safe with fully relaxed
+ // math semantics, since, besides rounding differences, it changes overflow
+ // and underflow behavior quite dramatically. For example:
+ // pow(exp(1000), 0.001) = pow(inf, 0.001) = inf
+ // Whereas:
+ // exp(1000 * 0.001) = exp(1)
+ // TODO: Loosen the requirement for fully relaxed math semantics.
+ // TODO: Handle exp10() when more targets have it available.
+ CallInst *BaseFn = dyn_cast<CallInst>(Base);
+ if (BaseFn && BaseFn->hasOneUse() && BaseFn->isFast() && Pow->isFast()) {
+ LibFunc LibFn;
+
+ Function *CalleeFn = BaseFn->getCalledFunction();
+ if (CalleeFn &&
+ TLI->getLibFunc(CalleeFn->getName(), LibFn) && TLI->has(LibFn)) {
+ StringRef ExpName;
+ Intrinsic::ID ID;
+ Value *ExpFn;
+ LibFunc LibFnFloat;
+ LibFunc LibFnDouble;
+ LibFunc LibFnLongDouble;
+
+ switch (LibFn) {
+ default:
+ return nullptr;
+ case LibFunc_expf: case LibFunc_exp: case LibFunc_expl:
+ ExpName = TLI->getName(LibFunc_exp);
+ ID = Intrinsic::exp;
+ LibFnFloat = LibFunc_expf;
+ LibFnDouble = LibFunc_exp;
+ LibFnLongDouble = LibFunc_expl;
+ break;
+ case LibFunc_exp2f: case LibFunc_exp2: case LibFunc_exp2l:
+ ExpName = TLI->getName(LibFunc_exp2);
+ ID = Intrinsic::exp2;
+ LibFnFloat = LibFunc_exp2f;
+ LibFnDouble = LibFunc_exp2;
+ LibFnLongDouble = LibFunc_exp2l;
+ break;
+ }
+
+ // Create new exp{,2}() with the product as its argument.
+ Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul");
+ ExpFn = BaseFn->doesNotAccessMemory()
+ ? B.CreateCall(Intrinsic::getDeclaration(Mod, ID, Ty),
+ FMul, ExpName)
+ : emitUnaryFloatFnCall(FMul, TLI, LibFnDouble, LibFnFloat,
+ LibFnLongDouble, B,
+ BaseFn->getAttributes());
+
+ // Since the new exp{,2}() is different from the original one, dead code
+ // elimination cannot be trusted to remove it, since it may have side
+ // effects (e.g., errno). When the only consumer for the original
+ // exp{,2}() is pow(), then it has to be explicitly erased.
+ BaseFn->replaceAllUsesWith(ExpFn);
+ eraseFromParent(BaseFn);
+
+ return ExpFn;
+ }
+ }
+
+ // Evaluate special cases related to a constant base.
+
+ const APFloat *BaseF;
+ if (!match(Pow->getArgOperand(0), m_APFloat(BaseF)))
return nullptr;
+ // pow(2.0 ** n, x) -> exp2(n * x)
+ if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) {
+ APFloat BaseR = APFloat(1.0);
+ BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
+ BaseR = BaseR / *BaseF;
+ bool IsInteger = BaseF->isInteger(),
+ IsReciprocal = BaseR.isInteger();
+ const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
+ APSInt NI(64, false);
+ if ((IsInteger || IsReciprocal) &&
+ !NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) &&
+ NI > 1 && NI.isPowerOf2()) {
+ double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
+ Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
+ if (Pow->doesNotAccessMemory())
+ return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty),
+ FMul, "exp2");
+ else
+ return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f,
+ LibFunc_exp2l, B, Attrs);
+ }
+ }
+
+ // pow(10.0, x) -> exp10(x)
+ // TODO: There is no exp10() intrinsic yet, but some day there shall be one.
+ if (match(Base, m_SpecificFP(10.0)) &&
+ hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
+ return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f,
+ LibFunc_exp10l, B, Attrs);
+
+ return nullptr;
+}
+
+static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno,
+ Module *M, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
// If errno is never set, then use the intrinsic for sqrt().
- if (Pow->hasFnAttr(Attribute::ReadNone)) {
- Function *SqrtFn = Intrinsic::getDeclaration(Pow->getModule(),
- Intrinsic::sqrt, Ty);
- Sqrt = B.CreateCall(SqrtFn, Base);
+ if (NoErrno) {
+ Function *SqrtFn =
+ Intrinsic::getDeclaration(M, Intrinsic::sqrt, V->getType());
+ return B.CreateCall(SqrtFn, V, "sqrt");
}
+
// Otherwise, use the libcall for sqrt().
- else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl))
+ if (hasUnaryFloatFn(TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf,
+ LibFunc_sqrtl))
// TODO: We also should check that the target can in fact lower the sqrt()
// libcall. We currently have no way to ask this question, so we ask if
// the target has a sqrt() libcall, which is not exactly the same.
- Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt), B,
- Pow->getCalledFunction()->getAttributes());
- else
+ return emitUnaryFloatFnCall(V, TLI, LibFunc_sqrt, LibFunc_sqrtf,
+ LibFunc_sqrtl, B, Attrs);
+
+ return nullptr;
+}
+
+/// Use square root in place of pow(x, +/-0.5).
+Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
+ Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+ AttributeList Attrs = Pow->getCalledFunction()->getAttributes();
+ Module *Mod = Pow->getModule();
+ Type *Ty = Pow->getType();
+
+ const APFloat *ExpoF;
+ if (!match(Expo, m_APFloat(ExpoF)) ||
+ (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))
return nullptr;
+ Sqrt = getSqrtCall(Base, Attrs, Pow->doesNotAccessMemory(), Mod, B, TLI);
+ if (!Sqrt)
+ return nullptr;
+
+ // Handle signed zero base by expanding to fabs(sqrt(x)).
+ if (!Pow->hasNoSignedZeros()) {
+ Function *FAbsFn = Intrinsic::getDeclaration(Mod, Intrinsic::fabs, Ty);
+ Sqrt = B.CreateCall(FAbsFn, Sqrt, "abs");
+ }
+
+ // Handle non finite base by expanding to
+ // (x == -infinity ? +infinity : sqrt(x)).
+ if (!Pow->hasNoInfs()) {
+ Value *PosInf = ConstantFP::getInfinity(Ty),
+ *NegInf = ConstantFP::getInfinity(Ty, true);
+ Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf");
+ Sqrt = B.CreateSelect(FCmp, PosInf, Sqrt);
+ }
+
// If the exponent is negative, then get the reciprocal.
if (ExpoF->isNegative())
Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal");
@@ -1160,134 +1367,109 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
Function *Callee = Pow->getCalledFunction();
- AttributeList Attrs = Callee->getAttributes();
StringRef Name = Callee->getName();
- Module *Module = Pow->getModule();
Type *Ty = Pow->getType();
Value *Shrunk = nullptr;
bool Ignored;
- if (UnsafeFPShrink &&
- Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name))
- Shrunk = optimizeUnaryDoubleFP(Pow, B, true);
+ // Bail out if simplifying libcalls to pow() is disabled.
+ if (!hasUnaryFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl))
+ return nullptr;
// Propagate the math semantics from the call to any created instructions.
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(Pow->getFastMathFlags());
+ // Shrink pow() to powf() if the arguments are single precision,
+ // unless the result is expected to be double precision.
+ if (UnsafeFPShrink &&
+ Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name))
+ Shrunk = optimizeBinaryDoubleFP(Pow, B, true);
+
// Evaluate special cases related to the base.
// pow(1.0, x) -> 1.0
- if (match(Base, m_SpecificFP(1.0)))
+ if (match(Base, m_FPOne()))
return Base;
- // pow(2.0, x) -> exp2(x)
- if (match(Base, m_SpecificFP(2.0))) {
- Value *Exp2 = Intrinsic::getDeclaration(Module, Intrinsic::exp2, Ty);
- return B.CreateCall(Exp2, Expo, "exp2");
- }
-
- // pow(10.0, x) -> exp10(x)
- if (ConstantFP *BaseC = dyn_cast<ConstantFP>(Base))
- // There's no exp10 intrinsic yet, but, maybe, some day there shall be one.
- if (BaseC->isExactlyValue(10.0) &&
- hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
- return emitUnaryFloatFnCall(Expo, TLI->getName(LibFunc_exp10), B, Attrs);
-
- // pow(exp(x), y) -> exp(x * y)
- // pow(exp2(x), y) -> exp2(x * y)
- // We enable these only with fast-math. Besides rounding differences, the
- // transformation changes overflow and underflow behavior quite dramatically.
- // Example: x = 1000, y = 0.001.
- // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1).
- auto *BaseFn = dyn_cast<CallInst>(Base);
- if (BaseFn && BaseFn->isFast() && Pow->isFast()) {
- LibFunc LibFn;
- Function *CalleeFn = BaseFn->getCalledFunction();
- if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) &&
- (LibFn == LibFunc_exp || LibFn == LibFunc_exp2) && TLI->has(LibFn)) {
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(Pow->getFastMathFlags());
-
- Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul");
- return emitUnaryFloatFnCall(FMul, CalleeFn->getName(), B,
- CalleeFn->getAttributes());
- }
- }
+ if (Value *Exp = replacePowWithExp(Pow, B))
+ return Exp;
// Evaluate special cases related to the exponent.
- if (Value *Sqrt = replacePowWithSqrt(Pow, B))
- return Sqrt;
-
- ConstantFP *ExpoC = dyn_cast<ConstantFP>(Expo);
- if (!ExpoC)
- return Shrunk;
-
// pow(x, -1.0) -> 1.0 / x
- if (ExpoC->isExactlyValue(-1.0))
+ if (match(Expo, m_SpecificFP(-1.0)))
return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal");
// pow(x, 0.0) -> 1.0
- if (ExpoC->getValueAPF().isZero())
- return ConstantFP::get(Ty, 1.0);
+ if (match(Expo, m_SpecificFP(0.0)))
+ return ConstantFP::get(Ty, 1.0);
// pow(x, 1.0) -> x
- if (ExpoC->isExactlyValue(1.0))
+ if (match(Expo, m_FPOne()))
return Base;
// pow(x, 2.0) -> x * x
- if (ExpoC->isExactlyValue(2.0))
+ if (match(Expo, m_SpecificFP(2.0)))
return B.CreateFMul(Base, Base, "square");
- // FIXME: Correct the transforms and pull this into replacePowWithSqrt().
- if (ExpoC->isExactlyValue(0.5) &&
- hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) {
- // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
- // This is faster than calling pow(), and still handles -0.0 and
- // negative infinity correctly.
- // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
- Value *PosInf = ConstantFP::getInfinity(Ty);
- Value *NegInf = ConstantFP::getInfinity(Ty, true);
-
- // TODO: As above, we should lower to the sqrt() intrinsic if the pow() is
- // an intrinsic, to match errno semantics.
- Value *Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt),
- B, Attrs);
- Function *FAbsFn = Intrinsic::getDeclaration(Module, Intrinsic::fabs, Ty);
- Value *FAbs = B.CreateCall(FAbsFn, Sqrt, "abs");
- Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf");
- Sqrt = B.CreateSelect(FCmp, PosInf, FAbs);
+ if (Value *Sqrt = replacePowWithSqrt(Pow, B))
return Sqrt;
- }
- // pow(x, n) -> x * x * x * ....
- if (Pow->isFast()) {
- APFloat ExpoA = abs(ExpoC->getValueAPF());
- // We limit to a max of 7 fmul(s). Thus the maximum exponent is 32.
- // This transformation applies to integer exponents only.
- if (!ExpoA.isInteger() ||
- ExpoA.compare
- (APFloat(ExpoA.getSemantics(), 32.0)) == APFloat::cmpGreaterThan)
- return nullptr;
+ // pow(x, n) -> x * x * x * ...
+ const APFloat *ExpoF;
+ if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) {
+ // We limit to a max of 7 multiplications, thus the maximum exponent is 32.
+ // If the exponent is an integer+0.5 we generate a call to sqrt and an
+ // additional fmul.
+ // TODO: This whole transformation should be backend specific (e.g. some
+ // backends might prefer libcalls or the limit for the exponent might
+ // be different) and it should also consider optimizing for size.
+ APFloat LimF(ExpoF->getSemantics(), 33.0),
+ ExpoA(abs(*ExpoF));
+ if (ExpoA.compare(LimF) == APFloat::cmpLessThan) {
+ // This transformation applies to integer or integer+0.5 exponents only.
+ // For integer+0.5, we create a sqrt(Base) call.
+ Value *Sqrt = nullptr;
+ if (!ExpoA.isInteger()) {
+ APFloat Expo2 = ExpoA;
+ // To check if ExpoA is an integer + 0.5, we add it to itself. If there
+ // is no floating point exception and the result is an integer, then
+ // ExpoA == integer + 0.5
+ if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK)
+ return nullptr;
+
+ if (!Expo2.isInteger())
+ return nullptr;
+
+ Sqrt =
+ getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
+ Pow->doesNotAccessMemory(), Pow->getModule(), B, TLI);
+ }
- // We will memoize intermediate products of the Addition Chain.
- Value *InnerChain[33] = {nullptr};
- InnerChain[1] = Base;
- InnerChain[2] = B.CreateFMul(Base, Base, "square");
+ // We will memoize intermediate products of the Addition Chain.
+ Value *InnerChain[33] = {nullptr};
+ InnerChain[1] = Base;
+ InnerChain[2] = B.CreateFMul(Base, Base, "square");
- // We cannot readily convert a non-double type (like float) to a double.
- // So we first convert it to something which could be converted to double.
- ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
- Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B);
+ // We cannot readily convert a non-double type (like float) to a double.
+ // So we first convert it to something which could be converted to double.
+ ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
+ Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B);
- // If the exponent is negative, then get the reciprocal.
- if (ExpoC->isNegative())
- FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal");
- return FMul;
+ // Expand pow(x, y+0.5) to pow(x, y) * sqrt(x).
+ if (Sqrt)
+ FMul = B.CreateFMul(FMul, Sqrt);
+
+ // If the exponent is negative, then get the reciprocal.
+ if (ExpoF->isNegative())
+ FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal");
+
+ return FMul;
+ }
}
- return nullptr;
+ return Shrunk;
}
Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
@@ -2285,11 +2467,10 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
if (CI->isStrictFP())
return nullptr;
+ if (Value *V = optimizeTrigReflections(CI, Func, Builder))
+ return V;
+
switch (Func) {
- case LibFunc_cosf:
- case LibFunc_cos:
- case LibFunc_cosl:
- return optimizeCos(CI, Builder);
case LibFunc_sinpif:
case LibFunc_sinpi:
case LibFunc_cospif:
@@ -2344,6 +2525,7 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
case LibFunc_exp:
case LibFunc_exp10:
case LibFunc_expm1:
+ case LibFunc_cos:
case LibFunc_sin:
case LibFunc_sinh:
case LibFunc_tanh:
@@ -2425,7 +2607,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, TmpBuilder)) {
// If we were able to further simplify, remove the now redundant call.
SimplifiedCI->replaceAllUsesWith(V);
- SimplifiedCI->eraseFromParent();
+ eraseFromParent(SimplifiedCI);
return V;
}
}
@@ -2504,15 +2686,20 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
LibCallSimplifier::LibCallSimplifier(
const DataLayout &DL, const TargetLibraryInfo *TLI,
OptimizationRemarkEmitter &ORE,
- function_ref<void(Instruction *, Value *)> Replacer)
+ function_ref<void(Instruction *, Value *)> Replacer,
+ function_ref<void(Instruction *)> Eraser)
: FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE),
- UnsafeFPShrink(false), Replacer(Replacer) {}
+ UnsafeFPShrink(false), Replacer(Replacer), Eraser(Eraser) {}
void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
// Indirect through the replacer used in this instance.
Replacer(I, With);
}
+void LibCallSimplifier::eraseFromParent(Instruction *I) {
+ Eraser(I);
+}
+
// TODO:
// Additional cases that we need to add to this file:
//
diff --git a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
index f8d758c54983..5db4d2e4df9d 100644
--- a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
@@ -181,14 +181,12 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
std::make_pair(std::distance(GVtoClusterMap.member_begin(I),
GVtoClusterMap.member_end()), I));
- llvm::sort(Sets.begin(), Sets.end(),
- [](const SortType &a, const SortType &b) {
- if (a.first == b.first)
- return a.second->getData()->getName() >
- b.second->getData()->getName();
- else
- return a.first > b.first;
- });
+ llvm::sort(Sets, [](const SortType &a, const SortType &b) {
+ if (a.first == b.first)
+ return a.second->getData()->getName() > b.second->getData()->getName();
+ else
+ return a.first > b.first;
+ });
for (auto &I : Sets) {
unsigned CurrentClusterID = BalancinQueue.top().first;
diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
index afd842f59911..95416de07439 100644
--- a/contrib/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
@@ -26,6 +26,7 @@ using namespace llvm;
void llvm::initializeTransformUtils(PassRegistry &Registry) {
initializeAddDiscriminatorsLegacyPassPass(Registry);
initializeBreakCriticalEdgesPass(Registry);
+ initializeCanonicalizeAliasesLegacyPassPass(Registry);
initializeInstNamerPass(Registry);
initializeLCSSAWrapperPassPass(Registry);
initializeLibCallsShrinkWrapLegacyPassPass(Registry);