Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils')
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp                 |   10
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp                   |   90
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp                |    2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp                     |  191
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp                |    4
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp                |   12
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp                     |   69
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CloneModule.cpp                       |   36
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp                     |  227
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp                         |    2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp                  |    2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp             |   23
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp                  |    6
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/Evaluator.cpp                         |  258
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp                        |   12
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp                |    5
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp               |   13
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp                      |   33
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp |    7
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp                    |   99
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp                  |    2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp                   |   12
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LCSSA.cpp                             |   26
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp                |   27
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/Local.cpp                             |  850
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp                 |  645
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp                      |   57
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp                        |  195
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp                  |  785
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp                    |  182
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp                 |   43
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp                         |  161
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp                    |   11
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp                       |    8
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp                |   14
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp                       |   82
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp                           |    2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp                       |    2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp               |   33
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp                     |   90
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp           |   78
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp                        |    2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp                    |  191
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp                       |  307
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp                    |  166
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp              |  152
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp                  |  380
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SplitModule.cpp                       |   28
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp                  |    4
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp        |    2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp            |    2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/Utils.cpp                             |   13
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp                        |   12
53 files changed, 4356 insertions(+), 1309 deletions(-)
diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
index 0f0668f24db5..e3ef42362223 100644
--- a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -69,7 +69,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
#include <utility>
using namespace llvm;
@@ -114,7 +114,7 @@ static bool shouldHaveDiscriminator(const Instruction *I) {
return !isa<IntrinsicInst>(I) || isa<MemIntrinsic>(I);
}
-/// \brief Assign DWARF discriminators.
+/// Assign DWARF discriminators.
///
/// To assign discriminators, we examine the boundaries of every
/// basic block and its successors. Suppose there is a basic block B1
@@ -210,9 +210,9 @@ static bool addDiscriminators(Function &F) {
// it in 1 byte ULEB128 representation.
unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
I.setDebugLoc(DIL->setBaseDiscriminator(Discriminator));
- DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
- << DIL->getColumn() << ":" << Discriminator << " " << I
- << "\n");
+ LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn() << ":" << Discriminator << " " << I
+ << "\n");
Changed = true;
}
}
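
Note: the DEBUG -> LLVM_DEBUG change above is part of the LLVM-wide rename of the debug-output macro. A minimal sketch of how the macro is used, with a hypothetical pass name; output appears only in assertion-enabled builds when -debug or -debug-only=<DEBUG_TYPE> is passed:

    #define DEBUG_TYPE "my-pass"  // hypothetical name, used by -debug-only filtering
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    static void reportDiscriminator(unsigned D) {
      // Compiled away entirely in release (NDEBUG) builds.
      LLVM_DEBUG(dbgs() << "assigned discriminator " << D << "\n");
    }
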
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 8f59913e14bb..516a785dce1e 100644
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -36,7 +37,6 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <string>
@@ -45,16 +45,22 @@
using namespace llvm;
-void llvm::DeleteDeadBlock(BasicBlock *BB) {
+void llvm::DeleteDeadBlock(BasicBlock *BB, DeferredDominance *DDT) {
assert((pred_begin(BB) == pred_end(BB) ||
// Can delete self loop.
BB->getSinglePredecessor() == BB) && "Block is not dead!");
TerminatorInst *BBTerm = BB->getTerminator();
+ std::vector<DominatorTree::UpdateType> Updates;
// Loop through all of our successors and make sure they know that one
// of their predecessors is going away.
- for (BasicBlock *Succ : BBTerm->successors())
+ if (DDT)
+ Updates.reserve(BBTerm->getNumSuccessors());
+ for (BasicBlock *Succ : BBTerm->successors()) {
Succ->removePredecessor(BB);
+ if (DDT)
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
+ }
// Zap all the instructions in the block.
while (!BB->empty()) {
@@ -69,8 +75,12 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
BB->getInstList().pop_back();
}
- // Zap the block!
- BB->eraseFromParent();
+ if (DDT) {
+ DDT->applyUpdates(Updates);
+ DDT->deleteBB(BB); // Deferred deletion of BB.
+ } else {
+ BB->eraseFromParent(); // Zap the block!
+ }
}
void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
@@ -107,9 +117,12 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
LoopInfo *LI,
- MemoryDependenceResults *MemDep) {
- // Don't merge away blocks who have their address taken.
- if (BB->hasAddressTaken()) return false;
+ MemoryDependenceResults *MemDep,
+ DeferredDominance *DDT) {
+ assert(!(DT && DDT) && "Cannot call with both DT and DDT.");
+
+ if (BB->hasAddressTaken())
+ return false;
// Can't merge if there are multiple predecessors, or no predecessors.
BasicBlock *PredBB = BB->getUniquePredecessor();
@@ -121,16 +134,9 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
if (PredBB->getTerminator()->isExceptional())
return false;
- succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB));
- BasicBlock *OnlySucc = BB;
- for (; SI != SE; ++SI)
- if (*SI != OnlySucc) {
- OnlySucc = nullptr; // There are multiple distinct successors!
- break;
- }
-
- // Can't merge if there are multiple successors.
- if (!OnlySucc) return false;
+ // Can't merge if there are multiple distinct successors.
+ if (PredBB->getUniqueSuccessor() != BB)
+ return false;
// Can't merge if there is PHI loop.
for (PHINode &PN : BB->phis())
@@ -139,14 +145,27 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
return false;
// Begin by getting rid of unneeded PHIs.
- SmallVector<Value *, 4> IncomingValues;
+ SmallVector<AssertingVH<Value>, 4> IncomingValues;
if (isa<PHINode>(BB->front())) {
for (PHINode &PN : BB->phis())
- if (PN.getIncomingValue(0) != &PN)
+ if (!isa<PHINode>(PN.getIncomingValue(0)) ||
+ cast<PHINode>(PN.getIncomingValue(0))->getParent() != BB)
IncomingValues.push_back(PN.getIncomingValue(0));
FoldSingleEntryPHINodes(BB, MemDep);
}
+ // Deferred DT update: Collect all the edges that exit BB. These
+ // dominator edges will be redirected from Pred.
+ std::vector<DominatorTree::UpdateType> Updates;
+ if (DDT) {
+ Updates.reserve(1 + (2 * succ_size(BB)));
+ Updates.push_back({DominatorTree::Delete, PredBB, BB});
+ for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ Updates.push_back({DominatorTree::Delete, BB, *I});
+ Updates.push_back({DominatorTree::Insert, PredBB, *I});
+ }
+ }
+
// Delete the unconditional branch from the predecessor...
PredBB->getInstList().pop_back();
@@ -158,8 +177,8 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
// Eliminate duplicate dbg.values describing the entry PHI node post-splice.
- for (auto *Incoming : IncomingValues) {
- if (isa<Instruction>(Incoming)) {
+ for (auto Incoming : IncomingValues) {
+ if (isa<Instruction>(*Incoming)) {
SmallVector<DbgValueInst *, 2> DbgValues;
SmallDenseSet<std::pair<DILocalVariable *, DIExpression *>, 2>
DbgValueSet;
@@ -193,7 +212,12 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
if (MemDep)
MemDep->invalidateCachedPredecessors();
- BB->eraseFromParent();
+ if (DDT) {
+ DDT->deleteBB(BB); // Deferred deletion of BB.
+ DDT->applyUpdates(Updates);
+ } else {
+ BB->eraseFromParent(); // Nuke BB.
+ }
return true;
}
@@ -309,13 +333,21 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
DominatorTree *DT, LoopInfo *LI,
bool PreserveLCSSA, bool &HasLoopExit) {
// Update dominator tree if available.
- if (DT)
- DT->splitBlock(NewBB);
+ if (DT) {
+ if (OldBB == DT->getRootNode()->getBlock()) {
+ assert(NewBB == &NewBB->getParent()->getEntryBlock());
+ DT->setNewRoot(NewBB);
+ } else {
+ // Split block expects NewBB to have a non-empty set of predecessors.
+ DT->splitBlock(NewBB);
+ }
+ }
// The rest of the logic is only relevant for updating the loop structures.
if (!LI)
return;
+ assert(DT && "DT should be available to update LoopInfo!");
Loop *L = LI->getLoopFor(OldBB);
// If we need to preserve loop analyses, collect some information about how
@@ -493,7 +525,6 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// Insert dummy values as the incoming value.
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
- return NewBB;
}
// Update DominatorTree, LoopInfo, and LCCSA analysis information.
@@ -501,8 +532,11 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, PreserveLCSSA,
HasLoopExit);
- // Update the PHI nodes in BB with the values coming from NewBB.
- UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit);
+ if (!Preds.empty()) {
+ // Update the PHI nodes in BB with the values coming from NewBB.
+ UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit);
+ }
+
return NewBB;
}
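
Note: DeleteDeadBlock and MergeBlockIntoPredecessor now thread an optional DeferredDominance through and describe CFG changes as DominatorTree::UpdateType records instead of mutating the tree eagerly. A minimal sketch of the batched-update idiom against DominatorTree itself (which DeferredDominance wraps); the block names are assumed to come from the caller:

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Dominators.h"
    #include <vector>

    using namespace llvm;

    // Collect edge deletions/insertions first, then apply them in one
    // batch so the updater can dedupe and sequence them safely.
    static void mergeEdgesIntoPred(DominatorTree &DT, BasicBlock *PredBB,
                                   BasicBlock *BB) {
      std::vector<DominatorTree::UpdateType> Updates;
      Updates.push_back({DominatorTree::Delete, PredBB, BB});
      for (BasicBlock *Succ : successors(BB)) {
        Updates.push_back({DominatorTree::Delete, BB, Succ});
        Updates.push_back({DominatorTree::Insert, PredBB, Succ});
      }
      DT.applyUpdates(Updates);
    }
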
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 464d1a34f518..3e30c27a9f33 100644
--- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -28,7 +28,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index b60dfb4f3541..5f5c4150d3bb 100644
--- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -105,12 +105,23 @@ static bool setRetNonNull(Function &F) {
return true;
}
+static bool setNonLazyBind(Function &F) {
+ if (F.hasFnAttribute(Attribute::NonLazyBind))
+ return false;
+ F.addFnAttr(Attribute::NonLazyBind);
+ return true;
+}
+
bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
LibFunc TheLibFunc;
if (!(TLI.getLibFunc(F, TheLibFunc) && TLI.has(TheLibFunc)))
return false;
bool Changed = false;
+
+ if (F.getParent() != nullptr && F.getParent()->getRtLibUseGOT())
+ Changed |= setNonLazyBind(F);
+
switch (TheLibFunc) {
case LibFunc_strlen:
case LibFunc_wcslen:
@@ -375,6 +386,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_fseek:
case LibFunc_ftell:
case LibFunc_fgetc:
+ case LibFunc_fgetc_unlocked:
case LibFunc_fseeko:
case LibFunc_ftello:
case LibFunc_fileno:
@@ -393,6 +405,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F);
return Changed;
case LibFunc_fputc:
+ case LibFunc_fputc_unlocked:
case LibFunc_fstat:
case LibFunc_frexp:
case LibFunc_frexpf:
@@ -402,21 +415,25 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_fgets:
+ case LibFunc_fgets_unlocked:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
return Changed;
case LibFunc_fread:
+ case LibFunc_fread_unlocked:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 3);
return Changed;
case LibFunc_fwrite:
+ case LibFunc_fwrite_unlocked:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 3);
// FIXME: readonly #1?
return Changed;
case LibFunc_fputs:
+ case LibFunc_fputs_unlocked:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -447,6 +464,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_gets:
case LibFunc_getchar:
+ case LibFunc_getchar_unlocked:
Changed |= setDoesNotThrow(F);
return Changed;
case LibFunc_getitimer:
@@ -485,6 +503,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_putc:
+ case LibFunc_putc_unlocked:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
@@ -505,6 +524,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_putchar:
+ case LibFunc_putchar_unlocked:
Changed |= setDoesNotThrow(F);
return Changed;
case LibFunc_popen:
@@ -687,9 +707,9 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setRetNonNull(F);
Changed |= setRetDoesNotAlias(F);
return Changed;
- //TODO: add LibFunc entries for:
- //case LibFunc_memset_pattern4:
- //case LibFunc_memset_pattern8:
+ // TODO: add LibFunc entries for:
+ // case LibFunc_memset_pattern4:
+ // case LibFunc_memset_pattern8:
case LibFunc_memset_pattern16:
Changed |= setOnlyAccessesArgMemory(F);
Changed |= setDoesNotCapture(F, 0);
@@ -709,6 +729,19 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
}
}
+bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn) {
+ switch (Ty->getTypeID()) {
+ case Type::FloatTyID:
+ return TLI->has(FloatFn);
+ case Type::DoubleTyID:
+ return TLI->has(DoubleFn);
+ default:
+ return TLI->has(LongDoubleFn);
+ }
+}
+
//- Emit LibCalls ------------------------------------------------------------//
Value *llvm::castToCStr(Value *V, IRBuilder<> &B) {
@@ -973,6 +1006,24 @@ Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B,
return CI;
}
+Value *llvm::emitFPutCUnlocked(Value *Char, Value *File, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc_fputc_unlocked))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ Constant *F = M->getOrInsertFunction("fputc_unlocked", B.getInt32Ty(),
+ B.getInt32Ty(), File->getType());
+ if (File->getType()->isPointerTy())
+ inferLibFuncAttributes(*M->getFunction("fputc_unlocked"), *TLI);
+ Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/ true, "chari");
+ CallInst *CI = B.CreateCall(F, {Char, File}, "fputc_unlocked");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+ return CI;
+}
+
Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc_fputs))
@@ -991,6 +1042,24 @@ Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B,
return CI;
}
+Value *llvm::emitFPutSUnlocked(Value *Str, Value *File, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc_fputs_unlocked))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ StringRef FPutsUnlockedName = TLI->getName(LibFunc_fputs_unlocked);
+ Constant *F = M->getOrInsertFunction(FPutsUnlockedName, B.getInt32Ty(),
+ B.getInt8PtrTy(), File->getType());
+ if (File->getType()->isPointerTy())
+ inferLibFuncAttributes(*M->getFunction(FPutsUnlockedName), *TLI);
+ CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, "fputs_unlocked");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+ return CI;
+}
+
Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc_fwrite))
@@ -1013,3 +1082,119 @@ Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
+
+Value *llvm::emitMalloc(Value *Num, IRBuilder<> &B, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc_malloc))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *Malloc = M->getOrInsertFunction("malloc", B.getInt8PtrTy(),
+ DL.getIntPtrType(Context));
+ inferLibFuncAttributes(*M->getFunction("malloc"), *TLI);
+ CallInst *CI = B.CreateCall(Malloc, Num, "malloc");
+
+ if (const Function *F = dyn_cast<Function>(Malloc->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+Value *llvm::emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,
+ IRBuilder<> &B, const TargetLibraryInfo &TLI) {
+ if (!TLI.has(LibFunc_calloc))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ const DataLayout &DL = M->getDataLayout();
+ IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext()));
+ Value *Calloc = M->getOrInsertFunction("calloc", Attrs, B.getInt8PtrTy(),
+ PtrType, PtrType);
+ inferLibFuncAttributes(*M->getFunction("calloc"), TLI);
+ CallInst *CI = B.CreateCall(Calloc, {Num, Size}, "calloc");
+
+ if (const auto *F = dyn_cast<Function>(Calloc->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+Value *llvm::emitFWriteUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
+ IRBuilder<> &B, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc_fwrite_unlocked))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ StringRef FWriteUnlockedName = TLI->getName(LibFunc_fwrite_unlocked);
+ Constant *F = M->getOrInsertFunction(
+ FWriteUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
+ DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
+
+ if (File->getType()->isPointerTy())
+ inferLibFuncAttributes(*M->getFunction(FWriteUnlockedName), *TLI);
+ CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File});
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+ return CI;
+}
+
+Value *llvm::emitFGetCUnlocked(Value *File, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc_fgetc_unlocked))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ Constant *F =
+ M->getOrInsertFunction("fgetc_unlocked", B.getInt32Ty(), File->getType());
+ if (File->getType()->isPointerTy())
+ inferLibFuncAttributes(*M->getFunction("fgetc_unlocked"), *TLI);
+ CallInst *CI = B.CreateCall(F, File, "fgetc_unlocked");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+ return CI;
+}
+
+Value *llvm::emitFGetSUnlocked(Value *Str, Value *Size, Value *File,
+ IRBuilder<> &B, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc_fgets_unlocked))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ Constant *F =
+ M->getOrInsertFunction("fgets_unlocked", B.getInt8PtrTy(),
+ B.getInt8PtrTy(), B.getInt32Ty(), File->getType());
+ inferLibFuncAttributes(*M->getFunction("fgets_unlocked"), *TLI);
+ CallInst *CI =
+ B.CreateCall(F, {castToCStr(Str, B), Size, File}, "fgets_unlocked");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+ return CI;
+}
+
+Value *llvm::emitFReadUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
+ IRBuilder<> &B, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc_fread_unlocked))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ StringRef FReadUnlockedName = TLI->getName(LibFunc_fread_unlocked);
+ Constant *F = M->getOrInsertFunction(
+ FReadUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
+ DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
+
+ if (File->getType()->isPointerTy())
+ inferLibFuncAttributes(*M->getFunction(FReadUnlockedName), *TLI);
+ CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File});
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+ return CI;
+}
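
Note: the new hasUnaryFloatFn helper (added above) reports whether the float/double/long-double variant of a unary math routine is available for a given type. A minimal usage sketch, assuming a TargetLibraryInfo from the current pass; per the signature above, the parameter order is DoubleFn, FloatFn, LongDoubleFn:

    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/IR/Type.h"
    #include "llvm/Transforms/Utils/BuildLibCalls.h"

    using namespace llvm;

    // Only fold to a sin() libcall when the variant matching Ty exists
    // (float -> sinf, double -> sin, anything else -> sinl).
    static bool canEmitSin(const TargetLibraryInfo *TLI, Type *Ty) {
      return hasUnaryFloatFn(TLI, Ty, LibFunc_sin, LibFunc_sinf, LibFunc_sinl);
    }
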
diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
index f711b192f604..05512a6dff3e 100644
--- a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -34,7 +35,6 @@
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
@@ -173,7 +173,7 @@ Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
return isDivisionOp() ? Value.Quotient : Value.Remainder;
}
-/// \brief Check if a value looks like a hash.
+/// Check if a value looks like a hash.
///
/// The routine is expected to detect values computed using the most common hash
/// algorithms. Typically, hash computations end with one of the following
diff --git a/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index 5dc6068d4a0b..4d9c22e57a68 100644
--- a/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -389,12 +389,14 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
// Inspect the arguments of the call site. If an argument's type doesn't
// match the corresponding formal argument's type in the callee, bitcast it
// to the correct type.
- for (Use &U : CS.args()) {
- unsigned ArgNo = CS.getArgumentNo(&U);
- Type *FormalTy = Callee->getFunctionType()->getParamType(ArgNo);
- Type *ActualTy = U.get()->getType();
+ auto CalleeType = Callee->getFunctionType();
+ auto CalleeParamNum = CalleeType->getNumParams();
+ for (unsigned ArgNo = 0; ArgNo < CalleeParamNum; ++ArgNo) {
+ auto *Arg = CS.getArgument(ArgNo);
+ Type *FormalTy = CalleeType->getParamType(ArgNo);
+ Type *ActualTy = Arg->getType();
if (FormalTy != ActualTy) {
- auto *Cast = CastInst::Create(Instruction::BitCast, U.get(), FormalTy, "",
+ auto *Cast = CastInst::Create(Instruction::BitCast, Arg, FormalTy, "",
CS.getInstruction());
CS.setArgument(ArgNo, Cast);
}
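
Note: promoteCall now iterates formal parameters by index instead of walking CS.args(), since a promoted call can carry more actual arguments than the callee has formals, and setArgument replaces the very Use an iterator would be standing on. A reduced sketch of the coercion loop, assuming CS and Callee come from the surrounding promotion logic:

    #include "llvm/IR/CallSite.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Bitcast each mismatched actual argument to its formal type; extra
    // (variadic) arguments past the formal count are left untouched.
    static void coerceArguments(CallSite CS, Function *Callee) {
      FunctionType *FTy = Callee->getFunctionType();
      for (unsigned ArgNo = 0, E = FTy->getNumParams(); ArgNo != E; ++ArgNo) {
        Value *Arg = CS.getArgument(ArgNo);
        Type *FormalTy = FTy->getParamType(ArgNo);
        if (Arg->getType() != FormalTy)
          CS.setArgument(ArgNo, CastInst::Create(Instruction::BitCast, Arg,
                                                 FormalTy, "",
                                                 CS.getInstruction()));
      }
    }
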
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 16af2c7b808b..61448e9acb57 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
@@ -31,7 +32,6 @@
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <map>
using namespace llvm;
@@ -43,44 +43,36 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
DebugInfoFinder *DIFinder) {
DenseMap<const MDNode *, MDNode *> Cache;
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
- if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+ if (BB->hasName())
+ NewBB->setName(BB->getName() + NameSuffix);
bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
Module *TheModule = F ? F->getParent() : nullptr;
// Loop over all instructions, and copy them over.
- for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
- II != IE; ++II) {
-
- if (DIFinder && TheModule) {
- if (auto *DDI = dyn_cast<DbgDeclareInst>(II))
- DIFinder->processDeclare(*TheModule, DDI);
- else if (auto *DVI = dyn_cast<DbgValueInst>(II))
- DIFinder->processValue(*TheModule, DVI);
+ for (const Instruction &I : *BB) {
+ if (DIFinder && TheModule)
+ DIFinder->processInstruction(*TheModule, I);
- if (auto DbgLoc = II->getDebugLoc())
- DIFinder->processLocation(*TheModule, DbgLoc.get());
- }
-
- Instruction *NewInst = II->clone();
- if (II->hasName())
- NewInst->setName(II->getName()+NameSuffix);
+ Instruction *NewInst = I.clone();
+ if (I.hasName())
+ NewInst->setName(I.getName() + NameSuffix);
NewBB->getInstList().push_back(NewInst);
- VMap[&*II] = NewInst; // Add instruction map to value.
+ VMap[&I] = NewInst; // Add instruction map to value.
- hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ hasCalls |= (isa<CallInst>(I) && !isa<DbgInfoIntrinsic>(I));
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
if (isa<ConstantInt>(AI->getArraySize()))
hasStaticAllocas = true;
else
hasDynamicAllocas = true;
}
}
-
+
if (CodeInfo) {
CodeInfo->ContainsCalls |= hasCalls;
CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
- CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
+ CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
BB != &BB->getParent()->getEntryBlock();
}
return NewBB;
@@ -175,7 +167,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Create a new basic block and copy instructions into it!
BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo,
- SP ? &DIFinder : nullptr);
+ ModuleLevelChanges ? &DIFinder : nullptr);
// Add basic block mapping.
VMap[&BB] = CBB;
@@ -197,15 +189,15 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
Returns.push_back(RI);
}
- for (DISubprogram *ISP : DIFinder.subprograms()) {
- if (ISP != SP) {
+ for (DISubprogram *ISP : DIFinder.subprograms())
+ if (ISP != SP)
VMap.MD()[ISP].reset(ISP);
- }
- }
- for (auto *Type : DIFinder.types()) {
+ for (DICompileUnit *CU : DIFinder.compile_units())
+ VMap.MD()[CU].reset(CU);
+
+ for (DIType *Type : DIFinder.types())
VMap.MD()[Type].reset(Type);
- }
// Loop over all of the instructions in the function, fixing up operand
// references as we go. This uses VMap to do all the hard work.
@@ -283,7 +275,7 @@ namespace {
/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
- void CloneBlock(const BasicBlock *BB,
+ void CloneBlock(const BasicBlock *BB,
BasicBlock::const_iterator StartingInst,
std::vector<const BasicBlock*> &ToClone);
};
@@ -546,7 +538,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// phi nodes will have invalid entries. Update the PHI nodes in this
// case.
PHINode *PN = cast<PHINode>(NewBB->begin());
- NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB));
+ NumPreds = pred_size(NewBB);
if (NumPreds != PN->getNumIncomingValues()) {
assert(NumPreds < PN->getNumIncomingValues());
// Count how many times each predecessor comes to this block.
@@ -718,7 +710,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ModuleLevelChanges, Returns, NameSuffix, CodeInfo);
}
-/// \brief Remaps instructions in \p Blocks using the mapping in \p VMap.
+/// Remaps instructions in \p Blocks using the mapping in \p VMap.
void llvm::remapInstructionsInBlocks(
const SmallVectorImpl<BasicBlock *> &Blocks, ValueToValueMapTy &VMap) {
// Rewrite the code to refer to itself.
@@ -728,7 +720,7 @@ void llvm::remapInstructionsInBlocks(
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
}
-/// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
+/// Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
/// Blocks.
///
/// Updates LoopInfo and DominatorTree assuming the loop is dominated by block
@@ -792,12 +784,13 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
return NewLoop;
}
-/// \brief Duplicate non-Phi instructions from the beginning of block up to
+/// Duplicate non-Phi instructions from the beginning of block up to
/// StopAt instruction into a split block between BB and its predecessor.
BasicBlock *
llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,
Instruction *StopAt,
- ValueToValueMapTy &ValueMapping) {
+ ValueToValueMapTy &ValueMapping,
+ DominatorTree *DT) {
// We are going to have to map operands from the original BB block to the new
// copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
// account for entry from PredBB.
@@ -805,13 +798,15 @@ llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
- BasicBlock *NewBB = SplitEdge(PredBB, BB);
+ BasicBlock *NewBB = SplitEdge(PredBB, BB, DT);
NewBB->setName(PredBB->getName() + ".split");
Instruction *NewTerm = NewBB->getTerminator();
// Clone the non-phi instructions of BB into NewBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
- for (; StopAt != &*BI; ++BI) {
+ // Stop once we see the terminator too. This covers the case where BB's
+ // terminator gets replaced and StopAt == BB's terminator.
+ for (; StopAt != &*BI && BB->getTerminator() != &*BI; ++BI) {
Instruction *New = BI->clone();
New->setName(BI->getName());
New->insertBefore(NewTerm);
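
Note: DuplicateInstructionsInSplitBetween now takes a DominatorTree so the SplitEdge it performs keeps the tree current, and its copy loop also stops at BB's terminator for the case where StopAt is the (possibly replaced) terminator itself. A minimal caller sketch; BB, PredBB, StopAt, and DT are assumed to come from the transform using this utility:

    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    #include "llvm/Transforms/Utils/ValueMapper.h"

    using namespace llvm;

    // Copy BB's leading non-PHI instructions (up to StopAt) into a new
    // block split on the PredBB->BB edge, recording old->new mappings.
    static BasicBlock *duplicateInto(BasicBlock *BB, BasicBlock *PredBB,
                                     Instruction *StopAt, DominatorTree *DT) {
      ValueToValueMapTy Mapping;
      return DuplicateInstructionsInSplitBetween(BB, PredBB, StopAt,
                                                 Mapping, DT);
    }
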
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
index 8fee10854229..35c7511a24b9 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -32,33 +32,34 @@ static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) {
/// copies of global variables and functions, and making their (initializers and
/// references, respectively) refer to the right globals.
///
-std::unique_ptr<Module> llvm::CloneModule(const Module *M) {
+std::unique_ptr<Module> llvm::CloneModule(const Module &M) {
// Create the value map that maps things from the old module over to the new
// module.
ValueToValueMapTy VMap;
return CloneModule(M, VMap);
}
-std::unique_ptr<Module> llvm::CloneModule(const Module *M,
+std::unique_ptr<Module> llvm::CloneModule(const Module &M,
ValueToValueMapTy &VMap) {
return CloneModule(M, VMap, [](const GlobalValue *GV) { return true; });
}
std::unique_ptr<Module> llvm::CloneModule(
- const Module *M, ValueToValueMapTy &VMap,
+ const Module &M, ValueToValueMapTy &VMap,
function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) {
// First off, we need to create the new module.
std::unique_ptr<Module> New =
- llvm::make_unique<Module>(M->getModuleIdentifier(), M->getContext());
- New->setDataLayout(M->getDataLayout());
- New->setTargetTriple(M->getTargetTriple());
- New->setModuleInlineAsm(M->getModuleInlineAsm());
-
+ llvm::make_unique<Module>(M.getModuleIdentifier(), M.getContext());
+ New->setSourceFileName(M.getSourceFileName());
+ New->setDataLayout(M.getDataLayout());
+ New->setTargetTriple(M.getTargetTriple());
+ New->setModuleInlineAsm(M.getModuleInlineAsm());
+
// Loop over all of the global variables, making corresponding globals in the
// new module. Here we add them to the VMap and to the new Module. We
// don't worry about attributes or initializers, they will come later.
//
- for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
GlobalVariable *GV = new GlobalVariable(*New,
I->getValueType(),
@@ -72,7 +73,7 @@ std::unique_ptr<Module> llvm::CloneModule(
}
// Loop over the functions in the module, making external functions as before
- for (const Function &I : *M) {
+ for (const Function &I : M) {
Function *NF = Function::Create(cast<FunctionType>(I.getValueType()),
I.getLinkage(), I.getName(), New.get());
NF->copyAttributesFrom(&I);
@@ -80,7 +81,7 @@ std::unique_ptr<Module> llvm::CloneModule(
}
// Loop over the aliases in the module
- for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
I != E; ++I) {
if (!ShouldCloneDefinition(&*I)) {
// An alias cannot act as an external reference, so we need to create
@@ -114,7 +115,7 @@ std::unique_ptr<Module> llvm::CloneModule(
// have been created, loop through and copy the global variable referrers
// over... We also set the attributes on the global now.
//
- for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
if (I->isDeclaration())
continue;
@@ -139,7 +140,7 @@ std::unique_ptr<Module> llvm::CloneModule(
// Similarly, copy over function bodies now...
//
- for (const Function &I : *M) {
+ for (const Function &I : M) {
if (I.isDeclaration())
continue;
@@ -169,7 +170,7 @@ std::unique_ptr<Module> llvm::CloneModule(
}
// And aliases
- for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
I != E; ++I) {
// We already dealt with undefined aliases above.
if (!ShouldCloneDefinition(&*I))
@@ -180,8 +181,9 @@ std::unique_ptr<Module> llvm::CloneModule(
}
// And named metadata....
- for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
- E = M->named_metadata_end(); I != E; ++I) {
+ for (Module::const_named_metadata_iterator I = M.named_metadata_begin(),
+ E = M.named_metadata_end();
+ I != E; ++I) {
const NamedMDNode &NMD = *I;
NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
@@ -194,7 +196,7 @@ std::unique_ptr<Module> llvm::CloneModule(
extern "C" {
LLVMModuleRef LLVMCloneModule(LLVMModuleRef M) {
- return wrap(CloneModule(unwrap(M)).release());
+ return wrap(CloneModule(*unwrap(M)).release());
}
}
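
Note: CloneModule now takes the module by reference, and the overload with a predicate clones only the definitions it approves, leaving everything else as an external declaration. A minimal sketch with a hypothetical name filter:

    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    #include "llvm/Transforms/Utils/ValueMapper.h"
    #include <memory>

    using namespace llvm;

    static std::unique_ptr<Module> clonePrefixed(const Module &M) {
      ValueToValueMapTy VMap;
      // Globals whose definition the predicate rejects are cloned as
      // declarations only.
      return CloneModule(M, VMap, [](const GlobalValue *GV) {
        return GV->getName().startswith("keep_"); // hypothetical filter
      });
    }
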
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 7a404241cb14..f31dab9f96af 100644
--- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -66,6 +66,7 @@
#include <vector>
using namespace llvm;
+using ProfileCount = Function::ProfileCount;
#define DEBUG_TYPE "code-extractor"
@@ -77,12 +78,10 @@ static cl::opt<bool>
AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
cl::desc("Aggregate arguments to code-extracted functions"));
-/// \brief Test whether a block is valid for extraction.
-bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB,
- bool AllowVarArgs) {
- // Landing pads must be in the function where they were inserted for cleanup.
- if (BB.isEHPad())
- return false;
+/// Test whether a block is valid for extraction.
+static bool isBlockValidForExtraction(const BasicBlock &BB,
+ const SetVector<BasicBlock *> &Result,
+ bool AllowVarArgs, bool AllowAlloca) {
// taking the address of a basic block moved to another function is illegal
if (BB.hasAddressTaken())
return false;
@@ -111,11 +110,63 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB,
}
}
- // Don't hoist code containing allocas or invokes. If explicitly requested,
- // allow vastart.
+ // If explicitly requested, allow vastart and alloca. For invoke instructions
+ // verify that extraction is valid.
for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
- if (isa<AllocaInst>(I) || isa<InvokeInst>(I))
- return false;
+ if (isa<AllocaInst>(I)) {
+ if (!AllowAlloca)
+ return false;
+ continue;
+ }
+
+ if (const auto *II = dyn_cast<InvokeInst>(I)) {
+ // Unwind destination (either a landingpad, catchswitch, or cleanuppad)
+ // must be a part of the subgraph which is being extracted.
+ if (auto *UBB = II->getUnwindDest())
+ if (!Result.count(UBB))
+ return false;
+ continue;
+ }
+
+ // All catch handlers of a catchswitch instruction as well as the unwind
+ // destination must be in the subgraph.
+ if (const auto *CSI = dyn_cast<CatchSwitchInst>(I)) {
+ if (auto *UBB = CSI->getUnwindDest())
+ if (!Result.count(UBB))
+ return false;
+ for (auto *HBB : CSI->handlers())
+ if (!Result.count(const_cast<BasicBlock*>(HBB)))
+ return false;
+ continue;
+ }
+
+ // Make sure that entire catch handler is within subgraph. It is sufficient
+ // to check that catch return's block is in the list.
+ if (const auto *CPI = dyn_cast<CatchPadInst>(I)) {
+ for (const auto *U : CPI->users())
+ if (const auto *CRI = dyn_cast<CatchReturnInst>(U))
+ if (!Result.count(const_cast<BasicBlock*>(CRI->getParent())))
+ return false;
+ continue;
+ }
+
+ // And do similar checks for cleanup handler - the entire handler must be
+ // in subgraph which is going to be extracted. For cleanup return should
+ // additionally check that the unwind destination is also in the subgraph.
+ if (const auto *CPI = dyn_cast<CleanupPadInst>(I)) {
+ for (const auto *U : CPI->users())
+ if (const auto *CRI = dyn_cast<CleanupReturnInst>(U))
+ if (!Result.count(const_cast<BasicBlock*>(CRI->getParent())))
+ return false;
+ continue;
+ }
+ if (const auto *CRI = dyn_cast<CleanupReturnInst>(I)) {
+ if (auto *UBB = CRI->getUnwindDest())
+ if (!Result.count(UBB))
+ return false;
+ continue;
+ }
+
if (const CallInst *CI = dyn_cast<CallInst>(I))
if (const Function *F = CI->getCalledFunction())
if (F->getIntrinsicID() == Intrinsic::vastart) {
@@ -129,10 +180,10 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB,
return true;
}
-/// \brief Build a set of blocks to extract if the input blocks are viable.
+/// Build a set of blocks to extract if the input blocks are viable.
static SetVector<BasicBlock *>
buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
- bool AllowVarArgs) {
+ bool AllowVarArgs, bool AllowAlloca) {
assert(!BBs.empty() && "The set of blocks to extract must be non-empty");
SetVector<BasicBlock *> Result;
@@ -145,32 +196,42 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
if (!Result.insert(BB))
llvm_unreachable("Repeated basic blocks in extraction input");
- if (!CodeExtractor::isBlockValidForExtraction(*BB, AllowVarArgs)) {
- Result.clear();
- return Result;
- }
}
-#ifndef NDEBUG
- for (SetVector<BasicBlock *>::iterator I = std::next(Result.begin()),
- E = Result.end();
- I != E; ++I)
- for (pred_iterator PI = pred_begin(*I), PE = pred_end(*I);
- PI != PE; ++PI)
- assert(Result.count(*PI) &&
- "No blocks in this region may have entries from outside the region"
- " except for the first block!");
-#endif
+ for (auto *BB : Result) {
+ if (!isBlockValidForExtraction(*BB, Result, AllowVarArgs, AllowAlloca))
+ return {};
+
+ // Make sure that the first block is not a landing pad.
+ if (BB == Result.front()) {
+ if (BB->isEHPad()) {
+ LLVM_DEBUG(dbgs() << "The first block cannot be an unwind block\n");
+ return {};
+ }
+ continue;
+ }
+
+ // All blocks other than the first must not have predecessors outside of
+ // the subgraph which is being extracted.
+ for (auto *PBB : predecessors(BB))
+ if (!Result.count(PBB)) {
+ LLVM_DEBUG(
+ dbgs() << "No blocks in this region may have entries from "
+ "outside the region except for the first block!\n");
+ return {};
+ }
+ }
return Result;
}
CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
bool AggregateArgs, BlockFrequencyInfo *BFI,
- BranchProbabilityInfo *BPI, bool AllowVarArgs)
+ BranchProbabilityInfo *BPI, bool AllowVarArgs,
+ bool AllowAlloca)
: DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
BPI(BPI), AllowVarArgs(AllowVarArgs),
- Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs)) {}
+ Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)) {}
CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
BlockFrequencyInfo *BFI,
@@ -178,7 +239,8 @@ CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
: DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
BPI(BPI), AllowVarArgs(false),
Blocks(buildExtractionBlockSet(L.getBlocks(), &DT,
- /* AllowVarArgs */ false)) {}
+ /* AllowVarArgs */ false,
+ /* AllowAlloca */ false)) {}
/// definedInRegion - Return true if the specified value is defined in the
/// extracted region.
@@ -562,8 +624,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
BasicBlock *newHeader,
Function *oldFunction,
Module *M) {
- DEBUG(dbgs() << "inputs: " << inputs.size() << "\n");
- DEBUG(dbgs() << "outputs: " << outputs.size() << "\n");
+ LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n");
+ LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n");
// This function returns unsigned, outputs will go back by reference.
switch (NumExitBlocks) {
@@ -577,20 +639,20 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
// Add the types of the input values to the function's argument list
for (Value *value : inputs) {
- DEBUG(dbgs() << "value used in func: " << *value << "\n");
+ LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n");
paramTy.push_back(value->getType());
}
// Add the types of the output values to the function's argument list.
for (Value *output : outputs) {
- DEBUG(dbgs() << "instr used in func: " << *output << "\n");
+ LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n");
if (AggregateArgs)
paramTy.push_back(output->getType());
else
paramTy.push_back(PointerType::getUnqual(output->getType()));
}
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Function type: " << *RetTy << " f(";
for (Type *i : paramTy)
dbgs() << *i << ", ";
@@ -620,16 +682,89 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
if (oldFunction->hasUWTable())
newFunction->setHasUWTable();
- // Inherit all of the target dependent attributes.
+ // Inherit all of the target dependent attributes and white-listed
+ // target independent attributes.
// (e.g. If the extracted region contains a call to an x86.sse
// instruction we need to make sure that the extracted region has the
// "target-features" attribute allowing it to be lowered.
// FIXME: This should be changed to check to see if a specific
// attribute can not be inherited.
- AttrBuilder AB(oldFunction->getAttributes().getFnAttributes());
- for (const auto &Attr : AB.td_attrs())
- newFunction->addFnAttr(Attr.first, Attr.second);
+ for (const auto &Attr : oldFunction->getAttributes().getFnAttributes()) {
+ if (Attr.isStringAttribute()) {
+ if (Attr.getKindAsString() == "thunk")
+ continue;
+ } else
+ switch (Attr.getKindAsEnum()) {
+ // Those attributes cannot be propagated safely. Explicitly list them
+ // here so we get a warning if new attributes are added. This list also
+ // includes non-function attributes.
+ case Attribute::Alignment:
+ case Attribute::AllocSize:
+ case Attribute::ArgMemOnly:
+ case Attribute::Builtin:
+ case Attribute::ByVal:
+ case Attribute::Convergent:
+ case Attribute::Dereferenceable:
+ case Attribute::DereferenceableOrNull:
+ case Attribute::InAlloca:
+ case Attribute::InReg:
+ case Attribute::InaccessibleMemOnly:
+ case Attribute::InaccessibleMemOrArgMemOnly:
+ case Attribute::JumpTable:
+ case Attribute::Naked:
+ case Attribute::Nest:
+ case Attribute::NoAlias:
+ case Attribute::NoBuiltin:
+ case Attribute::NoCapture:
+ case Attribute::NoReturn:
+ case Attribute::None:
+ case Attribute::NonNull:
+ case Attribute::ReadNone:
+ case Attribute::ReadOnly:
+ case Attribute::Returned:
+ case Attribute::ReturnsTwice:
+ case Attribute::SExt:
+ case Attribute::Speculatable:
+ case Attribute::StackAlignment:
+ case Attribute::StructRet:
+ case Attribute::SwiftError:
+ case Attribute::SwiftSelf:
+ case Attribute::WriteOnly:
+ case Attribute::ZExt:
+ case Attribute::EndAttrKinds:
+ continue;
+ // Those attributes should be safe to propagate to the extracted function.
+ case Attribute::AlwaysInline:
+ case Attribute::Cold:
+ case Attribute::NoRecurse:
+ case Attribute::InlineHint:
+ case Attribute::MinSize:
+ case Attribute::NoDuplicate:
+ case Attribute::NoImplicitFloat:
+ case Attribute::NoInline:
+ case Attribute::NonLazyBind:
+ case Attribute::NoRedZone:
+ case Attribute::NoUnwind:
+ case Attribute::OptForFuzzing:
+ case Attribute::OptimizeNone:
+ case Attribute::OptimizeForSize:
+ case Attribute::SafeStack:
+ case Attribute::ShadowCallStack:
+ case Attribute::SanitizeAddress:
+ case Attribute::SanitizeMemory:
+ case Attribute::SanitizeThread:
+ case Attribute::SanitizeHWAddress:
+ case Attribute::StackProtect:
+ case Attribute::StackProtectReq:
+ case Attribute::StackProtectStrong:
+ case Attribute::StrictFP:
+ case Attribute::UWTable:
+ case Attribute::NoCfCheck:
+ break;
+ }
+ newFunction->addFnAttr(Attr);
+ }
newFunction->getBasicBlockList().push_back(newRootNode);
// Create an iterator to name all of the arguments we inserted.
@@ -1093,10 +1228,10 @@ Function *CodeExtractor::extractCodeRegion() {
// Update the entry count of the function.
if (BFI) {
- Optional<uint64_t> EntryCount =
- BFI->getProfileCountFromFreq(EntryFreq.getFrequency());
- if (EntryCount.hasValue())
- newFunction->setEntryCount(EntryCount.getValue());
+ auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency());
+ if (Count.hasValue())
+ newFunction->setEntryCount(
+ ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME
BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency());
}
@@ -1104,6 +1239,10 @@ Function *CodeExtractor::extractCodeRegion() {
moveCodeToFunction(newFunction);
+ // Propagate personality info to the new function if there is one.
+ if (oldFunction->hasPersonalityFn())
+ newFunction->setPersonalityFn(oldFunction->getPersonalityFn());
+
// Update the branch weights for the exit block.
if (BFI && NumExitBlocks > 1)
calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI);
@@ -1139,7 +1278,7 @@ Function *CodeExtractor::extractCodeRegion() {
}
}
- DEBUG(if (verifyFunction(*newFunction))
- report_fatal_error("verifyFunction failed!"));
+ LLVM_DEBUG(if (verifyFunction(*newFunction))
+ report_fatal_error("verifyFunction failed!"));
return newFunction;
}
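
Note: CodeExtractor grows an AllowAlloca knob and now validates invokes and Windows-EH constructs against the extraction set instead of rejecting them outright. A minimal driver sketch, assuming the caller holds a DominatorTree and a region whose only external entry is its first block:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/CodeExtractor.h"

    using namespace llvm;

    static Function *outlineRegion(ArrayRef<BasicBlock *> Region,
                                   DominatorTree &DT) {
      CodeExtractor CE(Region, &DT, /*AggregateArgs=*/false, /*BFI=*/nullptr,
                       /*BPI=*/nullptr, /*AllowVarArgs=*/false,
                       /*AllowAlloca=*/true);
      // extractCodeRegion returns null when the region was rejected.
      return CE.isEligible() ? CE.extractCodeRegion() : nullptr;
    }
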
diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
index 82b67c293102..9a0240144d08 100644
--- a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
@@ -138,7 +138,7 @@ bool optimizeGlobalCtorsList(Module &M,
if (!F)
continue;
- DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
+ LLVM_DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
// We cannot simplify external ctor functions.
if (F->empty())
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
index 6d3d287defdb..56ff03c7f5e1 100644
--- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -9,11 +9,11 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
/// DemoteRegToStack - This function takes a virtual register computed by an
diff --git a/contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index 421663f82565..569ea58a3047 100644
--- a/contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -9,14 +9,13 @@
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
using namespace llvm;
static void insertCall(Function &CurFn, StringRef Func,
@@ -92,17 +91,27 @@ static bool runOnFunction(Function &F, bool PostInlining) {
if (!ExitFunc.empty()) {
for (BasicBlock &BB : F) {
- TerminatorInst *T = BB.getTerminator();
+ Instruction *T = BB.getTerminator();
+ if (!isa<ReturnInst>(T))
+ continue;
+
+ // If T is preceded by a musttail call, that's the real terminator.
+ Instruction *Prev = T->getPrevNode();
+ if (BitCastInst *BCI = dyn_cast_or_null<BitCastInst>(Prev))
+ Prev = BCI->getPrevNode();
+ if (CallInst *CI = dyn_cast_or_null<CallInst>(Prev)) {
+ if (CI->isMustTailCall())
+ T = CI;
+ }
+
DebugLoc DL;
if (DebugLoc TerminatorDL = T->getDebugLoc())
DL = TerminatorDL;
else if (auto SP = F.getSubprogram())
DL = DebugLoc::get(0, 0, SP);
- if (isa<ReturnInst>(T)) {
- insertCall(F, ExitFunc, T, DL);
- Changed = true;
- }
+ insertCall(F, ExitFunc, T, DL);
+ Changed = true;
}
F.removeAttribute(AttributeList::FunctionIndex, ExitAttr);
}
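
Note: the rewritten exit-instrumentation loop handles musttail: a musttail call must immediately precede the ret (allowing one intervening bitcast), so the exit hook has to be inserted before that call rather than between it and the ret. A reduced sketch of the anchor-finding step, mirroring the logic above:

    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Return the instruction an exit hook must precede: the musttail
    // call feeding the ret if there is one, otherwise the ret itself.
    static Instruction *exitAnchor(Instruction *RetTerm) {
      Instruction *Prev = RetTerm->getPrevNode();
      if (auto *BCI = dyn_cast_or_null<BitCastInst>(Prev))
        Prev = BCI->getPrevNode();
      if (auto *CI = dyn_cast_or_null<CallInst>(Prev))
        if (CI->isMustTailCall())
          return CI;
      return RetTerm;
    }
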
diff --git a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
index 78d7474e5b95..c9c96fbe5da0 100644
--- a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
@@ -14,9 +14,9 @@
#include "llvm/Transforms/Utils/EscapeEnumerator.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Module.h"
-#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
static Constant *getDefaultPersonalityFn(Module *M) {
@@ -73,8 +73,8 @@ IRBuilder<> *EscapeEnumerator::Next() {
F.setPersonalityFn(PersFn);
}
- if (isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) {
- report_fatal_error("Funclet EH not supported");
+ if (isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) {
+ report_fatal_error("Scoped EH not supported");
}
LandingPadInst *LPad =
diff --git a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp
index 3c5e299fae98..7fd9425efed3 100644
--- a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
@@ -174,6 +175,11 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
return false;
}
+static Constant *getInitializer(Constant *C) {
+ auto *GV = dyn_cast<GlobalVariable>(C);
+ return GV && GV->hasDefinitiveInitializer() ? GV->getInitializer() : nullptr;
+}
+
/// Return the value that would be computed by a load from P after the stores
/// reflected by 'memory' have been performed. If we can't decide, return null.
Constant *Evaluator::ComputeLoadResult(Constant *P) {
@@ -189,18 +195,96 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) {
return nullptr;
}
- // Handle a constantexpr getelementptr.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
- if (CE->getOpcode() == Instruction::GetElementPtr &&
- isa<GlobalVariable>(CE->getOperand(0))) {
- GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
- if (GV->hasDefinitiveInitializer())
- return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) {
+ switch (CE->getOpcode()) {
+ // Handle a constantexpr getelementptr.
+ case Instruction::GetElementPtr:
+ if (auto *I = getInitializer(CE->getOperand(0)))
+ return ConstantFoldLoadThroughGEPConstantExpr(I, CE);
+ break;
+ // Handle a constantexpr bitcast.
+ case Instruction::BitCast:
+ Constant *Val = getVal(CE->getOperand(0));
+ auto MM = MutatedMemory.find(Val);
+ auto *I = (MM != MutatedMemory.end()) ? MM->second
+ : getInitializer(CE->getOperand(0));
+ if (I)
+ return ConstantFoldLoadThroughBitcast(
+ I, P->getType()->getPointerElementType(), DL);
+ break;
}
+ }
return nullptr; // don't know how to evaluate.
}
+static Function *getFunction(Constant *C) {
+ if (auto *Fn = dyn_cast<Function>(C))
+ return Fn;
+
+ if (auto *Alias = dyn_cast<GlobalAlias>(C))
+ if (auto *Fn = dyn_cast<Function>(Alias->getAliasee()))
+ return Fn;
+ return nullptr;
+}
+
+Function *
+Evaluator::getCalleeWithFormalArgs(CallSite &CS,
+ SmallVector<Constant *, 8> &Formals) {
+ auto *V = CS.getCalledValue();
+ if (auto *Fn = getFunction(getVal(V)))
+ return getFormalParams(CS, Fn, Formals) ? Fn : nullptr;
+
+ auto *CE = dyn_cast<ConstantExpr>(V);
+ if (!CE || CE->getOpcode() != Instruction::BitCast ||
+ !getFormalParams(CS, getFunction(CE->getOperand(0)), Formals))
+ return nullptr;
+
+ return dyn_cast<Function>(
+ ConstantFoldLoadThroughBitcast(CE, CE->getOperand(0)->getType(), DL));
+}
+
+bool Evaluator::getFormalParams(CallSite &CS, Function *F,
+ SmallVector<Constant *, 8> &Formals) {
+ if (!F)
+ return false;
+
+ auto *FTy = F->getFunctionType();
+ if (FTy->getNumParams() > CS.getNumArgOperands()) {
+ LLVM_DEBUG(dbgs() << "Too few arguments for function.\n");
+ return false;
+ }
+
+ auto ArgI = CS.arg_begin();
+ for (auto ParI = FTy->param_begin(), ParE = FTy->param_end(); ParI != ParE;
+ ++ParI) {
+ auto *ArgC = ConstantFoldLoadThroughBitcast(getVal(*ArgI), *ParI, DL);
+ if (!ArgC) {
+ LLVM_DEBUG(dbgs() << "Can not convert function argument.\n");
+ return false;
+ }
+ Formals.push_back(ArgC);
+ ++ArgI;
+ }
+ return true;
+}
+
+/// If call expression contains bitcast then we may need to cast
+/// evaluated return value to a type of the call expression.
+Constant *Evaluator::castCallResultIfNeeded(Value *CallExpr, Constant *RV) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(CallExpr);
+ if (!RV || !CE || CE->getOpcode() != Instruction::BitCast)
+ return RV;
+
+ if (auto *FT =
+ dyn_cast<FunctionType>(CE->getType()->getPointerElementType())) {
+ RV = ConstantFoldLoadThroughBitcast(RV, FT->getReturnType(), DL);
+ if (!RV)
+ LLVM_DEBUG(dbgs() << "Failed to fold bitcast call expr\n");
+ }
+ return RV;
+}
+
/// Evaluate all instructions in block BB, returning true if successful, false
/// if we can't evaluate it. NewBB returns the next BB that control flows into,
/// or null upon return.
@@ -210,22 +294,23 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
while (true) {
Constant *InstResult = nullptr;
- DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
+ LLVM_DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
if (!SI->isSimple()) {
- DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
+ LLVM_DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
return false; // no volatile/atomic accesses.
}
Constant *Ptr = getVal(SI->getOperand(1));
if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) {
- DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
+ LLVM_DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
Ptr = FoldedPtr;
- DEBUG(dbgs() << "; To: " << *Ptr << "\n");
+ LLVM_DEBUG(dbgs() << "; To: " << *Ptr << "\n");
}
if (!isSimpleEnoughPointerToCommit(Ptr)) {
// If this is too complex for us to commit, reject it.
- DEBUG(dbgs() << "Pointer is too complex for us to evaluate store.");
+ LLVM_DEBUG(
+ dbgs() << "Pointer is too complex for us to evaluate store.");
return false;
}
@@ -234,14 +319,15 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
// If this might be too difficult for the backend to handle (e.g. the addr
// of one global variable divided by another) then we can't commit it.
if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) {
- DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val
- << "\n");
+ LLVM_DEBUG(dbgs() << "Store value is too complex to evaluate store. "
+ << *Val << "\n");
return false;
}
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
if (CE->getOpcode() == Instruction::BitCast) {
- DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n");
+ LLVM_DEBUG(dbgs()
+ << "Attempting to resolve bitcast on constant ptr.\n");
// If we're evaluating a store through a bitcast, then we need
// to pull the bitcast off the pointer type and push it onto the
// stored value.
@@ -252,7 +338,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
// In order to push the bitcast onto the stored value, a bitcast
// from NewTy to Val's type must be legal. If it's not, we can try
// introspecting NewTy to find a legal conversion.
- while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
+ Constant *NewVal;
+ while (!(NewVal = ConstantFoldLoadThroughBitcast(Val, NewTy, DL))) {
// If NewTy is a struct, we can convert the pointer to the struct
// into a pointer to its first member.
// FIXME: This could be extended to support arrays as well.
@@ -270,17 +357,14 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
// If we can't improve the situation by introspecting NewTy,
// we have to give up.
} else {
- DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
- "evaluate.\n");
+ LLVM_DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
+ "evaluate.\n");
return false;
}
}
- // If we found compatible types, go ahead and push the bitcast
- // onto the stored value.
- Val = ConstantExpr::getBitCast(Val, NewTy);
-
- DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n");
+ Val = NewVal;
+ LLVM_DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n");
}
}
@@ -289,37 +373,37 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
InstResult = ConstantExpr::get(BO->getOpcode(),
getVal(BO->getOperand(0)),
getVal(BO->getOperand(1)));
- DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult
- << "\n");
+ LLVM_DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: "
+ << *InstResult << "\n");
} else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
InstResult = ConstantExpr::getCompare(CI->getPredicate(),
getVal(CI->getOperand(0)),
getVal(CI->getOperand(1)));
- DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult
- << "\n");
+ LLVM_DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult
+ << "\n");
} else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
InstResult = ConstantExpr::getCast(CI->getOpcode(),
getVal(CI->getOperand(0)),
CI->getType());
- DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult
- << "\n");
+ LLVM_DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult
+ << "\n");
} else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
getVal(SI->getOperand(1)),
getVal(SI->getOperand(2)));
- DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
- << "\n");
+ LLVM_DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
+ << "\n");
} else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) {
InstResult = ConstantExpr::getExtractValue(
getVal(EVI->getAggregateOperand()), EVI->getIndices());
- DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " << *InstResult
- << "\n");
+ LLVM_DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: "
+ << *InstResult << "\n");
} else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) {
InstResult = ConstantExpr::getInsertValue(
getVal(IVI->getAggregateOperand()),
getVal(IVI->getInsertedValueOperand()), IVI->getIndices());
- DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " << *InstResult
- << "\n");
+ LLVM_DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: "
+ << *InstResult << "\n");
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
Constant *P = getVal(GEP->getOperand(0));
SmallVector<Constant*, 8> GEPOps;
@@ -329,60 +413,63 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
InstResult =
ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps,
cast<GEPOperator>(GEP)->isInBounds());
- DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
- << "\n");
+ LLVM_DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult << "\n");
} else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
if (!LI->isSimple()) {
- DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");
+ LLVM_DEBUG(
+ dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");
return false; // no volatile/atomic accesses.
}
Constant *Ptr = getVal(LI->getOperand(0));
if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) {
Ptr = FoldedPtr;
- DEBUG(dbgs() << "Found a constant pointer expression, constant "
- "folding: " << *Ptr << "\n");
+ LLVM_DEBUG(dbgs() << "Found a constant pointer expression, constant "
+ "folding: "
+ << *Ptr << "\n");
}
InstResult = ComputeLoadResult(Ptr);
if (!InstResult) {
- DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load."
- "\n");
+ LLVM_DEBUG(
+ dbgs() << "Failed to compute load result. Can not evaluate load."
+ "\n");
return false; // Could not evaluate load.
}
- DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n");
+ LLVM_DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n");
} else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
if (AI->isArrayAllocation()) {
- DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n");
+ LLVM_DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n");
return false; // Cannot handle array allocs.
}
Type *Ty = AI->getAllocatedType();
AllocaTmps.push_back(llvm::make_unique<GlobalVariable>(
Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty),
- AI->getName()));
+ AI->getName(), /*TLMode=*/GlobalValue::NotThreadLocal,
+ AI->getType()->getPointerAddressSpace()));
InstResult = AllocaTmps.back().get();
- DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
+ LLVM_DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
} else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
CallSite CS(&*CurInst);
// Debug info can safely be ignored here.
if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
- DEBUG(dbgs() << "Ignoring debug info.\n");
+ LLVM_DEBUG(dbgs() << "Ignoring debug info.\n");
++CurInst;
continue;
}
// Cannot handle inline asm.
if (isa<InlineAsm>(CS.getCalledValue())) {
- DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");
+ LLVM_DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");
return false;
}
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
if (MSI->isVolatile()) {
- DEBUG(dbgs() << "Can not optimize a volatile memset " <<
- "intrinsic.\n");
+ LLVM_DEBUG(dbgs() << "Can not optimize a volatile memset "
+ << "intrinsic.\n");
return false;
}
Constant *Ptr = getVal(MSI->getDest());
@@ -390,7 +477,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Constant *DestVal = ComputeLoadResult(getVal(Ptr));
if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
// This memset is a no-op.
- DEBUG(dbgs() << "Ignoring no-op memset.\n");
+ LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n");
++CurInst;
continue;
}
@@ -398,7 +485,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
II->getIntrinsicID() == Intrinsic::lifetime_end) {
- DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
+ LLVM_DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
++CurInst;
continue;
}
@@ -407,7 +494,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
// We don't insert an entry into Values, as it doesn't have a
// meaningful return value.
if (!II->use_empty()) {
- DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n");
+ LLVM_DEBUG(dbgs()
+ << "Found unused invariant_start. Can't evaluate.\n");
return false;
}
ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
@@ -419,54 +507,54 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Size->getValue().getLimitedValue() >=
DL.getTypeStoreSize(ElemTy)) {
Invariants.insert(GV);
- DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
- << "\n");
+ LLVM_DEBUG(dbgs() << "Found a global var that is an invariant: "
+ << *GV << "\n");
} else {
- DEBUG(dbgs() << "Found a global var, but can not treat it as an "
- "invariant.\n");
+ LLVM_DEBUG(dbgs()
+ << "Found a global var, but can not treat it as an "
+ "invariant.\n");
}
}
// Continue even if we do nothing.
++CurInst;
continue;
} else if (II->getIntrinsicID() == Intrinsic::assume) {
- DEBUG(dbgs() << "Skipping assume intrinsic.\n");
+ LLVM_DEBUG(dbgs() << "Skipping assume intrinsic.\n");
++CurInst;
continue;
} else if (II->getIntrinsicID() == Intrinsic::sideeffect) {
- DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n");
+ LLVM_DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n");
++CurInst;
continue;
}
- DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
+ LLVM_DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
return false;
}
// Resolve function pointers.
- Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue()));
+ SmallVector<Constant *, 8> Formals;
+ Function *Callee = getCalleeWithFormalArgs(CS, Formals);
if (!Callee || Callee->isInterposable()) {
- DEBUG(dbgs() << "Can not resolve function pointer.\n");
+ LLVM_DEBUG(dbgs() << "Can not resolve function pointer.\n");
return false; // Cannot resolve.
}
- SmallVector<Constant*, 8> Formals;
- for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i)
- Formals.push_back(getVal(*i));
-
if (Callee->isDeclaration()) {
// If this is a function we can constant fold, do it.
if (Constant *C = ConstantFoldCall(CS, Callee, Formals, TLI)) {
- InstResult = C;
- DEBUG(dbgs() << "Constant folded function call. Result: " <<
- *InstResult << "\n");
+ InstResult = castCallResultIfNeeded(CS.getCalledValue(), C);
+ if (!InstResult)
+ return false;
+ LLVM_DEBUG(dbgs() << "Constant folded function call. Result: "
+ << *InstResult << "\n");
} else {
- DEBUG(dbgs() << "Can not constant fold function call.\n");
+ LLVM_DEBUG(dbgs() << "Can not constant fold function call.\n");
return false;
}
} else {
if (Callee->getFunctionType()->isVarArg()) {
- DEBUG(dbgs() << "Can not constant fold vararg function call.\n");
+ LLVM_DEBUG(dbgs() << "Can not constant fold vararg function call.\n");
return false;
}
@@ -474,21 +562,24 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
// Execute the call, if successful, use the return value.
ValueStack.emplace_back();
if (!EvaluateFunction(Callee, RetVal, Formals)) {
- DEBUG(dbgs() << "Failed to evaluate function.\n");
+ LLVM_DEBUG(dbgs() << "Failed to evaluate function.\n");
return false;
}
ValueStack.pop_back();
- InstResult = RetVal;
+ InstResult = castCallResultIfNeeded(CS.getCalledValue(), RetVal);
+ if (RetVal && !InstResult)
+ return false;
if (InstResult) {
- DEBUG(dbgs() << "Successfully evaluated function. Result: "
- << *InstResult << "\n\n");
+ LLVM_DEBUG(dbgs() << "Successfully evaluated function. Result: "
+ << *InstResult << "\n\n");
} else {
- DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n");
+ LLVM_DEBUG(dbgs()
+ << "Successfully evaluated function. Result: 0\n\n");
}
}
} else if (isa<TerminatorInst>(CurInst)) {
- DEBUG(dbgs() << "Found a terminator instruction.\n");
+ LLVM_DEBUG(dbgs() << "Found a terminator instruction.\n");
if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
if (BI->isUnconditional()) {
@@ -515,17 +606,18 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
NextBB = nullptr;
} else {
// invoke, unwind, resume, unreachable.
- DEBUG(dbgs() << "Can not handle terminator.");
+ LLVM_DEBUG(dbgs() << "Can not handle terminator.");
return false; // Cannot handle this terminator.
}
// We succeeded at evaluating this block!
- DEBUG(dbgs() << "Successfully evaluated block.\n");
+ LLVM_DEBUG(dbgs() << "Successfully evaluated block.\n");
return true;
} else {
// Did not know how to evaluate this!
- DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction."
- "\n");
+ LLVM_DEBUG(
+ dbgs() << "Failed to evaluate block due to unhandled instruction."
+ "\n");
return false;
}
@@ -539,7 +631,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
// If we just processed an invoke, we finished evaluating the block.
if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
NextBB = II->getNormalDest();
- DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n");
+ LLVM_DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n");
return true;
}
@@ -578,7 +670,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
while (true) {
BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
- DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
+ LLVM_DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
if (!EvaluateBlock(CurInst, NextBB))
return false;
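
A note on the Evaluator hunks above, condensed into one place: the new load
handling looks through a bitcast constant expression to a global's initializer
and reinterprets it at the loaded type. A minimal, hedged sketch follows; the
helper name and framing are invented for illustration, and only
ConstantFoldLoadThroughBitcast is the real API (llvm/Analysis/ConstantFolding.h):

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Sketch: fold a load of 'LoadTy' through a bitcast of a global, provided the
// global has an initializer no other translation unit can override.
static Constant *loadThroughBitcast(ConstantExpr *CE, Type *LoadTy,
                                    const DataLayout &DL) {
  if (CE->getOpcode() != Instruction::BitCast)
    return nullptr;
  auto *GV = dyn_cast<GlobalVariable>(CE->getOperand(0));
  if (!GV || !GV->hasDefinitiveInitializer())
    return nullptr; // not foldable: the initializer may be replaced at link time
  return ConstantFoldLoadThroughBitcast(GV->getInitializer(), LoadTy, DL);
}
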
diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
index 5fdcc6d1d727..3c6c9c9a5df4 100644
--- a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
@@ -13,6 +13,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
@@ -24,7 +25,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
using namespace llvm;
@@ -36,16 +36,16 @@ namespace {
class FlattenCFGOpt {
AliasAnalysis *AA;
- /// \brief Use parallel-and or parallel-or to generate conditions for
+ /// Use parallel-and or parallel-or to generate conditions for
/// conditional branches.
bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder);
- /// \brief If \param BB is the merge block of an if-region, attempt to merge
+ /// If \param BB is the merge block of an if-region, attempt to merge
/// the if-region with an adjacent if-region upstream if two if-regions
/// contain identical instructions.
bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder);
- /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which
+ /// Compare a pair of blocks: \p Block1 and \p Block2, which
/// are from two if-regions whose entry blocks are \p Head1 and \p
/// Head2. \returns true if \p Block1 and \p Block2 contain identical
/// instructions, and have no memory reference alias with \p Head2.
@@ -312,7 +312,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
new UnreachableInst(CB->getContext(), CB);
} while (Iteration);
- DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
+ LLVM_DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
return true;
}
@@ -469,7 +469,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
// Remove \param SecondEntryBlock
SecondEntryBlock->dropAllReferences();
SecondEntryBlock->eraseFromParent();
- DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock);
+ LLVM_DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock);
return true;
}
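
For context on the doxygen comments in this file: FlattenParallelAndOr
collapses short-circuit control flow into a single conditional branch. A rough
source-level analogue of the transform (illustrative only; the pass operates
on the IR CFG, not on C++ source):

// Before flattening: two conditional branches, three basic blocks.
bool nested(int a, int b) {
  if (a > 0)
    if (b > 0)
      return true;
  return false;
}

// After flattening: both conditions are evaluated unconditionally and
// combined with a bitwise AND, leaving a single conditional branch.
bool flattened(int a, int b) {
  return (a > 0) & (b > 0);
}
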
diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index 75539428b688..69203f9f2485 100644
--- a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -18,7 +18,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -377,7 +376,7 @@ int FunctionComparator::cmpConstants(const Constant *L,
}
}
default: // Unknown constant, abort.
- DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
+ LLVM_DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
llvm_unreachable("Constant ValueID not recognized.");
return -1;
}
@@ -925,7 +924,7 @@ FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
H.add(F.arg_size());
SmallVector<const BasicBlock *, 8> BBs;
- SmallSet<const BasicBlock *, 16> VisitedBBs;
+ SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
// Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
// accumulating the hash of the function "structure." (BB and opcode sequence)
diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index 6b5f593073b4..479816a339d0 100644
--- a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -206,15 +206,10 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
// definition.
if (GV.hasName()) {
ValueInfo VI = ImportIndex.getValueInfo(GV.getGUID());
- if (VI) {
- // Need to check all summaries are local in case of hash collisions.
- bool IsLocal = VI.getSummaryList().size() &&
- llvm::all_of(VI.getSummaryList(),
- [](const std::unique_ptr<GlobalValueSummary> &Summary) {
- return Summary->isDSOLocal();
- });
- if (IsLocal)
- GV.setDSOLocal(true);
+ if (VI && VI.isDSOLocal()) {
+ GV.setDSOLocal(true);
+ if (GV.hasDLLImportStorageClass())
+ GV.setDLLStorageClass(GlobalValue::DefaultStorageClass);
}
}
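
The removed all_of loop did not disappear; the same check now lives behind
ValueInfo::isDSOLocal(). A hedged sketch of the predicate's shape (the real
implementation is in the summary index; the helper name and framing here are
illustrative, assuming llvm/ADT/ArrayRef.h and llvm/ADT/STLExtras.h):

// A GUID is dso_local only if it has at least one summary and, to be robust
// against GUID hash collisions, every summary agrees it is dso_local.
static bool allSummariesDSOLocal(
    ArrayRef<std::unique_ptr<GlobalValueSummary>> Summaries) {
  return !Summaries.empty() &&
         llvm::all_of(Summaries,
                      [](const std::unique_ptr<GlobalValueSummary> &S) {
                        return S->isDSOLocal();
                      });
}
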
diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
index 245fefb38ee8..ff6970db47da 100644
--- a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -60,7 +60,7 @@ bool llvm::isSafeToDestroyConstant(const Constant *C) {
}
static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
- SmallPtrSetImpl<const PHINode *> &PhiUsers) {
+ SmallPtrSetImpl<const Value *> &VisitedUsers) {
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
if (GV->isExternallyInitialized())
GS.StoredType = GlobalStatus::StoredOnce;
@@ -75,7 +75,8 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
if (!isa<PointerType>(CE->getType()))
return true;
- if (analyzeGlobalAux(CE, GS, PhiUsers))
+ // FIXME: Do we need to add constexpr selects to VisitedUsers?
+ if (analyzeGlobalAux(CE, GS, VisitedUsers))
return true;
} else if (const Instruction *I = dyn_cast<Instruction>(UR)) {
if (!GS.HasMultipleAccessingFunctions) {
@@ -137,20 +138,18 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
GS.StoredType = GlobalStatus::Stored;
}
}
- } else if (isa<BitCastInst>(I)) {
- if (analyzeGlobalAux(I, GS, PhiUsers))
+ } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) {
+ // Skip over bitcasts and GEPs; we don't care about the type or offset
+ // of the pointer.
+ if (analyzeGlobalAux(I, GS, VisitedUsers))
return true;
- } else if (isa<GetElementPtrInst>(I)) {
- if (analyzeGlobalAux(I, GS, PhiUsers))
- return true;
- } else if (isa<SelectInst>(I)) {
- if (analyzeGlobalAux(I, GS, PhiUsers))
- return true;
- } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
- // PHI nodes we can check just like select or GEP instructions, but we
- // have to be careful about infinite recursion.
- if (PhiUsers.insert(PN).second) // Not already visited.
- if (analyzeGlobalAux(I, GS, PhiUsers))
+ } else if (isa<SelectInst>(I) || isa<PHINode>(I)) {
+ // Look through selects and PHIs to find if the pointer is
+ // conditionally accessed. Make sure we only visit an instruction
+ // once; otherwise, we can get infinite recursion or exponential
+ // compile time.
+ if (VisitedUsers.insert(I).second)
+ if (analyzeGlobalAux(I, GS, VisitedUsers))
return true;
} else if (isa<CmpInst>(I)) {
GS.IsCompared = true;
@@ -191,6 +190,6 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
GlobalStatus::GlobalStatus() = default;
bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) {
- SmallPtrSet<const PHINode *, 16> PhiUsers;
- return analyzeGlobalAux(V, GS, PhiUsers);
+ SmallPtrSet<const Value *, 16> VisitedUsers;
+ return analyzeGlobalAux(V, GS, VisitedUsers);
}
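
The change from a PHI-only set to a set of all visited users is the usual
defense against cyclic use graphs. The pattern in miniature, as plain C++ with
no LLVM types (purely illustrative):

#include <unordered_set>
#include <vector>

struct Node { std::vector<Node *> Users; };

// Returns true if some user fails the analysis. Each node enters the set at
// most once, so mutually-referencing PHIs/selects can neither recurse forever
// nor revisit the same subgraph exponentially often.
static bool analyze(const Node *N, std::unordered_set<const Node *> &Visited) {
  for (const Node *U : N->Users)
    if (Visited.insert(U).second) // not seen before
      if (analyze(U, Visited))
        return true;
  return false;
}
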
diff --git a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
index b8c12ad5ea84..8382220fc9e1 100644
--- a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
@@ -161,7 +161,7 @@ void ImportedFunctionsInliningStatistics::dump(const bool Verbose) {
void ImportedFunctionsInliningStatistics::calculateRealInlines() {
// Removing duplicated Callers.
- std::sort(NonImportedCallers.begin(), NonImportedCallers.end());
+ llvm::sort(NonImportedCallers.begin(), NonImportedCallers.end());
NonImportedCallers.erase(
std::unique(NonImportedCallers.begin(), NonImportedCallers.end()),
NonImportedCallers.end());
@@ -190,13 +190,14 @@ ImportedFunctionsInliningStatistics::getSortedNodes() {
for (const NodesMapTy::value_type& Node : NodesMap)
SortedNodes.push_back(&Node);
- std::sort(
+ llvm::sort(
SortedNodes.begin(), SortedNodes.end(),
[&](const SortedNodesTy::value_type &Lhs,
const SortedNodesTy::value_type &Rhs) {
if (Lhs->second->NumberOfInlines != Rhs->second->NumberOfInlines)
return Lhs->second->NumberOfInlines > Rhs->second->NumberOfInlines;
- if (Lhs->second->NumberOfRealInlines != Rhs->second->NumberOfRealInlines)
+ if (Lhs->second->NumberOfRealInlines !=
+ Rhs->second->NumberOfRealInlines)
return Lhs->second->NumberOfRealInlines >
Rhs->second->NumberOfRealInlines;
return Lhs->first() < Rhs->first();
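
The std::sort to llvm::sort substitutions are behavior-relevant in checked
builds: llvm::sort (llvm/ADT/STLExtras.h) shuffles the range first under
EXPENSIVE_CHECKS, so comparators that are not strict weak orderings surface as
nondeterministic output. A hedged sketch of that idea, not LLVM's
implementation:

#include <algorithm>
#include <random>

template <class It, class Cmp> void checkedSort(It B, It E, Cmp C) {
#ifdef EXPENSIVE_CHECKS
  // Randomize the input so an under-specified comparator yields a visibly
  // different (and test-breaking) order from run to run.
  std::mt19937 Gen(std::random_device{}());
  std::shuffle(B, E, Gen);
#endif
  std::sort(B, E, C);
}
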
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index fedf6e100d6c..0315aac1cf84 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -29,6 +29,7 @@
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
@@ -60,7 +61,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
@@ -72,6 +72,7 @@
#include <vector>
using namespace llvm;
+using ProfileCount = Function::ProfileCount;
static cl::opt<bool>
EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
@@ -1247,7 +1248,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
// Always generate a memcpy of alignment 1 here because we don't know
// the alignment of the src pointer. Other optimizations can infer
// better alignment.
- Builder.CreateMemCpy(Dst, Src, Size, /*Align=*/1);
+ Builder.CreateMemCpy(Dst, /*DstAlign*/1, Src, /*SrcAlign*/1, Size);
}
/// When inlining a call site that has a byval argument,
@@ -1431,29 +1432,29 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock,
/// Update the branch metadata for cloned call instructions.
static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
- const Optional<uint64_t> &CalleeEntryCount,
+ const ProfileCount &CalleeEntryCount,
const Instruction *TheCall,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *CallerBFI) {
- if (!CalleeEntryCount.hasValue() || CalleeEntryCount.getValue() < 1)
+ if (!CalleeEntryCount.hasValue() || CalleeEntryCount.isSynthetic() ||
+ CalleeEntryCount.getCount() < 1)
return;
- Optional<uint64_t> CallSiteCount =
- PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None;
+ auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None;
uint64_t CallCount =
std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0,
- CalleeEntryCount.getValue());
+ CalleeEntryCount.getCount());
for (auto const &Entry : VMap)
if (isa<CallInst>(Entry.first))
if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
- CI->updateProfWeight(CallCount, CalleeEntryCount.getValue());
+ CI->updateProfWeight(CallCount, CalleeEntryCount.getCount());
for (BasicBlock &BB : *Callee)
// No need to update the callsite if it is pruned during inlining.
if (VMap.count(&BB))
for (Instruction &I : BB)
if (CallInst *CI = dyn_cast<CallInst>(&I))
- CI->updateProfWeight(CalleeEntryCount.getValue() - CallCount,
- CalleeEntryCount.getValue());
+ CI->updateProfWeight(CalleeEntryCount.getCount() - CallCount,
+ CalleeEntryCount.getCount());
}
/// Update the entry count of callee after inlining.
@@ -1467,18 +1468,19 @@ static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB,
// callsite is M, the new callee count is set to N - M. M is estimated from
// the caller's entry count, its entry block frequency and the block frequency
// of the callsite.
- Optional<uint64_t> CalleeCount = Callee->getEntryCount();
+ auto CalleeCount = Callee->getEntryCount();
if (!CalleeCount.hasValue() || !PSI)
return;
- Optional<uint64_t> CallCount = PSI->getProfileCount(CallInst, CallerBFI);
+ auto CallCount = PSI->getProfileCount(CallInst, CallerBFI);
if (!CallCount.hasValue())
return;
// Since CallSiteCount is an estimate, it could exceed the original callee
// count and has to be set to 0.
- if (CallCount.getValue() > CalleeCount.getValue())
- Callee->setEntryCount(0);
+ if (CallCount.getValue() > CalleeCount.getCount())
+ CalleeCount.setCount(0);
else
- Callee->setEntryCount(CalleeCount.getValue() - CallCount.getValue());
+ CalleeCount.setCount(CalleeCount.getCount() - CallCount.getValue());
+ Callee->setEntryCount(CalleeCount);
}
/// This function inlines the called function into the basic block of the
@@ -1500,10 +1502,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
IFI.reset();
Function *CalledFunc = CS.getCalledFunction();
- if (!CalledFunc || // Can't inline external function or indirect
- CalledFunc->isDeclaration() ||
- (!ForwardVarArgsTo && CalledFunc->isVarArg())) // call, or call to a vararg function!
- return false;
+ if (!CalledFunc || // Can't inline external function or indirect
+ CalledFunc->isDeclaration()) // call!
+ return false;
// The inliner does not know how to inline through calls with operand bundles
// in general ...
@@ -1568,7 +1569,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
Instruction *CallSiteEHPad = nullptr;
if (CallerPersonality) {
EHPersonality Personality = classifyEHPersonality(CallerPersonality);
- if (isFuncletEHPersonality(Personality)) {
+ if (isScopedEHPersonality(Personality)) {
Optional<OperandBundleUse> ParentFunclet =
CS.getOperandBundle(LLVMContext::OB_funclet);
if (ParentFunclet)
@@ -1630,9 +1631,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
auto &DL = Caller->getParent()->getDataLayout();
- assert((CalledFunc->arg_size() == CS.arg_size() || ForwardVarArgsTo) &&
- "Varargs calls can only be inlined if the Varargs are forwarded!");
-
// Calculate the vector of arguments to pass into the function cloner, which
// matches up the formal to the actual argument values.
CallSite::arg_iterator AI = CS.arg_begin();
@@ -1815,9 +1813,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
SmallVector<Value*,4> VarArgsToForward;
+ SmallVector<AttributeSet, 4> VarArgsAttrs;
for (unsigned i = CalledFunc->getFunctionType()->getNumParams();
- i < CS.getNumArgOperands(); i++)
+ i < CS.getNumArgOperands(); i++) {
VarArgsToForward.push_back(CS.getArgOperand(i));
+ VarArgsAttrs.push_back(CS.getAttributes().getParamAttributes(i));
+ }
bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
if (InlinedFunctionInfo.ContainsCalls) {
@@ -1825,6 +1826,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (CallInst *CI = dyn_cast<CallInst>(TheCall))
CallSiteTailKind = CI->getTailCallKind();
+ // For inlining purposes, the "notail" marker is the same as no marker.
+ if (CallSiteTailKind == CallInst::TCK_NoTail)
+ CallSiteTailKind = CallInst::TCK_None;
+
for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
++BB) {
for (auto II = BB->begin(); II != BB->end();) {
@@ -1833,6 +1838,40 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (!CI)
continue;
+ // Forward varargs from inlined call site to calls to the
+ // ForwardVarArgsTo function, if requested, and to musttail calls.
+ if (!VarArgsToForward.empty() &&
+ ((ForwardVarArgsTo &&
+ CI->getCalledFunction() == ForwardVarArgsTo) ||
+ CI->isMustTailCall())) {
+ // Collect attributes for non-vararg parameters.
+ AttributeList Attrs = CI->getAttributes();
+ SmallVector<AttributeSet, 8> ArgAttrs;
+ if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) {
+ for (unsigned ArgNo = 0;
+ ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo)
+ ArgAttrs.push_back(Attrs.getParamAttributes(ArgNo));
+ }
+
+ // Add VarArg attributes.
+ ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end());
+ Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttributes(),
+ Attrs.getRetAttributes(), ArgAttrs);
+ // Add VarArgs to existing parameters.
+ SmallVector<Value *, 6> Params(CI->arg_operands());
+ Params.append(VarArgsToForward.begin(), VarArgsToForward.end());
+ CallInst *NewCI =
+ CallInst::Create(CI->getCalledFunction() ? CI->getCalledFunction()
+ : CI->getCalledValue(),
+ Params, "", CI);
+ NewCI->setDebugLoc(CI->getDebugLoc());
+ NewCI->setAttributes(Attrs);
+ NewCI->setCallingConv(CI->getCallingConv());
+ CI->replaceAllUsesWith(NewCI);
+ CI->eraseFromParent();
+ CI = NewCI;
+ }
+
if (Function *F = CI->getCalledFunction())
InlinedDeoptimizeCalls |=
F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
@@ -1850,6 +1889,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// f -> musttail g -> tail f ==> f -> tail f
// f -> g -> musttail f ==> f -> f
// f -> g -> tail f ==> f -> f
+ //
+ // Inlined notail calls should remain notail calls.
CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
if (ChildTCK != CallInst::TCK_NoTail)
ChildTCK = std::min(CallSiteTailKind, ChildTCK);
@@ -1860,16 +1901,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// 'nounwind'.
if (MarkNoUnwind)
CI->setDoesNotThrow();
-
- if (ForwardVarArgsTo && !VarArgsToForward.empty() &&
- CI->getCalledFunction() == ForwardVarArgsTo) {
- SmallVector<Value*, 6> Params(CI->arg_operands());
- Params.append(VarArgsToForward.begin(), VarArgsToForward.end());
- CallInst *Call = CallInst::Create(CI->getCalledFunction(), Params, "", CI);
- Call->setDebugLoc(CI->getDebugLoc());
- CI->replaceAllUsesWith(Call);
- CI->eraseFromParent();
- }
}
}
}
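
The tail-call-kind bookkeeping above is spread across two hunks; isolated, the
propagation rule looks as follows (sketch; like the real code, it relies on
the TCK_None < TCK_Tail < TCK_MustTail enum ordering):

#include "llvm/IR/Instructions.h"
#include <algorithm>

static llvm::CallInst::TailCallKind
propagateTailKind(llvm::CallInst::TailCallKind CallSiteTCK,
                  llvm::CallInst::TailCallKind ChildTCK) {
  using CI = llvm::CallInst;
  // For inlining purposes, a call-site 'notail' is the same as no marker...
  if (CallSiteTCK == CI::TCK_NoTail)
    CallSiteTCK = CI::TCK_None;
  // ...while an explicit 'notail' on the inlined call is preserved as-is;
  // any other kind is weakened to the call site's kind if that is weaker.
  if (ChildTCK != CI::TCK_NoTail)
    ChildTCK = std::min(CallSiteTCK, ChildTCK);
  return ChildTCK;
}
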
diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
index 23ec45edb3ef..003721f2b939 100644
--- a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -17,7 +17,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
using namespace llvm;
namespace {
diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
index 5a90dcb033b2..3fbb3487884b 100644
--- a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
@@ -372,7 +372,7 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
/// information about the operands are known. Implements both 32bit and 64bit
/// scalar division.
///
-/// @brief Replace Rem with generated code.
+/// Replace Rem with generated code.
bool llvm::expandRemainder(BinaryOperator *Rem) {
assert((Rem->getOpcode() == Instruction::SRem ||
Rem->getOpcode() == Instruction::URem) &&
@@ -430,7 +430,7 @@ bool llvm::expandRemainder(BinaryOperator *Rem) {
/// when more information about the operands are known. Implements both
/// 32bit and 64bit scalar division.
///
-/// @brief Replace Div with generated code.
+/// Replace Div with generated code.
bool llvm::expandDivision(BinaryOperator *Div) {
assert((Div->getOpcode() == Instruction::SDiv ||
Div->getOpcode() == Instruction::UDiv) &&
@@ -482,7 +482,7 @@ bool llvm::expandDivision(BinaryOperator *Div) {
/// that have no or very little support for smaller than 32 bit integer
/// arithmetic.
///
-/// @brief Replace Rem with emulation code.
+/// Replace Rem with emulation code.
bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
assert((Rem->getOpcode() == Instruction::SRem ||
Rem->getOpcode() == Instruction::URem) &&
@@ -531,7 +531,7 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
/// 64 bits. Uses the above routines and extends the inputs/truncates the
/// outputs to operate in 64 bits.
///
-/// @brief Replace Rem with emulation code.
+/// Replace Rem with emulation code.
bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) {
assert((Rem->getOpcode() == Instruction::SRem ||
Rem->getOpcode() == Instruction::URem) &&
@@ -580,7 +580,7 @@ bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) {
/// in 32 bits; that is, these routines are good for targets that have no
/// or very little support for smaller than 32 bit integer arithmetic.
///
-/// @brief Replace Div with emulation code.
+/// Replace Div with emulation code.
bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
assert((Div->getOpcode() == Instruction::SDiv ||
Div->getOpcode() == Instruction::UDiv) &&
@@ -628,7 +628,7 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
/// above routines and extends the inputs/truncates the outputs to operate
/// in 64 bits.
///
-/// @brief Replace Div with emulation code.
+/// Replace Div with emulation code.
bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) {
assert((Div->getOpcode() == Instruction::SDiv ||
Div->getOpcode() == Instruction::UDiv) &&
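
These expansion entry points are meant to be driven by a target-specific pass.
A minimal, hypothetical driver (the collect-then-rewrite split matters because
the expansion introduces new control flow):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Transforms/Utils/IntegerDivision.h"
using namespace llvm;

static bool expandAllDivides(Function &F) {
  // Collect first: expandDivision() adds new basic blocks and erases the
  // original instruction, which would invalidate a plain instruction walk.
  SmallVector<BinaryOperator *, 8> Worklist;
  for (Instruction &I : instructions(F))
    if (auto *BO = dyn_cast<BinaryOperator>(&I))
      if (BO->getOpcode() == Instruction::SDiv ||
          BO->getOpcode() == Instruction::UDiv)
        Worklist.push_back(BO);
  for (BinaryOperator *BO : Worklist)
    expandDivision(BO); // rewrite into compare/shift/subtract IR
  return !Worklist.empty();
}
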
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
index ae0e2bb6c280..956d0387c7a8 100644
--- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -36,13 +36,14 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/Pass.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
@@ -214,18 +215,27 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
Worklist.push_back(PostProcessPN);
// Keep track of PHI nodes that we want to remove because they did not have
- // any uses rewritten.
+ // any uses rewritten. If the new PHI is used, store it so that we can
+ // try to propagate dbg.value intrinsics to it.
+ SmallVector<PHINode *, 2> NeedDbgValues;
for (PHINode *PN : AddedPHIs)
if (PN->use_empty())
PHIsToRemove.insert(PN);
-
+ else
+ NeedDbgValues.push_back(PN);
+ insertDebugValuesForPHIs(InstBB, NeedDbgValues);
Changed = true;
}
- // Remove PHI nodes that did not have any uses rewritten.
- for (PHINode *PN : PHIsToRemove) {
- assert (PN->use_empty() && "Trying to remove a phi with uses.");
- PN->eraseFromParent();
- }
+ // Remove PHI nodes that did not have any uses rewritten. We need to redo the
+ // use_empty() check here, because even if the PHI node wasn't used when added
+ // to PHIsToRemove, later added PHI nodes can be using it. This cleanup is
+  // not guaranteed to handle trees/cycles of PHI nodes that are used only by
+  // each other. Such situations have only been noticed when the input IR
+ // contains unreachable code, and leaving some extra redundant PHI nodes in
+ // such situations is considered a minor problem.
+ for (PHINode *PN : PHIsToRemove)
+ if (PN->use_empty())
+ PN->eraseFromParent();
return Changed;
}
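
The re-checked use_empty() above guards a subtle ordering hazard: a PHI
recorded as unused can later be picked up as the incoming value of another PHI
added by the same loop, so deadness must be re-established at deletion time.
In isolation (hedged sketch with LLVM types; the helper name is invented):

static void eraseStillDeadPHIs(SmallPtrSetImpl<PHINode *> &Candidates) {
  for (PHINode *PN : Candidates)
    if (PN->use_empty())     // may have gained a user since it was recorded
      PN->eraseFromParent();
}
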
diff --git a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index 42aca757c2af..9832a6f24e1f 100644
--- a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -79,11 +79,11 @@ public:
bool perform() {
bool Changed = false;
for (auto &CI : WorkList) {
- DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName()
- << "\n");
+ LLVM_DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName()
+ << "\n");
if (perform(CI)) {
Changed = true;
- DEBUG(dbgs() << "Transformed\n");
+ LLVM_DEBUG(dbgs() << "Transformed\n");
}
}
return Changed;
@@ -421,7 +421,7 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,
const LibFunc &Func) {
// FIXME: LibFunc_powf and powl TBD.
if (Func != LibFunc_pow) {
- DEBUG(dbgs() << "Not handled powf() and powl()\n");
+ LLVM_DEBUG(dbgs() << "Not handled powf() and powl()\n");
return nullptr;
}
@@ -433,7 +433,7 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,
if (ConstantFP *CF = dyn_cast<ConstantFP>(Base)) {
double D = CF->getValueAPF().convertToDouble();
if (D < 1.0f || D > APInt::getMaxValue(8).getZExtValue()) {
- DEBUG(dbgs() << "Not handled pow(): constant base out of range\n");
+ LLVM_DEBUG(dbgs() << "Not handled pow(): constant base out of range\n");
return nullptr;
}
@@ -447,7 +447,7 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,
// If the Base value coming from an integer type.
Instruction *I = dyn_cast<Instruction>(Base);
if (!I) {
- DEBUG(dbgs() << "Not handled pow(): FP type base\n");
+ LLVM_DEBUG(dbgs() << "Not handled pow(): FP type base\n");
return nullptr;
}
unsigned Opcode = I->getOpcode();
@@ -461,7 +461,7 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,
else if (BW == 32)
UpperV = 32.0f;
else {
- DEBUG(dbgs() << "Not handled pow(): type too wide\n");
+ LLVM_DEBUG(dbgs() << "Not handled pow(): type too wide\n");
return nullptr;
}
@@ -477,7 +477,7 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,
Value *Cond0 = BBBuilder.CreateFCmp(CmpInst::FCMP_OLE, Base, V0);
return BBBuilder.CreateOr(Cond0, Cond);
}
- DEBUG(dbgs() << "Not handled pow(): base not from integer convert\n");
+ LLVM_DEBUG(dbgs() << "Not handled pow(): base not from integer convert\n");
return nullptr;
}
@@ -496,9 +496,9 @@ void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) {
SuccBB->setName("cdce.end");
CI->removeFromParent();
CallBB->getInstList().insert(CallBB->getFirstInsertionPt(), CI);
- DEBUG(dbgs() << "== Basic Block After ==");
- DEBUG(dbgs() << *CallBB->getSinglePredecessor() << *CallBB
- << *CallBB->getSingleSuccessor() << "\n");
+ LLVM_DEBUG(dbgs() << "== Basic Block After ==");
+ LLVM_DEBUG(dbgs() << *CallBB->getSinglePredecessor() << *CallBB
+ << *CallBB->getSingleSuccessor() << "\n");
}
// Perform the transformation to a single candidate.
@@ -529,10 +529,7 @@ static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
bool Changed = CCDCE.perform();
// Verify the dominator after we've updated it locally.
-#ifndef NDEBUG
- if (DT)
- DT->verifyDomTree();
-#endif
+ assert(!DT || DT->verify(DominatorTree::VerificationLevel::Fast));
return Changed;
}
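
The conditions built by generateCondForPow implement the pass's core trick: a
libcall whose result is unused is only observable through errno, so it only
needs to execute on error-prone inputs. At the C level the rewrite looks
roughly like this (bounds simplified and hypothetical; the real guard is
derived from the exponent's bit width):

#include <math.h>

// Before: result unused; the call survives solely because pow() may set
// errno on a domain or range error.
void before(double x, double y) { (void)pow(x, y); }

// After (sketch): the call runs only when errno could actually be set.
void after(double x, double y) {
  if (!(x >= 1.0 && x <= 127.0 && y >= 0.0 && y <= 32.0)) // hypothetical bounds
    (void)pow(x, y);
}
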
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index acccf7abf808..ae3cb077a3af 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -73,6 +73,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <climits>
@@ -100,7 +101,8 @@ STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
/// conditions and indirectbr addresses this might make dead if
/// DeleteDeadConditions is true.
bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
- const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI,
+ DeferredDominance *DDT) {
TerminatorInst *T = BB->getTerminator();
IRBuilder<> Builder(T);
@@ -123,6 +125,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Replace the conditional branch with an unconditional one.
Builder.CreateBr(Destination);
BI->eraseFromParent();
+ if (DDT)
+ DDT->deleteEdge(BB, OldDest);
return true;
}
@@ -193,9 +197,12 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
createBranchWeights(Weights));
}
// Remove this entry.
- DefaultDest->removePredecessor(SI->getParent());
+ BasicBlock *ParentBB = SI->getParent();
+ DefaultDest->removePredecessor(ParentBB);
i = SI->removeCase(i);
e = SI->case_end();
+ if (DDT)
+ DDT->deleteEdge(ParentBB, DefaultDest);
continue;
}
@@ -221,14 +228,20 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Insert the new branch.
Builder.CreateBr(TheOnlyDest);
BasicBlock *BB = SI->getParent();
+      std::vector<DominatorTree::UpdateType> Updates;
+ if (DDT)
+ Updates.reserve(SI->getNumSuccessors() - 1);
// Remove entries from PHI nodes which we no longer branch to...
for (BasicBlock *Succ : SI->successors()) {
// Found case matching a constant operand?
- if (Succ == TheOnlyDest)
+ if (Succ == TheOnlyDest) {
TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest
- else
+ } else {
Succ->removePredecessor(BB);
+ if (DDT)
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
+ }
}
// Delete the old switch.
@@ -236,6 +249,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
SI->eraseFromParent();
if (DeleteDeadConditions)
RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
+ if (DDT)
+ DDT->applyUpdates(Updates);
return true;
}
@@ -281,14 +296,23 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
if (auto *BA =
dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) {
BasicBlock *TheOnlyDest = BA->getBasicBlock();
+      std::vector<DominatorTree::UpdateType> Updates;
+ if (DDT)
+ Updates.reserve(IBI->getNumDestinations() - 1);
+
// Insert the new branch.
Builder.CreateBr(TheOnlyDest);
for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
- if (IBI->getDestination(i) == TheOnlyDest)
+ if (IBI->getDestination(i) == TheOnlyDest) {
TheOnlyDest = nullptr;
- else
- IBI->getDestination(i)->removePredecessor(IBI->getParent());
+ } else {
+ BasicBlock *ParentBB = IBI->getParent();
+ BasicBlock *DestBB = IBI->getDestination(i);
+ DestBB->removePredecessor(ParentBB);
+ if (DDT)
+ Updates.push_back({DominatorTree::Delete, ParentBB, DestBB});
+ }
}
Value *Address = IBI->getAddress();
IBI->eraseFromParent();
@@ -303,6 +327,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
new UnreachableInst(BB->getContext(), BB);
}
+ if (DDT)
+ DDT->applyUpdates(Updates);
return true;
}
}
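
Every DDT-aware change in this file follows the same shape, worth stating
once: record edge-level updates while mutating the IR, then submit the whole
batch. A condensed, hedged sketch (the helper is invented; DeferredDominance
and DominatorTree::UpdateType are the real types used above):

static void recordRemovedEdges(DeferredDominance *DDT, BasicBlock *BB,
                               ArrayRef<BasicBlock *> RemovedSuccessors) {
  if (!DDT)
    return;
  std::vector<DominatorTree::UpdateType> Updates;
  Updates.reserve(RemovedSuccessors.size());
  for (BasicBlock *Succ : RemovedSuccessors)
    Updates.push_back({DominatorTree::Delete, BB, Succ});
  DDT->applyUpdates(Updates); // batched; the tree is brought up to date lazily
}
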
@@ -346,6 +372,11 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
return false;
return true;
}
+ if (DbgLabelInst *DLI = dyn_cast<DbgLabelInst>(I)) {
+ if (DLI->getLabel())
+ return false;
+ return true;
+ }
if (!I->mayHaveSideEffects())
return true;
@@ -353,8 +384,9 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
// Special case intrinsics that "may have side effects" but can be deleted
// when dead.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- // Safe to delete llvm.stacksave if dead.
- if (II->getIntrinsicID() == Intrinsic::stacksave)
+ // Safe to delete llvm.stacksave and launder.invariant.group if dead.
+ if (II->getIntrinsicID() == Intrinsic::stacksave ||
+ II->getIntrinsicID() == Intrinsic::launder_invariant_group)
return true;
// Lifetime intrinsics are dead when their right-hand is undef.
@@ -402,17 +434,31 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
SmallVector<Instruction*, 16> DeadInsts;
DeadInsts.push_back(I);
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI);
- do {
- I = DeadInsts.pop_back_val();
+ return true;
+}
+
+void llvm::RecursivelyDeleteTriviallyDeadInstructions(
+ SmallVectorImpl<Instruction *> &DeadInsts, const TargetLibraryInfo *TLI) {
+ // Process the dead instruction list until empty.
+ while (!DeadInsts.empty()) {
+ Instruction &I = *DeadInsts.pop_back_val();
+ assert(I.use_empty() && "Instructions with uses are not dead.");
+ assert(isInstructionTriviallyDead(&I, TLI) &&
+ "Live instruction found in dead worklist!");
+
+ // Don't lose the debug info while deleting the instructions.
+ salvageDebugInfo(I);
// Null out all of the instruction's operands to see if any operand becomes
// dead as we go.
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- Value *OpV = I->getOperand(i);
- I->setOperand(i, nullptr);
+ for (Use &OpU : I.operands()) {
+ Value *OpV = OpU.get();
+ OpU.set(nullptr);
- if (!OpV->use_empty()) continue;
+ if (!OpV->use_empty())
+ continue;
// If the operand is an instruction that became dead as we nulled out the
// operand, and if it is 'trivially' dead, delete it in a future loop
@@ -422,10 +468,8 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
DeadInsts.push_back(OpI);
}
- I->eraseFromParent();
- } while (!DeadInsts.empty());
-
- return true;
+ I.eraseFromParent();
+ }
}
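
A hedged usage sketch for the new worklist overload (the driver loop is
invented; every instruction handed over must be trivially dead at the time it
is pushed):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

static void deleteAllTriviallyDead(Function &F, const TargetLibraryInfo *TLI) {
  SmallVector<Instruction *, 16> DeadInsts;
  for (Instruction &I : instructions(F))
    if (isInstructionTriviallyDead(&I, TLI))
      DeadInsts.push_back(&I);
  // The utility nulls out operands as it erases and pushes operands that
  // become dead back onto the same worklist, so no recursion is needed here.
  RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI);
}
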
/// areAllUsesEqual - Check whether the uses of a value are all the same.
@@ -477,6 +521,8 @@ simplifyAndDCEInstruction(Instruction *I,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
if (isInstructionTriviallyDead(I, TLI)) {
+ salvageDebugInfo(*I);
+
// Null out all of the instruction's operands to see if any operand becomes
// dead as we go.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
@@ -579,7 +625,8 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
///
/// .. and delete the predecessor corresponding to the '1', this will attempt to
/// recursively fold the and to 0.
-void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) {
+void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
+ DeferredDominance *DDT) {
// This only adjusts blocks with PHI nodes.
if (!isa<PHINode>(BB->begin()))
return;
@@ -602,13 +649,18 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) {
// of the block.
if (PhiIt != OldPhiIt) PhiIt = &BB->front();
}
+ if (DDT)
+ DDT->deleteEdge(Pred, BB);
}
/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
/// predecessor is known to have one successor (DestBB!). Eliminate the edge
/// between them, moving the instructions in the predecessor into DestBB and
/// deleting the predecessor block.
-void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) {
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT,
+ DeferredDominance *DDT) {
+ assert(!(DT && DDT) && "Cannot call with both DT and DDT.");
+
// If BB has single-entry PHI nodes, fold them.
while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
Value *NewVal = PN->getIncomingValue(0);
@@ -621,6 +673,24 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) {
BasicBlock *PredBB = DestBB->getSinglePredecessor();
assert(PredBB && "Block doesn't have a single predecessor!");
+ bool ReplaceEntryBB = false;
+ if (PredBB == &DestBB->getParent()->getEntryBlock())
+ ReplaceEntryBB = true;
+
+ // Deferred DT update: Collect all the edges that enter PredBB. These
+ // dominator edges will be redirected to DestBB.
+  std::vector<DominatorTree::UpdateType> Updates;
+ if (DDT && !ReplaceEntryBB) {
+ Updates.reserve(1 + (2 * pred_size(PredBB)));
+ Updates.push_back({DominatorTree::Delete, PredBB, DestBB});
+ for (auto I = pred_begin(PredBB), E = pred_end(PredBB); I != E; ++I) {
+ Updates.push_back({DominatorTree::Delete, *I, PredBB});
+ // This predecessor of PredBB may already have DestBB as a successor.
+ if (llvm::find(successors(*I), DestBB) == succ_end(*I))
+ Updates.push_back({DominatorTree::Insert, *I, DestBB});
+ }
+ }
+
// Zap anything that took the address of DestBB. Not doing this will give the
// address an invalid value.
if (DestBB->hasAddressTaken()) {
@@ -641,7 +711,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) {
// If the PredBB is the entry block of the function, move DestBB up to
// become the entry block after we erase PredBB.
- if (PredBB == &DestBB->getParent()->getEntryBlock())
+ if (ReplaceEntryBB)
DestBB->moveAfter(PredBB);
if (DT) {
@@ -653,8 +723,19 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) {
DT->eraseNode(PredBB);
}
}
- // Nuke BB.
- PredBB->eraseFromParent();
+
+ if (DDT) {
+ DDT->deleteBB(PredBB); // Deferred deletion of BB.
+ if (ReplaceEntryBB)
+ // The entry block was removed and there is no external interface for the
+ // dominator tree to be notified of this change. In this corner-case we
+ // recalculate the entire tree.
+ DDT->recalculate(*(DestBB->getParent()));
+ else
+ DDT->applyUpdates(Updates);
+ } else {
+ PredBB->eraseFromParent(); // Nuke BB.
+ }
}
/// CanMergeValues - Return true if we can choose one of these values to use
@@ -671,8 +752,8 @@ static bool CanMergeValues(Value *First, Value *Second) {
static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
- DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
- << Succ->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
+ << Succ->getName() << "\n");
// Shortcut, if there is only a single predecessor it must be BB and merging
// is always safe
if (Succ->getSinglePredecessor()) return true;
@@ -695,10 +776,11 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
if (BBPreds.count(IBB) &&
!CanMergeValues(BBPN->getIncomingValueForBlock(IBB),
PN->getIncomingValue(PI))) {
- DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
- << Succ->getName() << " is conflicting with "
- << BBPN->getName() << " with regard to common predecessor "
- << IBB->getName() << "\n");
+ LLVM_DEBUG(dbgs()
+ << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with "
+ << BBPN->getName() << " with regard to common predecessor "
+ << IBB->getName() << "\n");
return false;
}
}
@@ -711,9 +793,10 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
BasicBlock *IBB = PN->getIncomingBlock(PI);
if (BBPreds.count(IBB) &&
!CanMergeValues(Val, PN->getIncomingValue(PI))) {
- DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
- << Succ->getName() << " is conflicting with regard to common "
- << "predecessor " << IBB->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Can't fold, phi node " << PN->getName()
+ << " in " << Succ->getName()
+ << " is conflicting with regard to common "
+ << "predecessor " << IBB->getName() << "\n");
return false;
}
}
@@ -726,7 +809,7 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
using PredBlockVector = SmallVector<BasicBlock *, 16>;
using IncomingValueMap = DenseMap<BasicBlock *, Value *>;
-/// \brief Determines the value to use as the phi node input for a block.
+/// Determines the value to use as the phi node input for a block.
///
/// Select between \p OldVal any value that we know flows from \p BB
/// to a particular phi on the basis of which one (if either) is not
@@ -755,7 +838,7 @@ static Value *selectIncomingValueForBlock(Value *OldVal, BasicBlock *BB,
return OldVal;
}
-/// \brief Create a map from block to value for the operands of a
+/// Create a map from block to value for the operands of a
/// given phi.
///
/// Create a map from block to value for each non-undef value flowing
@@ -774,7 +857,7 @@ static void gatherIncomingValuesToPhi(PHINode *PN,
}
}
-/// \brief Replace the incoming undef values to a phi with the values
+/// Replace the incoming undef values to a phi with the values
/// from a block-to-value map.
///
/// \param PN The phi we are replacing the undefs in.
@@ -794,7 +877,7 @@ static void replaceUndefValuesInPhi(PHINode *PN,
}
}
-/// \brief Replace a value flowing from a block to a phi with
+/// Replace a value flowing from a block to a phi with
/// potentially multiple instances of that value flowing from the
/// block's predecessors to the phi.
///
@@ -861,7 +944,8 @@ static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB,
/// potential side-effect free intrinsics and the branch. If possible,
/// eliminate BB by rewriting all the predecessors to branch to the successor
/// block and return true. If we can't transform, return false.
-bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
+bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
+ DeferredDominance *DDT) {
assert(BB != &BB->getParent()->getEntryBlock() &&
"TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
@@ -900,7 +984,20 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
}
}
- DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
+ LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
+
+ std::vector<DominatorTree::UpdateType> Updates;
+ if (DDT) {
+ Updates.reserve(1 + (2 * pred_size(BB)));
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
+ // All predecessors of BB will be moved to Succ.
+ for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+ Updates.push_back({DominatorTree::Delete, *I, BB});
+ // This predecessor of BB may already have Succ as a successor.
+ if (llvm::find(successors(*I), Succ) == succ_end(*I))
+ Updates.push_back({DominatorTree::Insert, *I, Succ});
+ }
+ }
if (isa<PHINode>(Succ->begin())) {
// If there is more than one pred of succ, and there are PHI nodes in
@@ -946,7 +1043,13 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
// Everything that jumped to BB now goes to Succ.
BB->replaceAllUsesWith(Succ);
if (!Succ->hasName()) Succ->takeName(BB);
- BB->eraseFromParent(); // Delete the old basic block.
+
+ if (DDT) {
+ DDT->deleteBB(BB); // Deferred deletion of the old basic block.
+ DDT->applyUpdates(Updates);
+ } else {
+ BB->eraseFromParent(); // Delete the old basic block.
+ }
return true;
}
@@ -1125,6 +1228,31 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar,
return false;
}
+/// Check if the alloc size of \p ValTy is large enough to cover the variable
+/// (or fragment of the variable) described by \p DII.
+///
+/// This is primarily intended as a helper for the different
+/// ConvertDebugDeclareToDebugValue functions. The dbg.declare/dbg.addr that is
+/// converted describes an alloca'd variable, so we need to use the
+/// alloc size of the value when doing the comparison. E.g. an i1 value will be
+/// identified as covering an n-bit fragment if the store size of i1 is at
+/// least n bits.
+static bool valueCoversEntireFragment(Type *ValTy, DbgInfoIntrinsic *DII) {
+ const DataLayout &DL = DII->getModule()->getDataLayout();
+ uint64_t ValueSize = DL.getTypeAllocSizeInBits(ValTy);
+ if (auto FragmentSize = DII->getFragmentSizeInBits())
+ return ValueSize >= *FragmentSize;
+ // We can't always calculate the size of the DI variable (e.g. if it is a
+ // VLA). Try to use the size of the alloca that the dbg intrinsic describes
+ // instead.
+ if (DII->isAddressOfVariable())
+ if (auto *AI = dyn_cast_or_null<AllocaInst>(DII->getVariableLocation()))
+ if (auto FragmentSize = AI->getAllocationSizeInBits(DL))
+ return ValueSize >= *FragmentSize;
+ // Could not determine size of variable. Conservatively return false.
+ return false;
+}
+
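+// For example (an illustrative sketch, not taken from this change): with a
+// variable fragment described as 32 bits wide, a stored i64 covers it
+// (ValueSize 64 >= FragmentSize 32), while a stored i8 does not (8 < 32) and
+// the conversion below is rejected.
+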
/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
@@ -1135,6 +1263,21 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
auto *DIExpr = DII->getExpression();
Value *DV = SI->getOperand(0);
+ if (!valueCoversEntireFragment(SI->getValueOperand()->getType(), DII)) {
+ // FIXME: If storing to a part of the variable described by the dbg.declare,
+ // then we want to insert a dbg.value for the corresponding fragment.
+ LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: "
+ << *DII << '\n');
+ // For now, when there is a store to parts of the variable (but we do not
+ // know which part) we insert a dbg.value intrinsic to indicate that we
+ // know nothing about the variable's content.
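+ // Illustrative result (a sketch): for an i8 store into a variable that
+ // dbg.declare describes as 32 bits wide, this emits
+ //   call void @llvm.dbg.value(metadata i8 undef, metadata !var, ...)
+ // marking the variable's contents as unknown from this point on.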
+ DV = UndefValue::get(DV->getType());
+ if (!LdStHasDebugValue(DIVar, DIExpr, SI))
+ Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, DII->getDebugLoc(),
+ SI);
+ return;
+ }
+
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
Argument *ExtendedArg = nullptr;
@@ -1178,6 +1321,15 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
if (LdStHasDebugValue(DIVar, DIExpr, LI))
return;
+ if (!valueCoversEntireFragment(LI->getType(), DII)) {
+ // FIXME: If only referring to a part of the variable described by the
+ // dbg.declare, then we want to insert a dbg.value for the corresponding
+ // fragment.
+ LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: "
+ << *DII << '\n');
+ return;
+ }
+
// We are now tracking the loaded value instead of the address. In the
// future if multi-location support is added to the IR, it might be
// preferable to keep tracking both the loaded value and the original
@@ -1198,6 +1350,15 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
if (PhiHasDebugValue(DIVar, DIExpr, APN))
return;
+ if (!valueCoversEntireFragment(APN->getType(), DII)) {
+ // FIXME: If only referring to a part of the variable described by the
+ // dbg.declare, then we want to insert a dbg.value for the corresponding
+ // fragment.
+ LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: "
+ << *DII << '\n');
+ return;
+ }
+
BasicBlock *BB = APN->getParent();
auto InsertionPt = BB->getFirstInsertionPt();
@@ -1237,33 +1398,91 @@ bool llvm::LowerDbgDeclare(Function &F) {
// stored on the stack, while the dbg.declare can only describe
// the stack slot (and at a lexical-scope granularity). Later
// passes will attempt to elide the stack slot.
- if (AI && !isArray(AI)) {
- for (auto &AIUse : AI->uses()) {
- User *U = AIUse.getUser();
- if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
- if (AIUse.getOperandNo() == 1)
- ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
- } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
- } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
- // This is a call by-value or some other instruction that
- // takes a pointer to the variable. Insert a *value*
- // intrinsic that describes the alloca.
- DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(),
- DDI->getExpression(), DDI->getDebugLoc(),
- CI);
- }
+ if (!AI || isArray(AI))
+ continue;
+
+ // A volatile load/store means that the alloca can't be elided anyway.
+ if (llvm::any_of(AI->users(), [](User *U) -> bool {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U))
+ return LI->isVolatile();
+ if (StoreInst *SI = dyn_cast<StoreInst>(U))
+ return SI->isVolatile();
+ return false;
+ }))
+ continue;
+
+ for (auto &AIUse : AI->uses()) {
+ User *U = AIUse.getUser();
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (AIUse.getOperandNo() == 1)
+ ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
+ } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ // This is a call by-value or some other instruction that takes a
+ // pointer to the variable. Insert a *value* intrinsic that describes
+ // the variable by dereferencing the alloca.
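+ // Illustrative sketch: for 'call void @use(i32* %x)' on an alloca %x this
+ // emits llvm.dbg.value(metadata i32* %x, !var, !DIExpression(DW_OP_deref)),
+ // describing the variable as living at *%x.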
+ auto *DerefExpr =
+ DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref);
+ DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr,
+ DDI->getDebugLoc(), CI);
}
- DDI->eraseFromParent();
}
+ DDI->eraseFromParent();
}
return true;
}
+/// Propagate dbg.value intrinsics through the newly inserted PHIs.
+void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
+ SmallVectorImpl<PHINode *> &InsertedPHIs) {
+ assert(BB && "No BasicBlock to clone dbg.value(s) from.");
+ if (InsertedPHIs.size() == 0)
+ return;
+
+ // Map existing PHI nodes to their dbg.values.
+ ValueToValueMapTy DbgValueMap;
+ for (auto &I : *BB) {
+ if (auto DbgII = dyn_cast<DbgInfoIntrinsic>(&I)) {
+ if (auto *Loc = dyn_cast_or_null<PHINode>(DbgII->getVariableLocation()))
+ DbgValueMap.insert({Loc, DbgII});
+ }
+ }
+ if (DbgValueMap.size() == 0)
+ return;
+
+ // Then iterate through the new PHIs and look to see if they use one of the
+ // previously mapped PHIs. If so, insert a new dbg.value intrinsic that will
+ // propagate the info through the new PHI.
+ LLVMContext &C = BB->getContext();
+ for (auto PHI : InsertedPHIs) {
+ BasicBlock *Parent = PHI->getParent();
+ // Avoid inserting an intrinsic into an EH block.
+ if (Parent->getFirstNonPHI()->isEHPad())
+ continue;
+ auto PhiMAV = MetadataAsValue::get(C, ValueAsMetadata::get(PHI));
+ for (auto VI : PHI->operand_values()) {
+ auto V = DbgValueMap.find(VI);
+ if (V != DbgValueMap.end()) {
+ auto *DbgII = cast<DbgInfoIntrinsic>(V->second);
+ Instruction *NewDbgII = DbgII->clone();
+ NewDbgII->setOperand(0, PhiMAV);
+ auto InsertionPt = Parent->getFirstInsertionPt();
+ assert(InsertionPt != Parent->end() && "Ill-formed basic block");
+ NewDbgII->insertBefore(&*InsertionPt);
+ }
+ }
+ }
+}
+
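+// Usage sketch (illustrative, assuming an SSAUpdater-based caller such as
+// loop rotation): collect the PHIs the updater inserts, then propagate the
+// debug info through them:
+//
+//   SmallVector<PHINode *, 8> InsertedPHIs;
+//   SSAUpdater SSA(&InsertedPHIs);
+//   // ... Initialize/AddAvailableValue and RewriteUse as usual ...
+//   insertDebugValuesForPHIs(OrigBB, InsertedPHIs);
+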
/// Finds all intrinsics declaring local variables as living in the memory that
/// 'V' points to. This may include a mix of dbg.declare and
/// dbg.addr intrinsics.
TinyPtrVector<DbgInfoIntrinsic *> llvm::FindDbgAddrUses(Value *V) {
+ // This function is hot. Check whether the value has any metadata to avoid a
+ // DenseMap lookup.
+ if (!V->isUsedByMetadata())
+ return {};
auto *L = LocalAsMetadata::getIfExists(V);
if (!L)
return {};
@@ -1282,6 +1501,10 @@ TinyPtrVector<DbgInfoIntrinsic *> llvm::FindDbgAddrUses(Value *V) {
}
void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
+ // This function is hot. Check whether the value has any metadata to avoid a
+ // DenseMap lookup.
+ if (!V->isUsedByMetadata())
+ return;
if (auto *L = LocalAsMetadata::getIfExists(V))
if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))
for (User *U : MDV->users())
@@ -1289,8 +1512,12 @@ void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
DbgValues.push_back(DVI);
}
-static void findDbgUsers(SmallVectorImpl<DbgInfoIntrinsic *> &DbgUsers,
- Value *V) {
+void llvm::findDbgUsers(SmallVectorImpl<DbgInfoIntrinsic *> &DbgUsers,
+ Value *V) {
+ // This function is hot. Check whether the value has any metadata to avoid a
+ // DenseMap lookup.
+ if (!V->isUsedByMetadata())
+ return;
if (auto *L = LocalAsMetadata::getIfExists(V))
if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))
for (User *U : MDV->users())
@@ -1308,11 +1535,11 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
auto *DIExpr = DII->getExpression();
assert(DIVar && "Missing variable");
DIExpr = DIExpression::prepend(DIExpr, DerefBefore, Offset, DerefAfter);
- // Insert llvm.dbg.declare immediately after InsertBefore, and remove old
+ // Insert llvm.dbg.declare immediately before InsertBefore, and remove old
// llvm.dbg.declare.
Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore);
if (DII == InsertBefore)
- InsertBefore = &*std::next(InsertBefore->getIterator());
+ InsertBefore = InsertBefore->getNextNode();
DII->eraseFromParent();
}
return !DbgAddrs.empty();
@@ -1364,66 +1591,293 @@ void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
}
}
-void llvm::salvageDebugInfo(Instruction &I) {
- SmallVector<DbgValueInst *, 1> DbgValues;
+/// Wrap \p V in a ValueAsMetadata instance.
+static MetadataAsValue *wrapValueInMetadata(LLVMContext &C, Value *V) {
+ return MetadataAsValue::get(C, ValueAsMetadata::get(V));
+}
+
+bool llvm::salvageDebugInfo(Instruction &I) {
+ SmallVector<DbgInfoIntrinsic *, 1> DbgUsers;
+ findDbgUsers(DbgUsers, &I);
+ if (DbgUsers.empty())
+ return false;
+
auto &M = *I.getModule();
+ auto &DL = M.getDataLayout();
+ auto &Ctx = I.getContext();
+ auto wrapMD = [&](Value *V) { return wrapValueInMetadata(Ctx, V); };
- auto wrapMD = [&](Value *V) {
- return MetadataAsValue::get(I.getContext(), ValueAsMetadata::get(V));
+ auto doSalvage = [&](DbgInfoIntrinsic *DII, SmallVectorImpl<uint64_t> &Ops) {
+ auto *DIExpr = DII->getExpression();
+ if (!Ops.empty()) {
+ // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they
+ // are implicitly pointing out the value as a DWARF memory location
+ // description.
+ bool WithStackValue = isa<DbgValueInst>(DII);
+ DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
+ }
+ DII->setOperand(0, wrapMD(I.getOperand(0)));
+ DII->setOperand(2, MetadataAsValue::get(Ctx, DIExpr));
+ LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
};
- auto applyOffset = [&](DbgValueInst *DVI, uint64_t Offset) {
- auto *DIExpr = DVI->getExpression();
- DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset,
- DIExpression::NoDeref,
- DIExpression::WithStackValue);
- DVI->setOperand(0, wrapMD(I.getOperand(0)));
- DVI->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr));
- DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
+ auto applyOffset = [&](DbgInfoIntrinsic *DII, uint64_t Offset) {
+ SmallVector<uint64_t, 8> Ops;
+ DIExpression::appendOffset(Ops, Offset);
+ doSalvage(DII, Ops);
};
- if (isa<BitCastInst>(&I) || isa<IntToPtrInst>(&I)) {
- // Bitcasts are entirely irrelevant for debug info. Rewrite dbg.value,
- // dbg.addr, and dbg.declare to use the cast's source.
- SmallVector<DbgInfoIntrinsic *, 1> DbgUsers;
- findDbgUsers(DbgUsers, &I);
+ auto applyOps = [&](DbgInfoIntrinsic *DII,
+ std::initializer_list<uint64_t> Opcodes) {
+ SmallVector<uint64_t, 8> Ops(Opcodes);
+ doSalvage(DII, Ops);
+ };
+
+ if (auto *CI = dyn_cast<CastInst>(&I)) {
+ if (!CI->isNoopCast(DL))
+ return false;
+
+ // No-op casts are irrelevant for debug info.
+ MetadataAsValue *CastSrc = wrapMD(I.getOperand(0));
for (auto *DII : DbgUsers) {
- DII->setOperand(0, wrapMD(I.getOperand(0)));
- DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
+ DII->setOperand(0, CastSrc);
+ LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
}
+ return true;
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
- findDbgValues(DbgValues, &I);
- for (auto *DVI : DbgValues) {
- unsigned BitWidth =
- M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace());
- APInt Offset(BitWidth, 0);
- // Rewrite a constant GEP into a DIExpression. Since we are performing
- // arithmetic to compute the variable's *value* in the DIExpression, we
- // need to mark the expression with a DW_OP_stack_value.
- if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset))
- // GEP offsets are i32 and thus always fit into an int64_t.
- applyOffset(DVI, Offset.getSExtValue());
- }
+ unsigned BitWidth =
+ M.getDataLayout().getIndexSizeInBits(GEP->getPointerAddressSpace());
+ // Rewrite a constant GEP into a DIExpression. Since we are performing
+ // arithmetic to compute the variable's *value* in the DIExpression, we
+ // need to mark the expression with a DW_OP_stack_value.
+ APInt Offset(BitWidth, 0);
+ if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset))
+ for (auto *DII : DbgUsers)
+ applyOffset(DII, Offset.getSExtValue());
+ return true;
} else if (auto *BI = dyn_cast<BinaryOperator>(&I)) {
- if (BI->getOpcode() == Instruction::Add)
- if (auto *ConstInt = dyn_cast<ConstantInt>(I.getOperand(1)))
- if (ConstInt->getBitWidth() <= 64) {
- APInt Offset = ConstInt->getValue();
- findDbgValues(DbgValues, &I);
- for (auto *DVI : DbgValues)
- applyOffset(DVI, Offset.getSExtValue());
- }
+ // Rewrite binary operations with constant integer operands.
+ auto *ConstInt = dyn_cast<ConstantInt>(I.getOperand(1));
+ if (!ConstInt || ConstInt->getBitWidth() > 64)
+ return false;
+
+ uint64_t Val = ConstInt->getSExtValue();
+ for (auto *DII : DbgUsers) {
+ switch (BI->getOpcode()) {
+ case Instruction::Add:
+ applyOffset(DII, Val);
+ break;
+ case Instruction::Sub:
+ applyOffset(DII, -int64_t(Val));
+ break;
+ case Instruction::Mul:
+ applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_mul});
+ break;
+ case Instruction::SDiv:
+ applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_div});
+ break;
+ case Instruction::SRem:
+ applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_mod});
+ break;
+ case Instruction::Or:
+ applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_or});
+ break;
+ case Instruction::And:
+ applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_and});
+ break;
+ case Instruction::Xor:
+ applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_xor});
+ break;
+ case Instruction::Shl:
+ applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shl});
+ break;
+ case Instruction::LShr:
+ applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shr});
+ break;
+ case Instruction::AShr:
+ applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shra});
+ break;
+ default:
+ // TODO: Salvage constants from each kind of binop we know about.
+ return false;
+ }
+ }
+ return true;
} else if (isa<LoadInst>(&I)) {
- findDbgValues(DbgValues, &I);
- for (auto *DVI : DbgValues) {
+ MetadataAsValue *AddrMD = wrapMD(I.getOperand(0));
+ for (auto *DII : DbgUsers) {
// Rewrite the load into DW_OP_deref.
- auto *DIExpr = DVI->getExpression();
+ auto *DIExpr = DII->getExpression();
DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref);
- DVI->setOperand(0, wrapMD(I.getOperand(0)));
- DVI->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr));
- DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
+ DII->setOperand(0, AddrMD);
+ DII->setOperand(2, MetadataAsValue::get(Ctx, DIExpr));
+ LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
+ }
+ return true;
+ }
+ return false;
+}
+
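+// Usage sketch (illustrative): dead code elimination would typically try to
+// salvage an instruction's debug users before erasing it:
+//
+//   if (isInstructionTriviallyDead(&I)) {
+//     salvageDebugInfo(I); // e.g. 'add i64 %x, 8' folds into each user's
+//                          // DIExpression as a DW_OP_plus_uconst 8 prefix
+//     I.eraseFromParent();
+//   }
+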
+/// A replacement for a dbg.value expression.
+using DbgValReplacement = Optional<DIExpression *>;
+
+/// Point debug users of \p From to \p To using exprs given by \p RewriteExpr,
+/// possibly moving/deleting users to prevent use-before-def. Returns true if
+/// changes are made.
+static bool rewriteDebugUsers(
+ Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT,
+ function_ref<DbgValReplacement(DbgInfoIntrinsic &DII)> RewriteExpr) {
+ // Find debug users of From.
+ SmallVector<DbgInfoIntrinsic *, 1> Users;
+ findDbgUsers(Users, &From);
+ if (Users.empty())
+ return false;
+
+ // Prevent use-before-def of To.
+ bool Changed = false;
+ SmallPtrSet<DbgInfoIntrinsic *, 1> DeleteOrSalvage;
+ if (isa<Instruction>(&To)) {
+ bool DomPointAfterFrom = From.getNextNonDebugInstruction() == &DomPoint;
+
+ for (auto *DII : Users) {
+ // It's common to see a debug user between From and DomPoint. Move it
+ // after DomPoint to preserve the variable update without any reordering.
+ if (DomPointAfterFrom && DII->getNextNonDebugInstruction() == &DomPoint) {
+ LLVM_DEBUG(dbgs() << "MOVE: " << *DII << '\n');
+ DII->moveAfter(&DomPoint);
+ Changed = true;
+
+ // Users which otherwise aren't dominated by the replacement value must
+ // be salvaged or deleted.
+ } else if (!DT.dominates(&DomPoint, DII)) {
+ DeleteOrSalvage.insert(DII);
+ }
}
}
+
+ // Update debug users without use-before-def risk.
+ for (auto *DII : Users) {
+ if (DeleteOrSalvage.count(DII))
+ continue;
+
+ LLVMContext &Ctx = DII->getContext();
+ DbgValReplacement DVR = RewriteExpr(*DII);
+ if (!DVR)
+ continue;
+
+ DII->setOperand(0, wrapValueInMetadata(Ctx, &To));
+ DII->setOperand(2, MetadataAsValue::get(Ctx, *DVR));
+ LLVM_DEBUG(dbgs() << "REWRITE: " << *DII << '\n');
+ Changed = true;
+ }
+
+ if (!DeleteOrSalvage.empty()) {
+ // Try to salvage the remaining debug users.
+ Changed |= salvageDebugInfo(From);
+
+ // Delete the debug users which weren't salvaged.
+ for (auto *DII : DeleteOrSalvage) {
+ if (DII->getVariableLocation() == &From) {
+ LLVM_DEBUG(dbgs() << "Erased UseBeforeDef: " << *DII << '\n');
+ DII->eraseFromParent();
+ Changed = true;
+ }
+ }
+ }
+
+ return Changed;
+}
+
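+// For example (a sketch): in the common pattern
+//   %from = ...
+//   call void @llvm.dbg.value(metadata i32 %from, ...)
+//   %to = zext i32 %from to i64          ; DomPoint
+// the dbg.value is moved after %to instead of being deleted, so the variable
+// update survives the rewrite.
+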
+/// Check if a bitcast between a value of type \p FromTy to type \p ToTy would
+/// losslessly preserve the bits and semantics of the value. This predicate is
+/// symmetric, i.e. swapping \p FromTy and \p ToTy should give the same result.
+///
+/// Note that Type::canLosslesslyBitCastTo is not suitable here because it
+/// allows semantically inequivalent bitcasts, such as <2 x i64> -> <4 x i32>,
+/// and also does not allow lossless pointer <-> integer conversions.
+static bool isBitCastSemanticsPreserving(const DataLayout &DL, Type *FromTy,
+ Type *ToTy) {
+ // Trivially compatible types.
+ if (FromTy == ToTy)
+ return true;
+
+ // Handle compatible pointer <-> integer conversions.
+ if (FromTy->isIntOrPtrTy() && ToTy->isIntOrPtrTy()) {
+ bool SameSize = DL.getTypeSizeInBits(FromTy) == DL.getTypeSizeInBits(ToTy);
+ bool LosslessConversion = !DL.isNonIntegralPointerType(FromTy) &&
+ !DL.isNonIntegralPointerType(ToTy);
+ return SameSize && LosslessConversion;
+ }
+
+ // TODO: This is not exhaustive.
+ return false;
+}
+
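+// Examples (illustrative): with 64-bit integral pointers, i64 <-> i8* is
+// semantics-preserving; <2 x i64> <-> <4 x i32> (same size, different element
+// semantics) and i32 <-> i64 (different sizes) are not.
+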
+bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
+ Instruction &DomPoint, DominatorTree &DT) {
+ // Exit early if From has no debug users.
+ if (!From.isUsedByMetadata())
+ return false;
+
+ assert(&From != &To && "Can't replace something with itself");
+
+ Type *FromTy = From.getType();
+ Type *ToTy = To.getType();
+
+ auto Identity = [&](DbgInfoIntrinsic &DII) -> DbgValReplacement {
+ return DII.getExpression();
+ };
+
+ // Handle no-op conversions.
+ Module &M = *From.getModule();
+ const DataLayout &DL = M.getDataLayout();
+ if (isBitCastSemanticsPreserving(DL, FromTy, ToTy))
+ return rewriteDebugUsers(From, To, DomPoint, DT, Identity);
+
+ // Handle integer-to-integer widening and narrowing.
+ // FIXME: Use DW_OP_convert when it's available everywhere.
+ if (FromTy->isIntegerTy() && ToTy->isIntegerTy()) {
+ uint64_t FromBits = FromTy->getPrimitiveSizeInBits();
+ uint64_t ToBits = ToTy->getPrimitiveSizeInBits();
+ assert(FromBits != ToBits && "Unexpected no-op conversion");
+
+ // When the width of the result grows, assume that a debugger will only
+ // access the low `FromBits` bits when inspecting the source variable.
+ if (FromBits < ToBits)
+ return rewriteDebugUsers(From, To, DomPoint, DT, Identity);
+
+ // The width of the result has shrunk. Use sign/zero extension to describe
+ // the source variable's high bits.
+ auto SignOrZeroExt = [&](DbgInfoIntrinsic &DII) -> DbgValReplacement {
+ DILocalVariable *Var = DII.getVariable();
+
+ // Without knowing signedness, sign/zero extension isn't possible.
+ auto Signedness = Var->getSignedness();
+ if (!Signedness)
+ return None;
+
+ bool Signed = *Signedness == DIBasicType::Signedness::Signed;
+
+ if (!Signed) {
+ // In the unsigned case, assume that a debugger will initialize the
+ // high bits to 0 and do a no-op conversion.
+ return Identity(DII);
+ } else {
+ // In the signed case, the high bits are given by sign extension, i.e:
+ // (To >> (ToBits - 1)) * ((2 ^ FromBits) - 1)
+ // Calculate the high bits and OR them together with the low bits.
+ SmallVector<uint64_t, 8> Ops({dwarf::DW_OP_dup, dwarf::DW_OP_constu,
+ (ToBits - 1), dwarf::DW_OP_shr,
+ dwarf::DW_OP_lit0, dwarf::DW_OP_not,
+ dwarf::DW_OP_mul, dwarf::DW_OP_or});
+ return DIExpression::appendToStack(DII.getExpression(), Ops);
+ }
+ };
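+ // Worked example (illustrative): narrowing i32 -> i16 for a signed
+ // variable (ToBits == 16) appends
+ //   DW_OP_dup, DW_OP_constu 15, DW_OP_shr,  // isolate the sign bit
+ //   DW_OP_lit0, DW_OP_not, DW_OP_mul,       // splat it across the word
+ //   DW_OP_or                                // merge in the low 16 bits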
+ return rewriteDebugUsers(From, To, DomPoint, DT, SignOrZeroExt);
+ }
+
+ // TODO: Floating-point conversions, vectors.
+ return false;
}
unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
@@ -1448,13 +1902,19 @@ unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
}
unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
- bool PreserveLCSSA) {
+ bool PreserveLCSSA, DeferredDominance *DDT) {
BasicBlock *BB = I->getParent();
+ std::vector<DominatorTree::UpdateType> Updates;
+
// Loop over all of the successors, removing BB's entry from any PHI
// nodes.
- for (BasicBlock *Successor : successors(BB))
+ if (DDT)
+ Updates.reserve(BB->getTerminator()->getNumSuccessors());
+ for (BasicBlock *Successor : successors(BB)) {
Successor->removePredecessor(BB, PreserveLCSSA);
-
+ if (DDT)
+ Updates.push_back({DominatorTree::Delete, BB, Successor});
+ }
// Insert a call to llvm.trap right before this. This turns the undefined
// behavior into a hard fail instead of falling through into random code.
if (UseLLVMTrap) {
@@ -1474,11 +1934,13 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
BB->getInstList().erase(BBI++);
++NumInstrsRemoved;
}
+ if (DDT)
+ DDT->applyUpdates(Updates);
return NumInstrsRemoved;
}
/// changeToCall - Convert the specified invoke into a normal call.
-static void changeToCall(InvokeInst *II) {
+static void changeToCall(InvokeInst *II, DeferredDominance *DDT = nullptr) {
SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
SmallVector<OperandBundleDef, 1> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
@@ -1491,11 +1953,16 @@ static void changeToCall(InvokeInst *II) {
II->replaceAllUsesWith(NewCall);
// Follow the call by a branch to the normal destination.
- BranchInst::Create(II->getNormalDest(), II);
+ BasicBlock *NormalDestBB = II->getNormalDest();
+ BranchInst::Create(NormalDestBB, II);
// Update PHI nodes in the unwind destination
- II->getUnwindDest()->removePredecessor(II->getParent());
+ BasicBlock *BB = II->getParent();
+ BasicBlock *UnwindDestBB = II->getUnwindDest();
+ UnwindDestBB->removePredecessor(BB);
II->eraseFromParent();
+ if (DDT)
+ DDT->deleteEdge(BB, UnwindDestBB);
}
BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
@@ -1536,7 +2003,8 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
}
static bool markAliveBlocks(Function &F,
- SmallPtrSetImpl<BasicBlock*> &Reachable) {
+ SmallPtrSetImpl<BasicBlock*> &Reachable,
+ DeferredDominance *DDT = nullptr) {
SmallVector<BasicBlock*, 128> Worklist;
BasicBlock *BB = &F.front();
Worklist.push_back(BB);
@@ -1549,41 +2017,44 @@ static bool markAliveBlocks(Function &F,
// instructions into LLVM unreachable insts. The instruction combining pass
// canonicalizes unreachable insts into stores to null or undef.
for (Instruction &I : *BB) {
- // Assumptions that are known to be false are equivalent to unreachable.
- // Also, if the condition is undefined, then we make the choice most
- // beneficial to the optimizer, and choose that to also be unreachable.
- if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
- if (II->getIntrinsicID() == Intrinsic::assume) {
- if (match(II->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) {
- // Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(II, false);
- Changed = true;
- break;
- }
- }
-
- if (II->getIntrinsicID() == Intrinsic::experimental_guard) {
- // A call to the guard intrinsic bails out of the current compilation
- // unit if the predicate passed to it is false. If the predicate is a
- // constant false, then we know the guard will bail out of the current
- // compile unconditionally, so all code following it is dead.
- //
- // Note: unlike in llvm.assume, it is not "obviously profitable" for
- // guards to treat `undef` as `false` since a guard on `undef` can
- // still be useful for widening.
- if (match(II->getArgOperand(0), m_Zero()))
- if (!isa<UnreachableInst>(II->getNextNode())) {
- changeToUnreachable(II->getNextNode(), /*UseLLVMTrap=*/ false);
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ Value *Callee = CI->getCalledValue();
+ // Handle intrinsic calls.
+ if (Function *F = dyn_cast<Function>(Callee)) {
+ auto IntrinsicID = F->getIntrinsicID();
+ // Assumptions that are known to be false are equivalent to
+ // unreachable. Also, if the condition is undefined, then we make the
+ // choice most beneficial to the optimizer, and choose that to also be
+ // unreachable.
+ if (IntrinsicID == Intrinsic::assume) {
+ if (match(CI->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) {
+ // Don't insert a call to llvm.trap right before the unreachable.
+ changeToUnreachable(CI, false, false, DDT);
Changed = true;
break;
}
- }
- }
-
- if (auto *CI = dyn_cast<CallInst>(&I)) {
- Value *Callee = CI->getCalledValue();
- if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
- changeToUnreachable(CI, /*UseLLVMTrap=*/false);
+ } else if (IntrinsicID == Intrinsic::experimental_guard) {
+ // A call to the guard intrinsic bails out of the current
+ // compilation unit if the predicate passed to it is false. If the
+ // predicate is a constant false, then we know the guard will bail
+ // out of the current compile unconditionally, so all code following
+ // it is dead.
+ //
+ // Note: unlike in llvm.assume, it is not "obviously profitable" for
+ // guards to treat `undef` as `false` since a guard on `undef` can
+ // still be useful for widening.
+ if (match(CI->getArgOperand(0), m_Zero()))
+ if (!isa<UnreachableInst>(CI->getNextNode())) {
+ changeToUnreachable(CI->getNextNode(), /*UseLLVMTrap=*/false,
+ false, DDT);
+ Changed = true;
+ break;
+ }
+ }
+ } else if ((isa<ConstantPointerNull>(Callee) &&
+ !NullPointerIsDefined(CI->getFunction())) ||
+ isa<UndefValue>(Callee)) {
+ changeToUnreachable(CI, /*UseLLVMTrap=*/false, false, DDT);
Changed = true;
break;
}
@@ -1593,17 +2064,16 @@ static bool markAliveBlocks(Function &F,
// though.
if (!isa<UnreachableInst>(CI->getNextNode())) {
// Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(CI->getNextNode(), false);
+ changeToUnreachable(CI->getNextNode(), false, false, DDT);
Changed = true;
}
break;
}
- }
+ } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
+ // Store to undef and store to null are undefined and used to signal
+ // that they should be changed to unreachable by passes that can't
+ // modify the CFG.
- // Store to undef and store to null are undefined and used to signal that
- // they should be changed to unreachable by passes that can't modify the
- // CFG.
- if (auto *SI = dyn_cast<StoreInst>(&I)) {
// Don't touch volatile stores.
if (SI->isVolatile()) continue;
@@ -1611,8 +2081,9 @@ static bool markAliveBlocks(Function &F,
if (isa<UndefValue>(Ptr) ||
(isa<ConstantPointerNull>(Ptr) &&
- SI->getPointerAddressSpace() == 0)) {
- changeToUnreachable(SI, true);
+ !NullPointerIsDefined(SI->getFunction(),
+ SI->getPointerAddressSpace()))) {
+ changeToUnreachable(SI, true, false, DDT);
Changed = true;
break;
}
@@ -1623,17 +2094,23 @@ static bool markAliveBlocks(Function &F,
if (auto *II = dyn_cast<InvokeInst>(Terminator)) {
// Turn invokes that call 'nounwind' functions into ordinary calls.
Value *Callee = II->getCalledValue();
- if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
- changeToUnreachable(II, true);
+ if ((isa<ConstantPointerNull>(Callee) &&
+ !NullPointerIsDefined(BB->getParent())) ||
+ isa<UndefValue>(Callee)) {
+ changeToUnreachable(II, true, false, DDT);
Changed = true;
} else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) {
if (II->use_empty() && II->onlyReadsMemory()) {
// jump to the normal destination branch.
- BranchInst::Create(II->getNormalDest(), II);
- II->getUnwindDest()->removePredecessor(II->getParent());
+ BasicBlock *NormalDestBB = II->getNormalDest();
+ BasicBlock *UnwindDestBB = II->getUnwindDest();
+ BranchInst::Create(NormalDestBB, II);
+ UnwindDestBB->removePredecessor(II->getParent());
II->eraseFromParent();
+ if (DDT)
+ DDT->deleteEdge(BB, UnwindDestBB);
} else
- changeToCall(II);
+ changeToCall(II, DDT);
Changed = true;
}
} else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Terminator)) {
@@ -1679,7 +2156,7 @@ static bool markAliveBlocks(Function &F,
}
}
- Changed |= ConstantFoldTerminator(BB, true);
+ Changed |= ConstantFoldTerminator(BB, true, nullptr, DDT);
for (BasicBlock *Successor : successors(BB))
if (Reachable.insert(Successor).second)
Worklist.push_back(Successor);
@@ -1687,11 +2164,11 @@ static bool markAliveBlocks(Function &F,
return Changed;
}
-void llvm::removeUnwindEdge(BasicBlock *BB) {
+void llvm::removeUnwindEdge(BasicBlock *BB, DeferredDominance *DDT) {
TerminatorInst *TI = BB->getTerminator();
if (auto *II = dyn_cast<InvokeInst>(TI)) {
- changeToCall(II);
+ changeToCall(II, DDT);
return;
}
@@ -1719,15 +2196,18 @@ void llvm::removeUnwindEdge(BasicBlock *BB) {
UnwindDest->removePredecessor(BB);
TI->replaceAllUsesWith(NewTI);
TI->eraseFromParent();
+ if (DDT)
+ DDT->deleteEdge(BB, UnwindDest);
}
/// removeUnreachableBlocks - Remove blocks that are not reachable, even
/// if they are in a dead cycle. Return true if a change was made, false
/// otherwise. If `LVI` is passed, this function preserves LazyValueInfo
/// after modifying the CFG.
-bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) {
+bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
+ DeferredDominance *DDT) {
SmallPtrSet<BasicBlock*, 16> Reachable;
- bool Changed = markAliveBlocks(F, Reachable);
+ bool Changed = markAliveBlocks(F, Reachable, DDT);
// If there are unreachable blocks in the CFG...
if (Reachable.size() == F.size())
@@ -1737,25 +2217,39 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) {
NumRemoved += F.size()-Reachable.size();
// Loop over all of the basic blocks that are not reachable, dropping all of
- // their internal references...
- for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
- if (Reachable.count(&*BB))
+ // their internal references. Update DDT and LVI if available.
+ std::vector<DominatorTree::UpdateType> Updates;
+ for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) {
+ auto *BB = &*I;
+ if (Reachable.count(BB))
continue;
-
- for (BasicBlock *Successor : successors(&*BB))
+ for (BasicBlock *Successor : successors(BB)) {
if (Reachable.count(Successor))
- Successor->removePredecessor(&*BB);
+ Successor->removePredecessor(BB);
+ if (DDT)
+ Updates.push_back({DominatorTree::Delete, BB, Successor});
+ }
if (LVI)
- LVI->eraseBlock(&*BB);
+ LVI->eraseBlock(BB);
BB->dropAllReferences();
}
- for (Function::iterator I = ++F.begin(); I != F.end();)
- if (!Reachable.count(&*I))
- I = F.getBasicBlockList().erase(I);
- else
+ for (Function::iterator I = ++F.begin(); I != F.end();) {
+ auto *BB = &*I;
+ if (Reachable.count(BB)) {
++I;
+ continue;
+ }
+ if (DDT) {
+ DDT->deleteBB(BB); // deferred deletion of BB.
+ ++I;
+ } else {
+ I = F.getBasicBlockList().erase(I);
+ }
+ }
+ if (DDT)
+ DDT->applyUpdates(Updates);
return true;
}
@@ -1848,8 +2342,8 @@ static unsigned replaceDominatedUsesWith(Value *From, Value *To,
if (!Dominates(Root, U))
continue;
U.set(To);
- DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as "
- << *To << " in " << *U << "\n");
+ LLVM_DEBUG(dbgs() << "Replace dominated use of '" << From->getName()
+ << "' as " << *To << " in " << *U << "\n");
++Count;
}
return Count;
@@ -1953,7 +2447,7 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
if (!NewTy->isPointerTy())
return;
- unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
+ unsigned BitWidth = DL.getIndexTypeSizeInBits(NewTy);
if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
MDNode *NN = MDNode::get(OldLI.getContext(), None);
NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
@@ -2265,7 +2759,7 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
// Static allocas (constant size in the entry block) are handled by
// prologue/epilogue insertion so they're free anyway. We definitely don't
// want to make them non-constant.
- return !dyn_cast<AllocaInst>(I)->isStaticAlloca();
+ return !cast<AllocaInst>(I)->isStaticAlloca();
case Instruction::GetElementPtr:
if (OpIdx == 0)
return true;
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
new file mode 100644
index 000000000000..6e92e679f999
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -0,0 +1,645 @@
+//===----------------- LoopRotationUtils.cpp -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides utilities to convert a loop into a loop with bottom test.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LoopRotationUtils.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-rotate"
+
+STATISTIC(NumRotated, "Number of loops rotated");
+
+namespace {
+/// A simple loop rotation transformation.
+class LoopRotate {
+ const unsigned MaxHeaderSize;
+ LoopInfo *LI;
+ const TargetTransformInfo *TTI;
+ AssumptionCache *AC;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ const SimplifyQuery &SQ;
+ bool RotationOnly;
+ bool IsUtilMode;
+
+public:
+ LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
+ const TargetTransformInfo *TTI, AssumptionCache *AC,
+ DominatorTree *DT, ScalarEvolution *SE, const SimplifyQuery &SQ,
+ bool RotationOnly, bool IsUtilMode)
+ : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
+ SQ(SQ), RotationOnly(RotationOnly), IsUtilMode(IsUtilMode) {}
+ bool processLoop(Loop *L);
+
+private:
+ bool rotateLoop(Loop *L, bool SimplifiedLatch);
+ bool simplifyLoopLatch(Loop *L);
+};
+} // end anonymous namespace
+
+/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
+/// old header into the preheader. If there were uses of the values produced by
+/// these instruction that were outside of the loop, we have to insert PHI nodes
+/// to merge the two values. Do this now.
+static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
+ BasicBlock *OrigPreheader,
+ ValueToValueMapTy &ValueMap,
+ SmallVectorImpl<PHINode*> *InsertedPHIs) {
+ // Remove PHI node entries that are no longer live.
+ BasicBlock::iterator I, E = OrigHeader->end();
+ for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
+
+ // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
+ // as necessary.
+ SSAUpdater SSA(InsertedPHIs);
+ for (I = OrigHeader->begin(); I != E; ++I) {
+ Value *OrigHeaderVal = &*I;
+
+ // If there are no uses of the value (e.g. because it returns void), there
+ // is nothing to rewrite.
+ if (OrigHeaderVal->use_empty())
+ continue;
+
+ Value *OrigPreHeaderVal = ValueMap.lookup(OrigHeaderVal);
+
+ // The value now exists in two versions: the initial value in the preheader
+ // and the loop "next" value in the original header.
+ SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
+ SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
+ SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
+
+ // Visit each use of the OrigHeader instruction.
+ for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
+ UE = OrigHeaderVal->use_end();
+ UI != UE;) {
+ // Grab the use before incrementing the iterator.
+ Use &U = *UI;
+
+ // Increment the iterator before removing the use from the list.
+ ++UI;
+
+ // SSAUpdater can't handle a non-PHI use in the same block as an
+ // earlier def. We can easily handle those cases manually.
+ Instruction *UserInst = cast<Instruction>(U.getUser());
+ if (!isa<PHINode>(UserInst)) {
+ BasicBlock *UserBB = UserInst->getParent();
+
+ // The original users in the OrigHeader are already using the
+ // original definitions.
+ if (UserBB == OrigHeader)
+ continue;
+
+ // Users in the OrigPreHeader need to use the value to which the
+ // original definitions are mapped.
+ if (UserBB == OrigPreheader) {
+ U = OrigPreHeaderVal;
+ continue;
+ }
+ }
+
+ // Anything else can be handled by SSAUpdater.
+ SSA.RewriteUse(U);
+ }
+
+ // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug
+ // intrinsics.
+ SmallVector<DbgValueInst *, 1> DbgValues;
+ llvm::findDbgValues(DbgValues, OrigHeaderVal);
+ for (auto &DbgValue : DbgValues) {
+ // The original users in the OrigHeader are already using the original
+ // definitions.
+ BasicBlock *UserBB = DbgValue->getParent();
+ if (UserBB == OrigHeader)
+ continue;
+
+ // Users in the OrigPreHeader need to use the value to which the
+ // original definitions are mapped and anything else can be handled by
+ // the SSAUpdater. To avoid adding PHINodes, check if the value is
+ // available in UserBB, if not substitute undef.
+ Value *NewVal;
+ if (UserBB == OrigPreheader)
+ NewVal = OrigPreHeaderVal;
+ else if (SSA.HasValueForBlock(UserBB))
+ NewVal = SSA.GetValueInMiddleOfBlock(UserBB);
+ else
+ NewVal = UndefValue::get(OrigHeaderVal->getType());
+ DbgValue->setOperand(0,
+ MetadataAsValue::get(OrigHeaderVal->getContext(),
+ ValueAsMetadata::get(NewVal)));
+ }
+ }
+}
+
+// Look for a phi which is only used outside the loop (via an LCSSA phi)
+// in the exit from the header. This means that rotating the loop can
+// remove the phi.
+static bool shouldRotateLoopExitingLatch(Loop *L) {
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *HeaderExit = Header->getTerminator()->getSuccessor(0);
+ if (L->contains(HeaderExit))
+ HeaderExit = Header->getTerminator()->getSuccessor(1);
+
+ for (auto &Phi : Header->phis()) {
+ // Look for uses of this phi in the loop/via exits other than the header.
+ if (llvm::any_of(Phi.users(), [HeaderExit](const User *U) {
+ return cast<Instruction>(U)->getParent() != HeaderExit;
+ }))
+ continue;
+ return true;
+ }
+
+ return false;
+}
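+// Example (an illustrative sketch): given
+//   header: %p = phi i32 [ %a, %preheader ], [ %b, %latch ]
+//           br i1 %cond, label %body, label %exit
+//   exit:   %p.lcssa = phi i32 [ %p, %header ]
+// %p has no users other than the LCSSA phi in the exit block, so rotating
+// the loop can remove it.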
+
+/// Rotate loop LP. Return true if the loop is rotated.
+///
+/// \param SimplifiedLatch is true if the latch was just folded into the final
+/// loop exit. In this case we may want to rotate even though the new latch is
+/// now an exiting branch. This rotation would have happened had the latch not
+/// been simplified. However, if SimplifiedLatch is false, then we avoid
+/// rotating loops in which the latch exits to avoid excessive or endless
+/// rotation. LoopRotate should be repeatable and converge to a canonical
+/// form. This property is satisfied because simplifying the loop latch can only
+/// happen once across multiple invocations of the LoopRotate pass.
+bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
+ // If the loop has only one block then there is not much to rotate.
+ if (L->getBlocks().size() == 1)
+ return false;
+
+ BasicBlock *OrigHeader = L->getHeader();
+ BasicBlock *OrigLatch = L->getLoopLatch();
+
+ BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
+ if (!BI || BI->isUnconditional())
+ return false;
+
+ // If the loop header is not one of the loop exiting blocks then
+ // either this loop is already rotated or it is not
+ // suitable for loop rotation transformations.
+ if (!L->isLoopExiting(OrigHeader))
+ return false;
+
+ // If the loop latch already contains a branch that leaves the loop then the
+ // loop is already rotated.
+ if (!OrigLatch)
+ return false;
+
+ // Rotate if either the loop latch does *not* exit the loop, or if the loop
+ // latch was just simplified, or if we think rotation will be profitable.
+ if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && !IsUtilMode &&
+ !shouldRotateLoopExitingLatch(L))
+ return false;
+
+ // Check size of original header and reject loop if it is very big or we can't
+ // duplicate blocks inside it.
+ {
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, AC, EphValues);
+
+ CodeMetrics Metrics;
+ Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
+ if (Metrics.notDuplicatable) {
+ LLVM_DEBUG(
+ dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
+ << " instructions: ";
+ L->dump());
+ return false;
+ }
+ if (Metrics.convergent) {
+ LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent "
+ "instructions: ";
+ L->dump());
+ return false;
+ }
+ if (Metrics.NumInsts > MaxHeaderSize)
+ return false;
+ }
+
+ // Now, this loop is suitable for rotation.
+ BasicBlock *OrigPreheader = L->getLoopPreheader();
+
+ // If the loop could not be converted to canonical form, it must have an
+ // indirectbr in it, just give up.
+ if (!OrigPreheader || !L->hasDedicatedExits())
+ return false;
+
+ // Anything ScalarEvolution may know about this loop or the PHI nodes
+ // in its header will soon be invalidated. We should also invalidate
+ // all outer loops because insertion and deletion of blocks that happens
+ // during the rotation may violate invariants related to backedge taken
+ // infos in them.
+ if (SE)
+ SE->forgetTopmostLoop(L);
+
+ LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
+
+ // Find the new loop header. NewHeader is the header's one and only
+ // successor that is inside the loop; the header's other successor is
+ // outside the loop. Otherwise the loop is not suitable for rotation.
+ BasicBlock *Exit = BI->getSuccessor(0);
+ BasicBlock *NewHeader = BI->getSuccessor(1);
+ if (L->contains(Exit))
+ std::swap(Exit, NewHeader);
+ assert(NewHeader && "Unable to determine new loop header");
+ assert(L->contains(NewHeader) && !L->contains(Exit) &&
+ "Unable to determine loop header and exit blocks");
+
+ // This code assumes that the new header has exactly one predecessor.
+ // Remove any single-entry PHI nodes in it.
+ assert(NewHeader->getSinglePredecessor() &&
+ "New header doesn't have one pred!");
+ FoldSingleEntryPHINodes(NewHeader);
+
+ // Begin by walking OrigHeader and populating ValueMap with an entry for
+ // each Instruction.
+ BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
+ ValueToValueMapTy ValueMap;
+
+ // For PHI nodes, the value available in OldPreHeader is just the
+ // incoming value from OldPreHeader.
+ for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
+
+ // For the rest of the instructions, either hoist them into OrigPreheader
+ // if possible or create a clone of them in OrigPreheader if not.
+ TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
+
+ // Record all debug intrinsics preceding LoopEntryBranch to avoid duplication.
+ using DbgIntrinsicHash =
+ std::pair<std::pair<Value *, DILocalVariable *>, DIExpression *>;
+ auto makeHash = [](DbgInfoIntrinsic *D) -> DbgIntrinsicHash {
+ return {{D->getVariableLocation(), D->getVariable()}, D->getExpression()};
+ };
+ SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics;
+ for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend();
+ I != E; ++I) {
+ if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&*I))
+ DbgIntrinsics.insert(makeHash(DII));
+ else
+ break;
+ }
+
+ while (I != E) {
+ Instruction *Inst = &*I++;
+
+ // If the instruction's operands are invariant and it doesn't read or write
+ // memory, then it is safe to hoist. Doing this doesn't change the order of
+ // execution in the preheader, but does prevent the instruction from
+ // executing in each iteration of the loop. This means it is safe to hoist
+ // something that might trap, but isn't safe to hoist something that reads
+ // memory (without proving that the loop doesn't write).
+ if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() &&
+ !Inst->mayWriteToMemory() && !isa<TerminatorInst>(Inst) &&
+ !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
+ Inst->moveBefore(LoopEntryBranch);
+ continue;
+ }
+
+ // Otherwise, create a duplicate of the instruction.
+ Instruction *C = Inst->clone();
+
+ // Eagerly remap the operands of the instruction.
+ RemapInstruction(C, ValueMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+
+ // Avoid inserting the same intrinsic twice.
+ if (auto *DII = dyn_cast<DbgInfoIntrinsic>(C))
+ if (DbgIntrinsics.count(makeHash(DII))) {
+ C->deleteValue();
+ continue;
+ }
+
+ // With the operands remapped, see if the instruction constant folds or is
+ // otherwise simplifiable. This commonly occurs because the entry from PHI
+ // nodes allows icmps and other instructions to fold.
+ Value *V = SimplifyInstruction(C, SQ);
+ if (V && LI->replacementPreservesLCSSAForm(C, V)) {
+ // If so, then delete the temporary instruction and stick the folded value
+ // in the map.
+ ValueMap[Inst] = V;
+ if (!C->mayHaveSideEffects()) {
+ C->deleteValue();
+ C = nullptr;
+ }
+ } else {
+ ValueMap[Inst] = C;
+ }
+ if (C) {
+ // Otherwise, stick the new instruction into the new block!
+ C->setName(Inst->getName());
+ C->insertBefore(LoopEntryBranch);
+
+ if (auto *II = dyn_cast<IntrinsicInst>(C))
+ if (II->getIntrinsicID() == Intrinsic::assume)
+ AC->registerAssumption(II);
+ }
+ }
+
+ // Along with all the other instructions, we just cloned OrigHeader's
+ // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
+ // successors by duplicating their incoming values for OrigHeader.
+ TerminatorInst *TI = OrigHeader->getTerminator();
+ for (BasicBlock *SuccBB : TI->successors())
+ for (BasicBlock::iterator BI = SuccBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
+
+ // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
+ // OrigPreHeader's old terminator (the original branch into the loop), and
+ // remove the corresponding incoming values from the PHI nodes in OrigHeader.
+ LoopEntryBranch->eraseFromParent();
+
+ SmallVector<PHINode*, 2> InsertedPHIs;
+ // If there were any uses of instructions in the duplicated block outside the
+ // loop, update them, inserting PHI nodes as required
+ RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap,
+ &InsertedPHIs);
+
+ // Attach dbg.value intrinsics to the new phis if that phi uses a value that
+ // previously had debug metadata attached. This keeps the debug info
+ // up-to-date in the loop body.
+ if (!InsertedPHIs.empty())
+ insertDebugValuesForPHIs(OrigHeader, InsertedPHIs);
+
+ // NewHeader is now the header of the loop.
+ L->moveToHeader(NewHeader);
+ assert(L->getHeader() == NewHeader && "Latch block is our new header");
+
+ // Inform DT about changes to the CFG.
+ if (DT) {
+ // The OrigPreheader branches to the NewHeader and Exit now. Also inform
+ // the DT about the edge to OrigHeader that was removed.
+ SmallVector<DominatorTree::UpdateType, 3> Updates;
+ Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit});
+ Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader});
+ Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader});
+ DT->applyUpdates(Updates);
+ }
+
+ // At this point, we've finished our major CFG changes. As part of cloning
+ // the loop into the preheader we've simplified instructions and the
+ // duplicated conditional branch may now be branching on a constant. If it is
+ // branching on a constant and if that constant means that we enter the loop,
+ // then we fold away the cond branch to an uncond branch. This simplifies the
+ // loop in cases important for nested loops, and it also means we don't have
+ // to split as many edges.
+ BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
+ assert(PHBI->isConditional() && "Should be clone of BI condbr!");
+ if (!isa<ConstantInt>(PHBI->getCondition()) ||
+ PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) !=
+ NewHeader) {
+ // The conditional branch can't be folded, handle the general case.
+ // Split edges as necessary to preserve LoopSimplify form.
+
+ // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
+ // thus is not a preheader anymore.
+ // Split the edge to form a real preheader.
+ BasicBlock *NewPH = SplitCriticalEdge(
+ OrigPreheader, NewHeader,
+ CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
+ NewPH->setName(NewHeader->getName() + ".lr.ph");
+
+ // Preserve canonical loop form, which means that 'Exit' should have only
+ // one predecessor. Note that Exit could be an exit block for multiple
+ // nested loops, causing both of the edges to now be critical and need to
+ // be split.
+ SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit));
+ bool SplitLatchEdge = false;
+ for (BasicBlock *ExitPred : ExitPreds) {
+ // We only need to split loop exit edges.
+ Loop *PredLoop = LI->getLoopFor(ExitPred);
+ if (!PredLoop || PredLoop->contains(Exit))
+ continue;
+ if (isa<IndirectBrInst>(ExitPred->getTerminator()))
+ continue;
+ SplitLatchEdge |= L->getLoopLatch() == ExitPred;
+ BasicBlock *ExitSplit = SplitCriticalEdge(
+ ExitPred, Exit,
+ CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
+ ExitSplit->moveBefore(Exit);
+ }
+ assert(SplitLatchEdge &&
+ "Despite splitting all preds, failed to split latch exit?");
+ } else {
+ // We can fold the conditional branch in the preheader, this makes things
+ // simpler. The first step is to remove the extra edge to the Exit block.
+ Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
+ BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
+ NewBI->setDebugLoc(PHBI->getDebugLoc());
+ PHBI->eraseFromParent();
+
+ // With our CFG finalized, update DomTree if it is available.
+ if (DT) DT->deleteEdge(OrigPreheader, Exit);
+ }
+
+ assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
+ assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
+
+ // Now that the CFG and DomTree are in a consistent state again, try to merge
+ // the OrigHeader block into OrigLatch. This will succeed if they are
+ // connected by an unconditional branch. This is just a cleanup so the
+ // emitted code isn't too gross in this common case.
+ MergeBlockIntoPredecessor(OrigHeader, DT, LI);
+
+ LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump());
+
+ ++NumRotated;
+ return true;
+}
+
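+// Illustrative effect (a sketch in source terms): rotation turns the
+// top-tested loop
+//   while (i < n) { body(i); ++i; }
+// into a bottom-tested loop guarded by one copy of the test:
+//   if (i < n) { do { body(i); ++i; } while (i < n); }
+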
+/// Determine whether the instructions in this range may be safely and cheaply
+/// speculated. This is not an important enough situation to develop complex
+/// heuristics. We handle a single arithmetic instruction along with any type
+/// conversions.
+static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
+ BasicBlock::iterator End, Loop *L) {
+ bool seenIncrement = false;
+ bool MultiExitLoop = false;
+
+ if (!L->getExitingBlock())
+ MultiExitLoop = true;
+
+ for (BasicBlock::iterator I = Begin; I != End; ++I) {
+
+ if (!isSafeToSpeculativelyExecute(&*I))
+ return false;
+
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case Instruction::GetElementPtr:
+ // GEPs are cheap if all indices are constant.
+ if (!cast<GEPOperator>(I)->hasAllConstantIndices())
+ return false;
+ // fall-thru to increment case
+ LLVM_FALLTHROUGH;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr: {
+ Value *IVOpnd =
+ !isa<Constant>(I->getOperand(0))
+ ? I->getOperand(0)
+ : !isa<Constant>(I->getOperand(1)) ? I->getOperand(1) : nullptr;
+ if (!IVOpnd)
+ return false;
+
+ // If increment operand is used outside of the loop, this speculation
+ // could cause extra live range interference.
+ if (MultiExitLoop) {
+ for (User *UseI : IVOpnd->users()) {
+ auto *UserInst = cast<Instruction>(UseI);
+ if (!L->contains(UserInst))
+ return false;
+ }
+ }
+
+ if (seenIncrement)
+ return false;
+ seenIncrement = true;
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // ignore type conversions
+ break;
+ }
+ }
+ return true;
+}
+
+/// Fold the loop tail into the loop exit by speculating the loop tail
+/// instructions. Typically, this is a single post-increment. In the case of a
+/// simple 2-block loop, hoisting the increment can be much better than
+/// duplicating the entire loop header. In the case of loops with early exits,
+/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
+/// canonical form so downstream passes can handle it.
+///
+/// I don't believe this invalidates SCEV.
+bool LoopRotate::simplifyLoopLatch(Loop *L) {
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch || Latch->hasAddressTaken())
+ return false;
+
+ BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!Jmp || !Jmp->isUnconditional())
+ return false;
+
+ BasicBlock *LastExit = Latch->getSinglePredecessor();
+ if (!LastExit || !L->isLoopExiting(LastExit))
+ return false;
+
+ BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
+ if (!BI)
+ return false;
+
+ if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
+ << LastExit->getName() << "\n");
+
+ // Hoist the instructions from Latch into LastExit.
+ LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(),
+ Latch->begin(), Jmp->getIterator());
+
+ unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
+ BasicBlock *Header = Jmp->getSuccessor(0);
+ assert(Header == L->getHeader() && "expected a backward branch");
+
+ // Remove Latch from the CFG so that LastExit becomes the new Latch.
+ BI->setSuccessor(FallThruPath, Header);
+ Latch->replaceSuccessorsPhiUsesWith(LastExit);
+ Jmp->eraseFromParent();
+
+ // Nuke the Latch block.
+ assert(Latch->empty() && "unable to evacuate Latch");
+ LI->removeBlock(Latch);
+ if (DT)
+ DT->eraseNode(Latch);
+ Latch->eraseFromParent();
+ return true;
+}
+
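+// Sketch (illustrative): in the two-block loop
+//   exiting: br i1 %c, label %latch, label %exit
+//   latch:   %i.next = add nsw i32 %i, 1
+//            br label %header
+// the increment is hoisted into the exiting block, the empty latch is
+// erased, and the exiting block becomes the new latch.
+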
+/// Rotate \c L, and return true if any modification was made.
+bool LoopRotate::processLoop(Loop *L) {
+ // Save the loop metadata.
+ MDNode *LoopMD = L->getLoopID();
+
+ bool SimplifiedLatch = false;
+
+ // Simplify the loop latch before attempting to rotate the header
+ // upward. Rotation may not be needed if the loop tail can be folded into the
+ // loop exit.
+ if (!RotationOnly)
+ SimplifiedLatch = simplifyLoopLatch(L);
+
+ bool MadeChange = rotateLoop(L, SimplifiedLatch);
+ assert((!MadeChange || L->isLoopExiting(L->getLoopLatch())) &&
+ "Loop latch should be exiting after loop-rotate.");
+
+ // Restore the loop metadata.
+ // NB! We presume LoopRotation DOESN'T ADD its own metadata.
+ if ((MadeChange || SimplifiedLatch) && LoopMD)
+ L->setLoopID(LoopMD);
+
+ return MadeChange || SimplifiedLatch;
+}
+
+/// The utility to convert a loop into a loop with bottom test.
+bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
+ AssumptionCache *AC, DominatorTree *DT,
+ ScalarEvolution *SE, const SimplifyQuery &SQ,
+ bool RotationOnly = true,
+ unsigned Threshold = unsigned(-1),
+ bool IsUtilMode = true) {
+ LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, SQ, RotationOnly, IsUtilMode);
+
+ return LR.processLoop(L);
+}
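+
+// A minimal sketch of how a client might drive this utility; the surrounding
+// pass boilerplate and parameter choices here are assumptions, not part of
+// this file:
+//
+//   bool Changed = false;
+//   for (Loop *L : LI->getLoopsInPreorder())
+//     Changed |= LoopRotation(L, LI, TTI, AC, DT, SE, SQ,
+//                             /*RotationOnly=*/true,
+//                             /*Threshold=*/unsigned(-1),
+//                             /*IsUtilMode=*/true);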
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index f43af9772771..970494eb4704 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -52,6 +52,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -64,9 +65,8 @@
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
@@ -141,8 +141,8 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
if (!PreheaderBB)
return nullptr;
- DEBUG(dbgs() << "LoopSimplify: Creating pre-header "
- << PreheaderBB->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "LoopSimplify: Creating pre-header "
+ << PreheaderBB->getName() << "\n");
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
@@ -170,7 +170,7 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
} while (!Worklist.empty());
}
-/// \brief The first part of loop-nestification is to find a PHI node that tells
+/// The first part of loop-nestification is to find a PHI node that tells
/// us how to partition the loops.
static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,
AssumptionCache *AC) {
@@ -195,7 +195,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,
return nullptr;
}
-/// \brief If this loop has multiple backedges, try to pull one of them out into
+/// If this loop has multiple backedges, try to pull one of them out into
/// a nested loop.
///
/// This is important for code that looks like
@@ -242,7 +242,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
OuterLoopPreds.push_back(PN->getIncomingBlock(i));
}
}
- DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
+ LLVM_DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
// If ScalarEvolution is around and knows anything about values in
// this loop, tell it to forget them, because we're about to
@@ -332,7 +332,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
return NewOuter;
}
-/// \brief This method is called when the specified loop has more than one
+/// This method is called when the specified loop has more than one
/// backedge in it.
///
/// If this occurs, revector all of these backedges to target a new basic block
@@ -371,8 +371,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
BETerminator->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
- DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
- << BEBlock->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
+ << BEBlock->getName() << "\n");
// Move the new backedge block to right after the last backedge block.
Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator();
@@ -457,7 +457,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
return BEBlock;
}
-/// \brief Simplify one loop and queue further loops for simplification.
+/// Simplify one loop and queue further loops for simplification.
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, AssumptionCache *AC,
@@ -484,8 +484,8 @@ ReprocessLoop:
// Delete each unique out-of-loop (and thus dead) predecessor.
for (BasicBlock *P : BadPreds) {
- DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
- << P->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
+ << P->getName() << "\n");
// Zap the dead pred's terminator and replace it with unreachable.
TerminatorInst *TI = P->getTerminator();
@@ -504,16 +504,13 @@ ReprocessLoop:
if (BI->isConditional()) {
if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
- DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
- << ExitingBlock->getName() << "\n");
+ LLVM_DEBUG(dbgs()
+ << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
+ << ExitingBlock->getName() << "\n");
BI->setCondition(ConstantInt::get(Cond->getType(),
!L->contains(BI->getSuccessor(0))));
- // This may make the loop analyzable, force SCEV recomputation.
- if (SE)
- SE->forgetLoop(L);
-
Changed = true;
}
}
@@ -617,11 +614,8 @@ ReprocessLoop:
// comparison and the branch.
bool AllInvariant = true;
bool AnyInvariant = false;
- for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
+ for (auto I = ExitingBlock->instructionsWithoutDebug().begin(); &*I != BI; ) {
Instruction *Inst = &*I++;
- // Skip debug info intrinsics.
- if (isa<DbgInfoIntrinsic>(Inst))
- continue;
if (Inst == CI)
continue;
if (!L->makeLoopInvariant(Inst, AnyInvariant,
@@ -648,15 +642,8 @@ ReprocessLoop:
// Success. The block is now dead, so remove it from the loop,
// update the dominator tree and delete it.
- DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
- << ExitingBlock->getName() << "\n");
-
- // Notify ScalarEvolution before deleting this block. Currently assume the
- // parent loop doesn't change (spliting edges doesn't count). If blocks,
- // CFG edges, or other values in the parent loop change, then we need call
- // to forgetLoop() for the parent instead.
- if (SE)
- SE->forgetLoop(L);
+ LLVM_DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
+ << ExitingBlock->getName() << "\n");
assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
Changed = true;
@@ -679,6 +666,12 @@ ReprocessLoop:
}
}
+ // Changing exit conditions for blocks may affect exit counts of this loop and
+ // any of its parents, so we must invalidate the entire subtree if we've made
+ // any changes.
+ if (Changed && SE)
+ SE->forgetTopmostLoop(L);
+
return Changed;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 92dfb1c7204d..04b8c1417e0a 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -33,7 +34,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
@@ -63,8 +63,7 @@ UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
/// Convert the instruction operands from referencing the current values into
/// those specified by VMap.
-static inline void remapInstruction(Instruction *I,
- ValueToValueMapTy &VMap) {
+void llvm::remapInstruction(Instruction *I, ValueToValueMapTy &VMap) {
for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
Value *Op = I->getOperand(op);
@@ -97,16 +96,10 @@ static inline void remapInstruction(Instruction *I,
/// Folds a basic block into its predecessor if it only has one predecessor, and
/// that predecessor only has one successor.
-/// The LoopInfo Analysis that is passed will be kept consistent. If folding is
-/// successful references to the containing loop must be removed from
-/// ScalarEvolution by calling ScalarEvolution::forgetLoop because SE may have
-/// references to the eliminated BB. The argument ForgottenLoops contains a set
-/// of loops that have already been forgotten to prevent redundant, expensive
-/// calls to ScalarEvolution::forgetLoop. Returns the new combined block.
-static BasicBlock *
-foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE,
- SmallPtrSetImpl<Loop *> &ForgottenLoops,
- DominatorTree *DT) {
+/// The LoopInfo Analysis that is passed will be kept consistent.
+BasicBlock *llvm::foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI,
+ ScalarEvolution *SE,
+ DominatorTree *DT) {
// Merge basic blocks into their predecessor if there is only one distinct
// pred, and if there is only one distinct successor of the predecessor, and
// if there are no PHI nodes.
@@ -116,7 +109,8 @@ foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE,
if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
return nullptr;
- DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred);
+ LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into "
+ << OnlyPred->getName() << "\n");
// Resolve any PHI nodes at the start of the block. They are all
// guaranteed to have exactly one entry if they exist, unless there are
@@ -149,13 +143,6 @@ foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE,
DT->eraseNode(BB);
}
- // ScalarEvolution holds references to loop exit blocks.
- if (SE) {
- if (Loop *L = LI->getLoopFor(BB)) {
- if (ForgottenLoops.insert(L).second)
- SE->forgetLoop(L);
- }
- }
LI->removeBlock(BB);
// Inherit predecessor's name if it exists...
@@ -265,6 +252,48 @@ static bool isEpilogProfitable(Loop *L) {
return false;
}
+/// Perform some cleanup and simplifications on loops after unrolling. It is
+/// useful to simplify the IVs in the new loop, as well as do a quick
+/// simplify/DCE pass of the instructions.
+void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC) {
+ // Simplify any new induction variables in the partially unrolled loop.
+ if (SE && SimplifyIVs) {
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
+ simplifyLoopIVs(L, SE, DT, LI, DeadInsts);
+
+ // Aggressively clean up dead instructions that simplifyLoopIVs already
+ // identified. Any remaining should be cleaned up below.
+ while (!DeadInsts.empty())
+ if (Instruction *Inst =
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ }
+
+ // At this point, the code is well formed. We now do a quick sweep over the
+ // inserted code, doing constant propagation and dead code elimination as we
+ // go.
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ const std::vector<BasicBlock *> &NewLoopBlocks = L->getBlocks();
+ for (BasicBlock *BB : NewLoopBlocks) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
+ Instruction *Inst = &*I++;
+
+ if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))
+ if (LI->replacementPreservesLCSSAForm(Inst, V))
+ Inst->replaceAllUsesWith(V);
+ if (isInstructionTriviallyDead(Inst))
+ BB->getInstList().erase(Inst);
+ }
+ }
+
+ // TODO: after peeling or unrolling, previously loop variant conditions are
+ // likely to fold to constants; eagerly propagating those here will require
+ // fewer cleanup passes to be run. Alternatively, a LoopEarlyCSE might be
+ // appropriate.
+}
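+
+// For example, after unrolling by 2, the cloned iteration typically contains
+// redundant IV arithmetic such as (names illustrative):
+//
+//   %iv.next = add i64 %iv, 1
+//   %iv.next.1 = add i64 %iv.next, 1
+//
+// simplifyLoopIVs can rewrite users in terms of a single canonical induction
+// variable, and the sweep above then folds and erases the instructions that
+// become trivially dead.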
+
/// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
@@ -310,19 +339,19 @@ LoopUnrollResult llvm::UnrollLoop(
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
- DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
+ LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
return LoopUnrollResult::Unmodified;
}
BasicBlock *LatchBlock = L->getLoopLatch();
if (!LatchBlock) {
- DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
+ LLVM_DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
return LoopUnrollResult::Unmodified;
}
// Loops with indirectbr cannot be cloned.
if (!L->isSafeToClone()) {
- DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n");
+ LLVM_DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n");
return LoopUnrollResult::Unmodified;
}
@@ -335,8 +364,9 @@ LoopUnrollResult llvm::UnrollLoop(
if (!BI || BI->isUnconditional()) {
// The loop-rotate pass can be helpful to avoid this in many cases.
- DEBUG(dbgs() <<
- " Can't unroll; loop not terminated by a conditional branch.\n");
+ LLVM_DEBUG(
+ dbgs()
+ << " Can't unroll; loop not terminated by a conditional branch.\n");
return LoopUnrollResult::Unmodified;
}
@@ -345,22 +375,22 @@ LoopUnrollResult llvm::UnrollLoop(
};
if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) {
- DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch"
- " exiting the loop can be unrolled\n");
+ LLVM_DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch"
+ " exiting the loop can be unrolled\n");
return LoopUnrollResult::Unmodified;
}
if (Header->hasAddressTaken()) {
// The loop-rotate pass can be helpful to avoid this in many cases.
- DEBUG(dbgs() <<
- " Won't unroll loop: address of header block is taken.\n");
+ LLVM_DEBUG(
+ dbgs() << " Won't unroll loop: address of header block is taken.\n");
return LoopUnrollResult::Unmodified;
}
if (TripCount != 0)
- DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
+ LLVM_DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
if (TripMultiple != 1)
- DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n");
+ LLVM_DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n");
// Effectively "DCE" unrolled iterations that are beyond the tripcount
// and will never be executed.
@@ -369,7 +399,7 @@ LoopUnrollResult llvm::UnrollLoop(
// Don't enter the unroll code if there is nothing to do.
if (TripCount == 0 && Count < 2 && PeelCount == 0) {
- DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
+ LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
return LoopUnrollResult::Unmodified;
}
@@ -403,8 +433,9 @@ LoopUnrollResult llvm::UnrollLoop(
"Did not expect runtime trip-count unrolling "
"and peeling for the same loop");
+ bool Peeled = false;
if (PeelCount) {
- bool Peeled = peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
+ Peeled = peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
// Successful peeling may result in a change in the loop preheader/trip
// counts. If we later unroll the loop, we want these to be updated.
@@ -419,7 +450,7 @@ LoopUnrollResult llvm::UnrollLoop(
// Loops containing convergent instructions must have a count that divides
// their TripMultiple.
- DEBUG(
+ LLVM_DEBUG(
{
bool HasConvergent = false;
for (auto &BB : L->blocks())
@@ -442,18 +473,12 @@ LoopUnrollResult llvm::UnrollLoop(
if (Force)
RuntimeTripCount = false;
else {
- DEBUG(
- dbgs() << "Wont unroll; remainder loop could not be generated"
- "when assuming runtime trip count\n");
+ LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
+ "generated when assuming runtime trip count\n");
return LoopUnrollResult::Unmodified;
}
}
- // Notify ScalarEvolution that the loop will be substantially changed,
- // if not outright eliminated.
- if (SE)
- SE->forgetLoop(L);
-
// If we know the trip count, we know the multiple...
unsigned BreakoutTrip = 0;
if (TripCount != 0) {
@@ -468,8 +493,8 @@ LoopUnrollResult llvm::UnrollLoop(
using namespace ore;
// Report the unrolling decision.
if (CompletelyUnroll) {
- DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
- << " with trip count " << TripCount << "!\n");
+ LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
+ << " with trip count " << TripCount << "!\n");
if (ORE)
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
@@ -478,8 +503,8 @@ LoopUnrollResult llvm::UnrollLoop(
<< NV("UnrollCount", TripCount) << " iterations";
});
} else if (PeelCount) {
- DEBUG(dbgs() << "PEELING loop %" << Header->getName()
- << " with iteration count " << PeelCount << "!\n");
+ LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName()
+ << " with iteration count " << PeelCount << "!\n");
if (ORE)
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
@@ -495,31 +520,42 @@ LoopUnrollResult llvm::UnrollLoop(
<< NV("UnrollCount", Count);
};
- DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
- << " by " << Count);
+ LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by "
+ << Count);
if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
- DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
+ LLVM_DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
if (ORE)
ORE->emit([&]() {
return DiagBuilder() << " with a breakout at trip "
<< NV("BreakoutTrip", BreakoutTrip);
});
} else if (TripMultiple != 1) {
- DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+ LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
if (ORE)
ORE->emit([&]() {
return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple)
<< " trips per branch";
});
} else if (RuntimeTripCount) {
- DEBUG(dbgs() << " with run-time trip count");
+ LLVM_DEBUG(dbgs() << " with run-time trip count");
if (ORE)
ORE->emit(
[&]() { return DiagBuilder() << " with run-time trip count"; });
}
- DEBUG(dbgs() << "!\n");
+ LLVM_DEBUG(dbgs() << "!\n");
}
+ // We are going to make changes to this loop. SCEV may be keeping cached info
+ // about it, in particular about the backedge taken count. The changes we make
+ // are guaranteed to invalidate this information for our loop. It is tempting
+ // to only invalidate the loop being unrolled, but that is incorrect: exiting
+ // branches from all inner loops affect the outer loops, so if something
+ // changes inside an inner loop then any of the outer loops may change too.
+ // Forgetting the outermost loop also forgets all loops contained in it,
+ // which is what we need here.
+ if (SE)
+ SE->forgetTopmostLoop(L);
+
bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
@@ -577,14 +613,9 @@ LoopUnrollResult llvm::UnrollLoop(
"Header should not be in a sub-loop");
// Tell LI about New.
const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
- if (OldLoop) {
+ if (OldLoop)
LoopsToSimplify.insert(NewLoops[OldLoop]);
- // Forget the old loop, since its inputs may have changed.
- if (SE)
- SE->forgetLoop(OldLoop);
- }
-
if (*BB == Header)
// Loop over all of the PHI nodes in the block, changing them to use
// the incoming values from the previous block.
@@ -769,17 +800,15 @@ LoopUnrollResult llvm::UnrollLoop(
}
}
- if (DT && UnrollVerifyDomtree)
- DT->verifyDomTree();
+ assert(!DT || !UnrollVerifyDomtree ||
+ DT->verify(DominatorTree::VerificationLevel::Fast));
// Merge adjacent basic blocks, if possible.
- SmallPtrSet<Loop *, 4> ForgottenLoops;
for (BasicBlock *Latch : Latches) {
BranchInst *Term = cast<BranchInst>(Latch->getTerminator());
if (Term->isUnconditional()) {
BasicBlock *Dest = Term->getSuccessor(0);
- if (BasicBlock *Fold =
- foldBlockIntoPredecessor(Dest, LI, SE, ForgottenLoops, DT)) {
+ if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) {
// Dest has been folded into Fold. Update our worklists accordingly.
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(),
@@ -789,40 +818,10 @@ LoopUnrollResult llvm::UnrollLoop(
}
}
- // Simplify any new induction variables in the partially unrolled loop.
- if (SE && !CompletelyUnroll && Count > 1) {
- SmallVector<WeakTrackingVH, 16> DeadInsts;
- simplifyLoopIVs(L, SE, DT, LI, DeadInsts);
-
- // Aggressively clean up dead instructions that simplifyLoopIVs already
- // identified. Any remaining should be cleaned up below.
- while (!DeadInsts.empty())
- if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
- RecursivelyDeleteTriviallyDeadInstructions(Inst);
- }
-
- // At this point, the code is well formed. We now do a quick sweep over the
- // inserted code, doing constant propagation and dead code elimination as we
- // go.
- const DataLayout &DL = Header->getModule()->getDataLayout();
- const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
- for (BasicBlock *BB : NewLoopBlocks) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
- Instruction *Inst = &*I++;
-
- if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))
- if (LI->replacementPreservesLCSSAForm(Inst, V))
- Inst->replaceAllUsesWith(V);
- if (isInstructionTriviallyDead(Inst))
- BB->getInstList().erase(Inst);
- }
- }
-
- // TODO: after peeling or unrolling, previously loop variant conditions are
- // likely to fold to constants, eagerly propagating those here will require
- // fewer cleanup passes to be run. Alternatively, a LoopEarlyCSE might be
- // appropriate.
+ // At this point, the code is well formed. We now simplify the unrolled loop,
+ // doing constant propagation and dead code elimination as we go.
+ simplifyLoopAfterUnroll(L, !CompletelyUnroll && (Count > 1 || Peeled), LI, SE,
+ DT, AC);
NumCompletelyUnrolled += CompletelyUnroll;
++NumUnrolled;
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
new file mode 100644
index 000000000000..b919f73c3817
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -0,0 +1,785 @@
+//===-- LoopUnrollAndJam.cpp - Loop unrolling utilities -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements loop unroll and jam as a routine, much like
+// LoopUnroll.cpp implements loop unroll.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/Utils/Local.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-unroll-and-jam"
+
+STATISTIC(NumUnrolledAndJammed, "Number of loops unroll and jammed");
+STATISTIC(NumCompletelyUnrolledAndJammed, "Number of loops completely unroll and jammed");
+
+typedef SmallPtrSet<BasicBlock *, 4> BasicBlockSet;
+
+// Partition blocks in an outer/inner loop pair into blocks before and after
+// the inner loop.
+static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop,
+ BasicBlockSet &ForeBlocks,
+ BasicBlockSet &SubLoopBlocks,
+ BasicBlockSet &AftBlocks,
+ DominatorTree *DT) {
+ BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
+ SubLoopBlocks.insert(SubLoop->block_begin(), SubLoop->block_end());
+
+ for (BasicBlock *BB : L->blocks()) {
+ if (!SubLoop->contains(BB)) {
+ if (DT->dominates(SubLoopLatch, BB))
+ AftBlocks.insert(BB);
+ else
+ ForeBlocks.insert(BB);
+ }
+ }
+
+ // Check that all blocks in ForeBlocks together dominate the subloop
+ // TODO: This might ideally be done better with dominator/postdominator info.
+ BasicBlock *SubLoopPreHeader = SubLoop->getLoopPreheader();
+ for (BasicBlock *BB : ForeBlocks) {
+ if (BB == SubLoopPreHeader)
+ continue;
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (!ForeBlocks.count(TI->getSuccessor(i)))
+ return false;
+ }
+
+ return true;
+}
+
+// Looks at the phi nodes in Header for values coming from Latch. For each of
+// these instructions and all their operands, calls Visit on them, continuing
+// through any operands defined in AftBlocks. Returns false if Visit returns
+// false, otherwise returns true. This is used to process the instructions in
+// the Aft blocks that need to be moved before the subloop. It is used in two
+// places: once to check that the required set of instructions can be moved
+// before the loop, then to collect the instructions to actually move in
+// moveHeaderPhiOperandsToForeBlocks.
+template <typename T>
+static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch,
+ BasicBlockSet &AftBlocks, T Visit) {
+ SmallVector<Instruction *, 8> Worklist;
+ for (auto &Phi : Header->phis()) {
+ Value *V = Phi.getIncomingValueForBlock(Latch);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ Worklist.push_back(I);
+ }
+
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
+ if (!Visit(I))
+ return false;
+
+ if (AftBlocks.count(I->getParent()))
+ for (auto &U : I->operands())
+ if (Instruction *II = dyn_cast<Instruction>(U))
+ Worklist.push_back(II);
+ }
+
+ return true;
+}
+
+// Move the phi operands of Header from Latch out of AftBlocks to InsertLoc.
+static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header,
+ BasicBlock *Latch,
+ Instruction *InsertLoc,
+ BasicBlockSet &AftBlocks) {
+ // We need to ensure we move the instructions in the correct order,
+ // starting with the earliest required instruction and moving forward.
+ std::vector<Instruction *> Visited;
+ processHeaderPhiOperands(Header, Latch, AftBlocks,
+ [&Visited, &AftBlocks](Instruction *I) {
+ if (AftBlocks.count(I->getParent()))
+ Visited.push_back(I);
+ return true;
+ });
+
+ // Move all instructions in program order to before the InsertLoc
+ BasicBlock *InsertLocBB = InsertLoc->getParent();
+ for (Instruction *I : reverse(Visited)) {
+ if (I->getParent() != InsertLocBB)
+ I->moveBefore(InsertLoc);
+ }
+}
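+
+// For instance, given an outer-loop phi whose looping operand is computed in
+// the Aft block (names illustrative):
+//
+//   header:
+//     %i = phi i32 [ 0, %preheader ], [ %i.next, %latch ]
+//   ...
+//   aft:
+//     %i.next = add i32 %i, 1
+//
+// %i.next (and, transitively, any of its operands defined in AftBlocks) is
+// moved to just before InsertLoc, so that the cloned Fore blocks of later
+// unrolled iterations can use the value before the Aft blocks execute.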
+
+/*
+ This method performs Unroll and Jam. For a simple loop like:
+ for (i = ..)
+ Fore(i)
+ for (j = ..)
+ SubLoop(i, j)
+ Aft(i)
+
+ Instead of doing normal inner or outer unrolling, we do:
+ for (i = .., i+=2)
+ Fore(i)
+ Fore(i+1)
+ for (j = ..)
+ SubLoop(i, j)
+ SubLoop(i+1, j)
+ Aft(i)
+ Aft(i+1)
+
+ So the outer loop is essentially unrolled and then the inner loops are fused
+ ("jammed") together into a single loop. This can increase speed when there
+ are loads in SubLoop that are invariant to i, as they become shared between
+ the now jammed inner loops.
+
+ We do this by splitting the blocks in the loop into Fore, Subloop and Aft.
+ Fore blocks are those before the inner loop, Aft are those after. Normal
+ Unroll code is used to copy each of these sets of blocks and the results are
+ combined together into the final form above.
+
+ isSafeToUnrollAndJam should be used prior to calling this to make sure the
+ unrolling will be valid. Checking profitability is also advisable.
+*/
+LoopUnrollResult
+llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
+ unsigned TripMultiple, bool UnrollRemainder,
+ LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC, OptimizationRemarkEmitter *ORE) {
+
+ // When we enter here we should have already checked that it is safe
+ BasicBlock *Header = L->getHeader();
+ assert(L->getSubLoops().size() == 1);
+ Loop *SubLoop = *L->begin();
+
+ // Don't enter the unroll code if there is nothing to do.
+ if (TripCount == 0 && Count < 2) {
+ LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
+ return LoopUnrollResult::Unmodified;
+ }
+
+ assert(Count > 0);
+ assert(TripMultiple > 0);
+ assert(TripCount == 0 || TripCount % TripMultiple == 0);
+
+ // Are we eliminating the loop control altogether?
+ bool CompletelyUnroll = (Count == TripCount);
+
+ // We use the runtime remainder in cases where we don't know the trip multiple.
+ if (TripMultiple == 1 || TripMultiple % Count != 0) {
+ if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
+ /*UseEpilogRemainder*/ true,
+ UnrollRemainder, LI, SE, DT, AC, true)) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
+ "generated when assuming runtime trip count\n");
+ return LoopUnrollResult::Unmodified;
+ }
+ }
+
+ // Notify ScalarEvolution that the loop will be substantially changed,
+ // if not outright eliminated.
+ if (SE) {
+ SE->forgetLoop(L);
+ SE->forgetLoop(SubLoop);
+ }
+
+ using namespace ore;
+ // Report the unrolling decision.
+ if (CompletelyUnroll) {
+ LLVM_DEBUG(dbgs() << "COMPLETELY UNROLL AND JAMMING loop %"
+ << Header->getName() << " with trip count " << TripCount
+ << "!\n");
+ ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
+ L->getHeader())
+ << "completely unroll and jammed loop with "
+ << NV("UnrollCount", TripCount) << " iterations");
+ } else {
+ auto DiagBuilder = [&]() {
+ OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
+ L->getHeader());
+ return Diag << "unroll and jammed loop by a factor of "
+ << NV("UnrollCount", Count);
+ };
+
+ LLVM_DEBUG(dbgs() << "UNROLL AND JAMMING loop %" << Header->getName()
+ << " by " << Count);
+ if (TripMultiple != 1) {
+ LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+ ORE->emit([&]() {
+ return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple)
+ << " trips per branch";
+ });
+ } else {
+ LLVM_DEBUG(dbgs() << " with run-time trip count");
+ ORE->emit([&]() { return DiagBuilder() << " with run-time trip count"; });
+ }
+ LLVM_DEBUG(dbgs() << "!\n");
+ }
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
+ assert(Preheader && LatchBlock && Header);
+ assert(BI && !BI->isUnconditional());
+ bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
+ BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
+ bool SubLoopContinueOnTrue = SubLoop->contains(
+ SubLoop->getLoopLatch()->getTerminator()->getSuccessor(0));
+
+ // Partition blocks in an outer/inner loop pair into blocks before and after
+ // the inner loop.
+ BasicBlockSet SubLoopBlocks;
+ BasicBlockSet ForeBlocks;
+ BasicBlockSet AftBlocks;
+ partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, AftBlocks,
+ DT);
+
+ // We keep track of the entering/first and exiting/last block of each of
+ // Fore/SubLoop/Aft in each iteration. This helps make the stapling up of
+ // blocks easier.
+ std::vector<BasicBlock *> ForeBlocksFirst;
+ std::vector<BasicBlock *> ForeBlocksLast;
+ std::vector<BasicBlock *> SubLoopBlocksFirst;
+ std::vector<BasicBlock *> SubLoopBlocksLast;
+ std::vector<BasicBlock *> AftBlocksFirst;
+ std::vector<BasicBlock *> AftBlocksLast;
+ ForeBlocksFirst.push_back(Header);
+ ForeBlocksLast.push_back(SubLoop->getLoopPreheader());
+ SubLoopBlocksFirst.push_back(SubLoop->getHeader());
+ SubLoopBlocksLast.push_back(SubLoop->getExitingBlock());
+ AftBlocksFirst.push_back(SubLoop->getExitBlock());
+ AftBlocksLast.push_back(L->getExitingBlock());
+ // Maps Blocks[0] -> Blocks[It]
+ ValueToValueMapTy LastValueMap;
+
+ // Move any instructions from fore phi operands from AftBlocks into Fore.
+ moveHeaderPhiOperandsToForeBlocks(
+ Header, LatchBlock, SubLoop->getLoopPreheader()->getTerminator(),
+ AftBlocks);
+
+ // The current on-the-fly SSA update requires blocks to be processed in
+ // reverse postorder so that LastValueMap contains the correct value at each
+ // exit.
+ LoopBlocksDFS DFS(L);
+ DFS.perform(LI);
+ // Stash the DFS iterators before adding blocks to the loop.
+ LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
+
+ if (Header->getParent()->isDebugInfoForProfiling())
+ for (BasicBlock *BB : L->getBlocks())
+ for (Instruction &I : *BB)
+ if (!isa<DbgInfoIntrinsic>(&I))
+ if (const DILocation *DIL = I.getDebugLoc())
+ I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));
+
+ // Copy all blocks
+ for (unsigned It = 1; It != Count; ++It) {
+ std::vector<BasicBlock *> NewBlocks;
+ // Maps Blocks[It] -> Blocks[It-1]
+ DenseMap<Value *, Value *> PrevItValueMap;
+
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+ ValueToValueMapTy VMap;
+ BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
+ Header->getParent()->getBasicBlockList().push_back(New);
+
+ if (ForeBlocks.count(*BB)) {
+ L->addBasicBlockToLoop(New, *LI);
+
+ if (*BB == ForeBlocksFirst[0])
+ ForeBlocksFirst.push_back(New);
+ if (*BB == ForeBlocksLast[0])
+ ForeBlocksLast.push_back(New);
+ } else if (SubLoopBlocks.count(*BB)) {
+ SubLoop->addBasicBlockToLoop(New, *LI);
+
+ if (*BB == SubLoopBlocksFirst[0])
+ SubLoopBlocksFirst.push_back(New);
+ if (*BB == SubLoopBlocksLast[0])
+ SubLoopBlocksLast.push_back(New);
+ } else if (AftBlocks.count(*BB)) {
+ L->addBasicBlockToLoop(New, *LI);
+
+ if (*BB == AftBlocksFirst[0])
+ AftBlocksFirst.push_back(New);
+ if (*BB == AftBlocksLast[0])
+ AftBlocksLast.push_back(New);
+ } else {
+ llvm_unreachable("BB being cloned should be in Fore/Sub/Aft");
+ }
+
+ // Update our running maps of newest clones
+ PrevItValueMap[New] = (It == 1 ? *BB : LastValueMap[*BB]);
+ LastValueMap[*BB] = New;
+ for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+ VI != VE; ++VI) {
+ PrevItValueMap[VI->second] =
+ const_cast<Value *>(It == 1 ? VI->first : LastValueMap[VI->first]);
+ LastValueMap[VI->first] = VI->second;
+ }
+
+ NewBlocks.push_back(New);
+
+ // Update DomTree:
+ if (*BB == ForeBlocksFirst[0])
+ DT->addNewBlock(New, ForeBlocksLast[It - 1]);
+ else if (*BB == SubLoopBlocksFirst[0])
+ DT->addNewBlock(New, SubLoopBlocksLast[It - 1]);
+ else if (*BB == AftBlocksFirst[0])
+ DT->addNewBlock(New, AftBlocksLast[It - 1]);
+ else {
+ // Each set of blocks (Fore/Sub/Aft) will have the same internal domtree
+ // structure.
+ auto BBDomNode = DT->getNode(*BB);
+ auto BBIDom = BBDomNode->getIDom();
+ BasicBlock *OriginalBBIDom = BBIDom->getBlock();
+ assert(OriginalBBIDom);
+ assert(LastValueMap[cast<Value>(OriginalBBIDom)]);
+ DT->addNewBlock(
+ New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
+ }
+ }
+
+ // Remap all instructions in the most recent iteration
+ for (BasicBlock *NewBlock : NewBlocks) {
+ for (Instruction &I : *NewBlock) {
+ ::remapInstruction(&I, LastValueMap);
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == Intrinsic::assume)
+ AC->registerAssumption(II);
+ }
+ }
+
+ // Alter the ForeBlocks phis, pointing them at the latest version of the
+ // value from the previous iteration's phis.
+ for (PHINode &Phi : ForeBlocksFirst[It]->phis()) {
+ Value *OldValue = Phi.getIncomingValueForBlock(AftBlocksLast[It]);
+ assert(OldValue && "should have incoming edge from Aft[It]");
+ Value *NewValue = OldValue;
+ if (Value *PrevValue = PrevItValueMap[OldValue])
+ NewValue = PrevValue;
+
+ assert(Phi.getNumOperands() == 2);
+ Phi.setIncomingBlock(0, ForeBlocksLast[It - 1]);
+ Phi.setIncomingValue(0, NewValue);
+ Phi.removeIncomingValue(1);
+ }
+ }
+
+ // Now that all the basic blocks for the unrolled iterations are in place,
+ // finish up connecting the blocks and phi nodes. At this point LastValueMap
+ // holds the values from the last unrolled iteration.
+
+ // Update Phis in BB from OldBB to point to NewBB
+ auto updatePHIBlocks = [](BasicBlock *BB, BasicBlock *OldBB,
+ BasicBlock *NewBB) {
+ for (PHINode &Phi : BB->phis()) {
+ int I = Phi.getBasicBlockIndex(OldBB);
+ Phi.setIncomingBlock(I, NewBB);
+ }
+ };
+ // Update Phis in BB from OldBB to point to NewBB and use the latest value
+ // from LastValueMap
+ auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB,
+ BasicBlock *NewBB,
+ ValueToValueMapTy &LastValueMap) {
+ for (PHINode &Phi : BB->phis()) {
+ for (unsigned b = 0; b < Phi.getNumIncomingValues(); ++b) {
+ if (Phi.getIncomingBlock(b) == OldBB) {
+ Value *OldValue = Phi.getIncomingValue(b);
+ if (Value *LastValue = LastValueMap[OldValue])
+ Phi.setIncomingValue(b, LastValue);
+ Phi.setIncomingBlock(b, NewBB);
+ break;
+ }
+ }
+ }
+ };
+ // Move all the phis from Src into Dest
+ auto movePHIs = [](BasicBlock *Src, BasicBlock *Dest) {
+ Instruction *insertPoint = Dest->getFirstNonPHI();
+ while (PHINode *Phi = dyn_cast<PHINode>(Src->begin()))
+ Phi->moveBefore(insertPoint);
+ };
+
+ // Update the PHI values outside the loop to point to the last block
+ updatePHIBlocksAndValues(LoopExit, AftBlocksLast[0], AftBlocksLast.back(),
+ LastValueMap);
+
+ // Update ForeBlocks successors and phi nodes
+ BranchInst *ForeTerm =
+ cast<BranchInst>(ForeBlocksLast.back()->getTerminator());
+ BasicBlock *Dest = SubLoopBlocksFirst[0];
+ ForeTerm->setSuccessor(0, Dest);
+
+ if (CompletelyUnroll) {
+ while (PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->begin())) {
+ Phi->replaceAllUsesWith(Phi->getIncomingValueForBlock(Preheader));
+ Phi->getParent()->getInstList().erase(Phi);
+ }
+ } else {
+ // Update the PHI values to point to the last aft block
+ updatePHIBlocksAndValues(ForeBlocksFirst[0], AftBlocksLast[0],
+ AftBlocksLast.back(), LastValueMap);
+ }
+
+ for (unsigned It = 1; It != Count; It++) {
+ // Remap ForeBlock successors from previous iteration to this
+ BranchInst *ForeTerm =
+ cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator());
+ BasicBlock *Dest = ForeBlocksFirst[It];
+ ForeTerm->setSuccessor(0, Dest);
+ }
+
+ // Subloop successors and phis
+ BranchInst *SubTerm =
+ cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator());
+ SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]);
+ SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]);
+ updatePHIBlocks(SubLoopBlocksFirst[0], ForeBlocksLast[0],
+ ForeBlocksLast.back());
+ updatePHIBlocks(SubLoopBlocksFirst[0], SubLoopBlocksLast[0],
+ SubLoopBlocksLast.back());
+
+ for (unsigned It = 1; It != Count; It++) {
+ // Replace the conditional branch of the previous iteration's subloop with an
+ // unconditional branch to this iteration's subloop
+ BranchInst *SubTerm =
+ cast<BranchInst>(SubLoopBlocksLast[It - 1]->getTerminator());
+ BranchInst::Create(SubLoopBlocksFirst[It], SubTerm);
+ SubTerm->eraseFromParent();
+
+ updatePHIBlocks(SubLoopBlocksFirst[It], ForeBlocksLast[It],
+ ForeBlocksLast.back());
+ updatePHIBlocks(SubLoopBlocksFirst[It], SubLoopBlocksLast[It],
+ SubLoopBlocksLast.back());
+ movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]);
+ }
+
+ // Aft blocks successors and phis
+ BranchInst *Term = cast<BranchInst>(AftBlocksLast.back()->getTerminator());
+ if (CompletelyUnroll) {
+ BranchInst::Create(LoopExit, Term);
+ Term->eraseFromParent();
+ } else {
+ Term->setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]);
+ }
+ updatePHIBlocks(AftBlocksFirst[0], SubLoopBlocksLast[0],
+ SubLoopBlocksLast.back());
+
+ for (unsigned It = 1; It != Count; It++) {
+ // Replace the conditional branch of the previous iteration's Aft blocks with
+ // an unconditional branch to this iteration's Aft blocks
+ BranchInst *AftTerm =
+ cast<BranchInst>(AftBlocksLast[It - 1]->getTerminator());
+ BranchInst::Create(AftBlocksFirst[It], AftTerm);
+ AftTerm->eraseFromParent();
+
+ updatePHIBlocks(AftBlocksFirst[It], SubLoopBlocksLast[It],
+ SubLoopBlocksLast.back());
+ movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]);
+ }
+
+ // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the
+ // new ones required.
+ if (Count != 1) {
+ SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
+ DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, ForeBlocksLast[0],
+ SubLoopBlocksFirst[0]);
+ DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete,
+ SubLoopBlocksLast[0], AftBlocksFirst[0]);
+
+ DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
+ ForeBlocksLast.back(), SubLoopBlocksFirst[0]);
+ DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
+ SubLoopBlocksLast.back(), AftBlocksFirst[0]);
+ DT->applyUpdates(DTUpdates);
+ }
+
+ // Merge adjacent basic blocks, if possible.
+ SmallPtrSet<BasicBlock *, 16> MergeBlocks;
+ MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
+ MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
+ MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end());
+ while (!MergeBlocks.empty()) {
+ BasicBlock *BB = *MergeBlocks.begin();
+ BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
+ if (Term && Term->isUnconditional() && L->contains(Term->getSuccessor(0))) {
+ BasicBlock *Dest = Term->getSuccessor(0);
+ if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) {
+ // Don't remove BB and add Fold, as they are the same BB
+ assert(Fold == BB);
+ (void)Fold;
+ MergeBlocks.erase(Dest);
+ } else
+ MergeBlocks.erase(BB);
+ } else
+ MergeBlocks.erase(BB);
+ }
+
+ // At this point, the code is well formed. We now do a quick sweep over the
+ // inserted code, doing constant propagation and dead code elimination as we
+ // go.
+ simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC);
+ simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC);
+
+ NumCompletelyUnrolledAndJammed += CompletelyUnroll;
+ ++NumUnrolledAndJammed;
+
+#ifndef NDEBUG
+ // We shouldn't have done anything to break loop simplify form or LCSSA.
+ Loop *OuterL = L->getParentLoop();
+ Loop *OutestLoop = OuterL ? OuterL : (!CompletelyUnroll ? L : SubLoop);
+ assert(OutestLoop->isRecursivelyLCSSAForm(*DT, *LI));
+ if (!CompletelyUnroll)
+ assert(L->isLoopSimplifyForm());
+ assert(SubLoop->isLoopSimplifyForm());
+ assert(DT->verify());
+#endif
+
+ // Update LoopInfo if the loop is completely removed.
+ if (CompletelyUnroll)
+ LI->erase(L);
+
+ return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
+ : LoopUnrollResult::PartiallyUnrolled;
+}
+
+static bool getLoadsAndStores(BasicBlockSet &Blocks,
+ SmallVector<Value *, 4> &MemInstr) {
+ // Scan the BBs and collect legal loads and stores.
+ // Returns false if non-simple loads/stores are found.
+ for (BasicBlock *BB : Blocks) {
+ for (Instruction &I : *BB) {
+ if (auto *Ld = dyn_cast<LoadInst>(&I)) {
+ if (!Ld->isSimple())
+ return false;
+ MemInstr.push_back(&I);
+ } else if (auto *St = dyn_cast<StoreInst>(&I)) {
+ if (!St->isSimple())
+ return false;
+ MemInstr.push_back(&I);
+ } else if (I.mayReadOrWriteMemory()) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+static bool checkDependencies(SmallVector<Value *, 4> &Earlier,
+ SmallVector<Value *, 4> &Later,
+ unsigned LoopDepth, bool InnerLoop,
+ DependenceInfo &DI) {
+ // Use DA to check for dependencies between loads and stores that make unroll
+ // and jam invalid
+ for (Value *I : Earlier) {
+ for (Value *J : Later) {
+ Instruction *Src = cast<Instruction>(I);
+ Instruction *Dst = cast<Instruction>(J);
+ if (Src == Dst)
+ continue;
+ // Ignore Input dependencies.
+ if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
+ continue;
+
+ // Track dependencies, and if we find them take a conservative approach
+ // by allowing only = or < (not >), although some > would be safe
+ // (depending upon unroll width).
+ // For the inner loop, we need to disallow any (> <) dependencies.
+ // FIXME: Allow > so long as distance is less than unroll width.
+ if (auto D = DI.depends(Src, Dst, true)) {
+ assert(D->isOrdered() && "Expected an output, flow or anti dep.");
+
+ if (D->isConfused())
+ return false;
+ if (!InnerLoop) {
+ if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT)
+ return false;
+ } else {
+ assert(LoopDepth + 1 <= D->getLevels());
+ if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT &&
+ D->getDirection(LoopDepth + 1) & Dependence::DVEntry::LT)
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+}
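+
+// As a concrete example of the direction check above, consider (illustrative
+// C):
+//
+//   for (i = 0; i < n; i++) {   // outer loop, depth LoopDepth
+//     A[i] = ...;               // Fore store
+//     for (j = 0; j < m; j++)
+//       ... = A[i + 1];         // SubLoop load
+//   }
+//
+// The load in iteration i reads the location that the Fore store of
+// iteration i+1 writes, a dependence with direction > on the outer loop.
+// After jamming, Fore(i+1) would run before SubLoop(i), so the load would
+// observe the new value; the check therefore returns false.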
+
+static bool checkDependencies(Loop *L, BasicBlockSet &ForeBlocks,
+ BasicBlockSet &SubLoopBlocks,
+ BasicBlockSet &AftBlocks, DependenceInfo &DI) {
+ // Collect all loads and stores from each set of blocks.
+ SmallVector<Value *, 4> ForeMemInstr;
+ SmallVector<Value *, 4> SubLoopMemInstr;
+ SmallVector<Value *, 4> AftMemInstr;
+ if (!getLoadsAndStores(ForeBlocks, ForeMemInstr) ||
+ !getLoadsAndStores(SubLoopBlocks, SubLoopMemInstr) ||
+ !getLoadsAndStores(AftBlocks, AftMemInstr))
+ return false;
+
+ // Check for dependencies between any blocks that may change order
+ unsigned LoopDepth = L->getLoopDepth();
+ return checkDependencies(ForeMemInstr, SubLoopMemInstr, LoopDepth, false,
+ DI) &&
+ checkDependencies(ForeMemInstr, AftMemInstr, LoopDepth, false, DI) &&
+ checkDependencies(SubLoopMemInstr, AftMemInstr, LoopDepth, false,
+ DI) &&
+ checkDependencies(SubLoopMemInstr, SubLoopMemInstr, LoopDepth, true,
+ DI);
+}
+
+bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
+ DependenceInfo &DI) {
+ /* We currently handle outer loops like this:
+ |
+ ForeFirst <----\ }
+ Blocks | } ForeBlocks
+ ForeLast | }
+ | |
+ SubLoopFirst <\ | }
+ Blocks | | } SubLoopBlocks
+ SubLoopLast -/ | }
+ | |
+ AftFirst | }
+ Blocks | } AftBlocks
+ AftLast ------/ }
+ |
+
+ There can be (theoretically) any number of blocks in ForeBlocks, SubLoopBlocks
+ and AftBlocks, provided that there is one edge from Fores to SubLoops,
+ one edge from SubLoops to Afts and a single outer loop exit (from Afts).
+ In practice we currently limit Aft blocks to a single block, and limit
+ things further in the profitability checks of the unroll and jam pass.
+
+ Because of the way we rearrange basic blocks, we also require that
+ the Fore blocks on all unrolled iterations are safe to move before the
+ SubLoop blocks of all iterations. So we require that the phi node looping
+ operands of ForeHeader can be moved to at least the end of ForeEnd, so that
+ we can arrange cloned Fore Blocks before the subloop and match up Phi's
+ correctly.
+
+ i.e. The old order of blocks used to be F1 S1_1 S1_2 A1 F2 S2_1 S2_2 A2.
+ It needs to be safe to transform this to F1 F2 S1_1 S2_1 S1_2 S2_2 A1 A2.
+
+ There are then a number of checks along the lines of no calls, no
+ exceptions, inner loop IV is consistent, etc. Note that for loops requiring
+ runtime unrolling, UnrollRuntimeLoopRemainder can also fail in
+ UnrollAndJamLoop if the trip count cannot be easily calculated.
+ */
+
+ if (!L->isLoopSimplifyForm() || L->getSubLoops().size() != 1)
+ return false;
+ Loop *SubLoop = L->getSubLoops()[0];
+ if (!SubLoop->isLoopSimplifyForm())
+ return false;
+
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ BasicBlock *Exit = L->getExitingBlock();
+ BasicBlock *SubLoopHeader = SubLoop->getHeader();
+ BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
+ BasicBlock *SubLoopExit = SubLoop->getExitingBlock();
+
+ if (Latch != Exit)
+ return false;
+ if (SubLoopLatch != SubLoopExit)
+ return false;
+
+ if (Header->hasAddressTaken() || SubLoopHeader->hasAddressTaken())
+ return false;
+
+ // Split blocks into Fore/SubLoop/Aft based on dominators
+ BasicBlockSet SubLoopBlocks;
+ BasicBlockSet ForeBlocks;
+ BasicBlockSet AftBlocks;
+ if (!partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks,
+ AftBlocks, &DT))
+ return false;
+
+ // Aft blocks may need to move instructions to fore blocks, which becomes more
+ // difficult if there are multiple (potentially conditionally executed)
+ // blocks. For now we just exclude loops with multiple aft blocks.
+ if (AftBlocks.size() != 1)
+ return false;
+
+ // Check inner loop IV is consistent between all iterations
+ const SCEV *SubLoopBECountSC = SE.getExitCount(SubLoop, SubLoopLatch);
+ if (isa<SCEVCouldNotCompute>(SubLoopBECountSC) ||
+ !SubLoopBECountSC->getType()->isIntegerTy())
+ return false;
+ ScalarEvolution::LoopDisposition LD =
+ SE.getLoopDisposition(SubLoopBECountSC, L);
+ if (LD != ScalarEvolution::LoopInvariant)
+ return false;
+
+ // Check the loop safety info for exceptions.
+ LoopSafetyInfo LSI;
+ computeLoopSafetyInfo(&LSI, L);
+ if (LSI.MayThrow)
+ return false;
+
+ // We've ruled out the easy stuff and now need to check that there are no
+ // interdependencies which may prevent us from moving:
+ //   ForeBlocks before Subloop and AftBlocks.
+ //   Subloop before AftBlocks.
+ //   ForeBlock phi operands before the subloop.
+
+ // Make sure we can move all instructions we need to before the subloop
+ if (!processHeaderPhiOperands(
+ Header, Latch, AftBlocks, [&AftBlocks, &SubLoop](Instruction *I) {
+ if (SubLoop->contains(I->getParent()))
+ return false;
+ if (AftBlocks.count(I->getParent())) {
+ // If we hit a phi node in afts we know we are done (probably
+ // LCSSA)
+ if (isa<PHINode>(I))
+ return false;
+ // Can't move instructions with side effects or memory
+ // reads/writes
+ if (I->mayHaveSideEffects() || I->mayReadOrWriteMemory())
+ return false;
+ }
+ // Keep going
+ return true;
+ }))
+ return false;
+
+ // Check for memory dependencies which prohibit the unrolling we are doing.
+ // Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check
+ // there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub.
+ if (!checkDependencies(L, ForeBlocks, SubLoopBlocks, AftBlocks, DI))
+ return false;
+
+ return true;
+}
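+
+// A minimal sketch of the intended calling sequence; the driver and its
+// parameter choices are assumptions, not part of this file:
+//
+//   if (isSafeToUnrollAndJam(L, SE, DT, DI)) {
+//     LoopUnrollResult Result =
+//         UnrollAndJamLoop(L, /*Count=*/2, TripCount, TripMultiple,
+//                          /*UnrollRemainder=*/false, LI, &SE, &DT, &AC, &ORE);
+//     Changed |= Result != LoopUnrollResult::Unmodified;
+//   }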
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
index c84ae7d693d7..13794c53f24b 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
@@ -30,6 +31,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -46,6 +48,7 @@
#include <limits>
using namespace llvm;
+using namespace llvm::PatternMatch;
#define DEBUG_TYPE "loop-unroll"
@@ -66,7 +69,7 @@ static const unsigned InfiniteIterationsToInvariance =
std::numeric_limits<unsigned>::max();
// Check whether we are capable of peeling this loop.
-static bool canPeel(Loop *L) {
+bool llvm::canPeel(Loop *L) {
// Make sure the loop is in simplified form
if (!L->isLoopSimplifyForm())
return false;
@@ -136,11 +139,109 @@ static unsigned calculateIterationsToInvariance(
return ToInvariance;
}
+// Return the number of iterations to peel off that make conditions in the
+// body true/false. For example, if we peel 2 iterations off the loop below,
+// the condition i < 2 can be evaluated at compile time.
+// for (i = 0; i < n; i++) {
+//   if (i < 2)
+//     ..
+//   else
+//     ..
+// }
+static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
+ ScalarEvolution &SE) {
+ assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
+ unsigned DesiredPeelCount = 0;
+
+ for (auto *BB : L.blocks()) {
+ auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || BI->isUnconditional())
+ continue;
+
+ // Ignore loop exit condition.
+ if (L.getLoopLatch() == BB)
+ continue;
+
+ Value *Condition = BI->getCondition();
+ Value *LeftVal, *RightVal;
+ CmpInst::Predicate Pred;
+ if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
+ continue;
+
+ const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
+ const SCEV *RightSCEV = SE.getSCEV(RightVal);
+
+ // Do not consider predicates that are known to be true or false
+ // independently of the loop iteration.
+ if (SE.isKnownPredicate(Pred, LeftSCEV, RightSCEV) ||
+ SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), LeftSCEV,
+ RightSCEV))
+ continue;
+
+ // Check if we have a condition with one AddRec and one non AddRec
+ // expression. Normalize LeftSCEV to be the AddRec.
+ if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
+ if (isa<SCEVAddRecExpr>(RightSCEV)) {
+ std::swap(LeftSCEV, RightSCEV);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ } else
+ continue;
+ }
+
+ const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
+
+ // Avoid huge SCEV computations in the loop below; make sure we only
+ // consider AddRecs of the loop we are trying to peel and avoid
+ // non-monotonic predicates, as we will not be able to simplify the loop
+ // body.
+ // FIXME: For the non-monotonic predicates ICMP_EQ and ICMP_NE we can
+ // simplify the loop, if we peel 1 additional iteration, if there
+ // is no wrapping.
+ bool Increasing;
+ if (!LeftAR->isAffine() || LeftAR->getLoop() != &L ||
+ !SE.isMonotonicPredicate(LeftAR, Pred, Increasing))
+ continue;
+ (void)Increasing;
+
+ // Check if extending the current DesiredPeelCount lets us evaluate Pred
+ // or !Pred in the loop body statically.
+ unsigned NewPeelCount = DesiredPeelCount;
+
+ const SCEV *IterVal = LeftAR->evaluateAtIteration(
+ SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
+
+ // If the original condition is not known, get the negated predicate
+ // (which holds on the else branch) and check if it is known. This allows
+ // us to peel off iterations that make the original condition false.
+ if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
+ Pred = ICmpInst::getInversePredicate(Pred);
+
+ const SCEV *Step = LeftAR->getStepRecurrence(SE);
+ while (NewPeelCount < MaxPeelCount &&
+ SE.isKnownPredicate(Pred, IterVal, RightSCEV)) {
+ IterVal = SE.getAddExpr(IterVal, Step);
+ NewPeelCount++;
+ }
+
+ // Only peel the loop if the monotonic predicate !Pred becomes known in the
+ // first iteration of the loop body after peeling.
+ if (NewPeelCount > DesiredPeelCount &&
+ SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
+ RightSCEV))
+ DesiredPeelCount = NewPeelCount;
+ }
+
+ return DesiredPeelCount;
+}
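+
+// Worked example for the logic above, using the loop from the comment at the
+// top of this function: the condition i < 2 gives LeftSCEV = {0,+,1} (an
+// AddRec of this loop) and RightSCEV = 2. Evaluating the AddRec at iterations
+// 0 and 1 yields 0 < 2 and 1 < 2, both known true, so NewPeelCount advances
+// to 2; at iteration 2 the inverse predicate 2 >= 2 is known, so
+// DesiredPeelCount is raised to 2 and the branch folds in the peeled body.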
+
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
- unsigned &TripCount) {
+ unsigned &TripCount, ScalarEvolution &SE) {
assert(LoopSize > 0 && "Zero loop size is not allowed!");
+ // Save the UP.PeelCount value set by the target in
+ // TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
+ unsigned TargetPeelCount = UP.PeelCount;
UP.PeelCount = 0;
if (!canPeel(L))
return;
@@ -149,6 +250,19 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (!L->empty())
return;
+ // If the user provided a peel count, use that.
+ bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
+ if (UserPeelCount) {
+ LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
+ << " iterations.\n");
+ UP.PeelCount = UnrollForcePeelCount;
+ return;
+ }
+
+ // Skip peeling if it's disabled.
+ if (!UP.AllowPeeling)
+ return;
+
// Here we try to get rid of Phis which become invariants after 1, 2, ..., N
// iterations of the loop. For this we compute the number of iterations after
// which every Phi is guaranteed to become an invariant, and try to peel the
@@ -160,7 +274,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
// Now go through all Phis to calculate the number of iterations they
// need to become invariants.
- unsigned DesiredPeelCount = 0;
+ // Start the max computation with the UP.PeelCount value set by the target
+ // in TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
+ unsigned DesiredPeelCount = TargetPeelCount;
BasicBlock *BackEdge = L->getLoopLatch();
assert(BackEdge && "Loop is not in simplified form?");
for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
@@ -170,15 +286,21 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (ToInvariance != InfiniteIterationsToInvariance)
DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
}
+
+ // Respect the limitations implied by loop size and the max peel count.
+ unsigned MaxPeelCount = UnrollPeelMaxCount;
+ MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
+
+ DesiredPeelCount = std::max(DesiredPeelCount,
+ countToEliminateCompares(*L, MaxPeelCount, SE));
+
if (DesiredPeelCount > 0) {
- // Pay respect to limitations implied by loop size and the max peel count.
- unsigned MaxPeelCount = UnrollPeelMaxCount;
- MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
// Consider max peel count limitation.
assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
- DEBUG(dbgs() << "Peel " << DesiredPeelCount << " iteration(s) to turn"
- << " some Phis into invariants.\n");
+ LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
+ << " iteration(s) to turn"
+ << " some Phis into invariants.\n");
UP.PeelCount = DesiredPeelCount;
return;
}
@@ -189,44 +311,37 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (TripCount)
return;
- // If the user provided a peel count, use that.
- bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
- if (UserPeelCount) {
- DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
- << " iterations.\n");
- UP.PeelCount = UnrollForcePeelCount;
- return;
- }
-
// If we don't know the trip count, but have reason to believe the average
// trip count is low, peeling should be beneficial, since we will usually
// hit the peeled section.
// We only do this in the presence of profile information, since otherwise
// our estimates of the trip count are not reliable enough.
- if (UP.AllowPeeling && L->getHeader()->getParent()->hasProfileData()) {
+ if (L->getHeader()->getParent()->hasProfileData()) {
Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L);
if (!PeelCount)
return;
- DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount
- << "\n");
+ LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount
+ << "\n");
if (*PeelCount) {
if ((*PeelCount <= UnrollPeelMaxCount) &&
(LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
- DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n");
+ LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
+ << " iterations.\n");
UP.PeelCount = *PeelCount;
return;
}
- DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
- DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
- DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) << "\n");
- DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n");
+ LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
+ LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
+ LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1)
+ << "\n");
+ LLVM_DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n");
}
}
}
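A worked example of the profile-based cost check above, with hypothetical numbers: if LoopSize = 10, the profile-estimated trip count is 3, and UP.Threshold = 50, the peel cost is LoopSize * (PeelCount + 1) = 10 * 4 = 40 <= 50, so (assuming 3 is also within UnrollPeelMaxCount) UP.PeelCount is set to 3. With UP.Threshold = 30 the same request would fail the check and only the debug statistics would be printed.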
-/// \brief Update the branch weights of the latch of a peeled-off loop
+/// Update the branch weights of the latch of a peeled-off loop
/// iteration.
/// This sets the branch weights for the latch of the recently peeled off loop
/// iteration correctly.
@@ -267,12 +382,12 @@ static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
}
}
-/// \brief Clones the body of the loop L, putting it between \p InsertTop and \p
+/// Clones the body of the loop L, putting it between \p InsertTop and \p
/// InsertBot.
/// \param IterNumber The serial number of the iteration currently being
/// peeled off.
/// \param Exit The exit block of the original loop.
-/// \param[out] NewBlocks A list of the the blocks in the newly created clone
+/// \param[out] NewBlocks A list of the blocks in the newly created clone
/// \param[out] VMap The value map between the loop and the new clone.
/// \param LoopBlocks A helper for DFS-traversal of the loop.
/// \param LVMap A value-map that maps instructions from the original loop to
@@ -376,7 +491,7 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
LVMap[KV.first] = KV.second;
}
-/// \brief Peel off the first \p PeelCount iterations of loop \p L.
+/// Peel off the first \p PeelCount iterations of loop \p L.
///
/// Note that this does not peel them off as a single straight-line block.
/// Rather, each iteration is peeled off separately, and needs to check the
@@ -388,8 +503,8 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
ScalarEvolution *SE, DominatorTree *DT,
AssumptionCache *AC, bool PreserveLCSSA) {
- if (!canPeel(L))
- return false;
+ assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
+ assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
LoopBlocksDFS LoopBlocks(L);
LoopBlocks.perform(LI);
@@ -500,10 +615,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// the original loop body.
if (Iter == 0)
DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch]));
-#ifndef NDEBUG
- if (VerifyDomInfo)
- DT->verifyDomTree();
-#endif
+ assert(DT->verify(DominatorTree::VerificationLevel::Fast));
}
updateBranchWeights(InsertBot, cast<BranchInst>(VMap[LatchBR]), Iter,
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index f79f423ce019..0057b4ba7ce1 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -21,8 +21,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -33,7 +33,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -418,8 +418,9 @@ canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
// UnrollRuntimeMultiExit is true. This will need updating the logic in
// connectEpilog/connectProlog.
if (!LatchExit->getSinglePredecessor()) {
- DEBUG(dbgs() << "Bailout for multi-exit handling when latch exit has >1 "
- "predecessor.\n");
+ LLVM_DEBUG(
+ dbgs() << "Bailout for multi-exit handling when latch exit has >1 "
+ "predecessor.\n");
return false;
}
// FIXME: We bail out of multi-exit unrolling when epilog loop is generated
@@ -528,14 +529,14 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
LoopInfo *LI, ScalarEvolution *SE,
DominatorTree *DT, AssumptionCache *AC,
bool PreserveLCSSA) {
- DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
- DEBUG(L->dump());
- DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" :
- dbgs() << "Using prolog remainder.\n");
+ LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
+ LLVM_DEBUG(L->dump());
+ LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
+ : dbgs() << "Using prolog remainder.\n");
// Make sure the loop is in canonical form.
if (!L->isLoopSimplifyForm()) {
- DEBUG(dbgs() << "Not in simplify form!\n");
+ LLVM_DEBUG(dbgs() << "Not in simplify form!\n");
return false;
}
@@ -561,7 +562,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// Support only single exit and exiting block unless multi-exit loop unrolling is enabled.
if (!isMultiExitUnrollingEnabled &&
(!L->getExitingBlock() || OtherExits.size())) {
- DEBUG(
+ LLVM_DEBUG(
dbgs()
<< "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
"enabled!\n");
@@ -581,7 +582,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
const SCEV *BECountSC = SE->getExitCount(L, Latch);
if (isa<SCEVCouldNotCompute>(BECountSC) ||
!BECountSC->getType()->isIntegerTy()) {
- DEBUG(dbgs() << "Could not compute exit block SCEV\n");
+ LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");
return false;
}
@@ -591,7 +592,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
const SCEV *TripCountSC =
SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
if (isa<SCEVCouldNotCompute>(TripCountSC)) {
- DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
+ LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
return false;
}
@@ -601,15 +602,16 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
SCEVExpander Expander(*SE, DL, "loop-unroll");
if (!AllowExpensiveTripCount &&
Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) {
- DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
+ LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
return false;
}
// This constraint lets us deal with an overflowing trip count easily; see the
// comment on ModVal below.
if (Log2_32(Count) > BEWidth) {
- DEBUG(dbgs()
- << "Count failed constraint on overflow trip count calculation.\n");
+ LLVM_DEBUG(
+ dbgs()
+ << "Count failed constraint on overflow trip count calculation.\n");
return false;
}
@@ -763,7 +765,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// values from the cloned region. Also update the dominator info for
// OtherExits and their immediate successors, since we have new edges into
// OtherExits.
- SmallSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks;
+ SmallPtrSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks;
for (auto *BB : OtherExits) {
for (auto &II : *BB) {
@@ -878,10 +880,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
NewPreHeader, VMap, DT, LI, PreserveLCSSA);
}
- // If this loop is nested, then the loop unroller changes the code in the
- // parent loop, so the Scalar Evolution pass needs to be run again.
- if (Loop *ParentLoop = L->getParentLoop())
- SE->forgetLoop(ParentLoop);
+ // If this loop is nested, then the loop unroller changes the code in any of
+ // its parent loops, so the Scalar Evolution pass needs to be run again.
+ SE->forgetTopmostLoop(L);
// Canonicalize to LoopSimplifyForm both original and remainder loops. We
// cannot rely on the LoopUnrollPass to do this because it only does
@@ -897,7 +898,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
}
if (remainderLoop && UnrollRemainder) {
- DEBUG(dbgs() << "Unrolling remainder loop\n");
+ LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1,
/*Force*/ false, /*AllowRuntime*/ false,
/*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 0a357f4b5004..46af120a428b 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -16,8 +16,10 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
@@ -553,47 +555,48 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
AC, DT)) {
- DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
+ LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
AC, DT)) {
- DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
+ LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB,
AC, DT)) {
- DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
+ LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
AC, DT)) {
- DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
+ LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB,
AC, DT)) {
- DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
+ LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes,
DB, AC, DT)) {
- DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
+ LLVM_DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
AC, DT)) {
- DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
+ LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
AC, DT)) {
- DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
+ LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB,
AC, DT)) {
- DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n");
+ LLVM_DEBUG(dbgs() << "Found a float MINMAX reduction PHI." << *Phi
+ << "\n");
return true;
}
// Not a reduction of known type.
@@ -921,13 +924,13 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
}
/// This function is called when we suspect that the update-chain of a phi node
-/// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts,
-/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
-/// predicate P under which the SCEV expression for the phi can be the
-/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
-/// cast instructions that are involved in the update-chain of this induction.
-/// A caller that adds the required runtime predicate can be free to drop these
-/// cast instructions, and compute the phi using \p AR (instead of some scev
+/// (whose symbolic SCEV expression is in \p PhiScev) contains redundant casts,
+/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
+/// predicate P under which the SCEV expression for the phi can be the
+/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
+/// cast instructions that are involved in the update-chain of this induction.
+/// A caller that adds the required runtime predicate can be free to drop these
+/// cast instructions, and compute the phi using \p AR (instead of some scev
/// expression with casts).
///
/// For example, without a predicate the scev expression can take the following
@@ -962,7 +965,7 @@ static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
const Loop *L = AR->getLoop();
- // Find any cast instructions that participate in the def-use chain of
+ // Find any cast instructions that participate in the def-use chain of
// PhiScev in the loop.
// FORNOW/TODO: We currently expect the def-use chain to include only
// two-operand instructions, where one of the operands is an invariant.
@@ -1050,7 +1053,7 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
AR = PSE.getAsAddRec(Phi);
if (!AR) {
- DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+ LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
return false;
}
@@ -1084,14 +1087,15 @@ bool InductionDescriptor::isInductionPHI(
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
if (!AR) {
- DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+ LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
return false;
}
if (AR->getLoop() != TheLoop) {
// FIXME: We should treat this as a uniform. Unfortunately, we
 // don't currently know how to handle uniform PHIs.
- DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
+ LLVM_DEBUG(
+ dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
return false;
}
@@ -1172,11 +1176,12 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA);
if (!NewExitBB)
- DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
- << *L << "\n");
+ LLVM_DEBUG(
+ dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
+ << *L << "\n");
else
- DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
- << NewExitBB->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
+ << NewExitBB->getName() << "\n");
return true;
};
@@ -1199,7 +1204,7 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
return Changed;
}
-/// \brief Returns the instructions that use values defined in the loop.
+/// Returns the instructions that use values defined in the loop.
SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
SmallVector<Instruction *, 8> UsedOutside;
@@ -1276,7 +1281,7 @@ void llvm::initializeLoopPassPass(PassRegistry &Registry) {
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
}
-/// \brief Find string metadata for loop
+/// Find string metadata for loop
///
/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
/// operand or null otherwise. If the string metadata is not found return
@@ -1428,6 +1433,32 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
DT->deleteEdge(Preheader, L->getHeader());
}
+ // Given LCSSA form is satisfied, we should not have users of instructions
+ // within the dead loop outside of the loop. However, LCSSA doesn't take
+ // unreachable uses into account. We handle them here.
+ // We could do this after dropping all references (in that case all users in
+ // the loop would already be eliminated, leaving us less work to do), but
+ // according to the API doc of User::dropAllReferences the only valid
+ // operation after dropping references is deletion. So let's substitute all
+ // usages of instructions from the loop with undef values of the
+ // corresponding type first.
+ for (auto *Block : L->blocks())
+ for (Instruction &I : *Block) {
+ auto *Undef = UndefValue::get(I.getType());
+ for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E;) {
+ Use &U = *UI;
+ ++UI;
+ if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
+ if (L->contains(Usr->getParent()))
+ continue;
+ // If we have a DT, we can check that any use outside the loop occurs only
+ // in an unreachable block.
+ if (DT)
+ assert(!DT->isReachableFromEntry(U) &&
+ "Unexpected user in reachable block");
+ U.set(Undef);
+ }
+ }
+
// Remove the block from the reference counting scheme, so that we can
// delete it freely later.
for (auto *Block : L->blocks())
@@ -1455,54 +1486,12 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
}
}
-/// Returns true if the instruction in a loop is guaranteed to execute at least
-/// once.
-bool llvm::isGuaranteedToExecute(const Instruction &Inst,
- const DominatorTree *DT, const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo) {
- // We have to check to make sure that the instruction dominates all
- // of the exit blocks. If it doesn't, then there is a path out of the loop
- // which does not execute this instruction, so we can't hoist it.
-
- // If the instruction is in the header block for the loop (which is very
- // common), it is always guaranteed to dominate the exit blocks. Since this
- // is a common case, and can save some work, check it now.
- if (Inst.getParent() == CurLoop->getHeader())
- // If there's a throw in the header block, we can't guarantee we'll reach
- // Inst.
- return !SafetyInfo->HeaderMayThrow;
-
- // Somewhere in this loop there is an instruction which may throw and make us
- // exit the loop.
- if (SafetyInfo->MayThrow)
- return false;
-
- // Get the exit blocks for the current loop.
- SmallVector<BasicBlock *, 8> ExitBlocks;
- CurLoop->getExitBlocks(ExitBlocks);
-
- // Verify that the block dominates each of the exit blocks of the loop.
- for (BasicBlock *ExitBlock : ExitBlocks)
- if (!DT->dominates(Inst.getParent(), ExitBlock))
- return false;
-
- // As a degenerate case, if the loop is statically infinite then we haven't
- // proven anything since there are no exit blocks.
- if (ExitBlocks.empty())
- return false;
-
- // FIXME: In general, we have to prove that the loop isn't an infinite loop.
- // See http::llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is
- // just a special case of this.)
- return true;
-}
-
Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
// Only support loops with a unique exiting block, and a latch.
if (!L->getExitingBlock())
return None;
- // Get the branch weights for the the loop's backedge.
+ // Get the branch weights for the loop's backedge.
BranchInst *LatchBR =
dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator());
if (!LatchBR || LatchBR->getNumSuccessors() != 2)
@@ -1530,7 +1519,7 @@ Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
return (FalseVal + (TrueVal / 2)) / TrueVal;
}
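The return expression above rounds the ratio of the two weights to the nearest integer. With hypothetical latch weights of 990 on the backedge and 10 on the exit edge (FalseVal = 990, TrueVal = 10 in this successor layout), the estimate is (990 + 10 / 2) / 10 = 99, i.e. roughly one exit per hundred latch executions.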
-/// \brief Adds a 'fast' flag to floating point operations.
+/// Adds a 'fast' flag to floating point operations.
static Value *addFastMathFlag(Value *V) {
if (isa<FPMathOperator>(V)) {
FastMathFlags Flags;
@@ -1540,6 +1529,38 @@ static Value *addFastMathFlag(Value *V) {
return V;
}
+// Helper to generate an ordered reduction.
+Value *
+llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src,
+ unsigned Op,
+ RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
+ ArrayRef<Value *> RedOps) {
+ unsigned VF = Src->getType()->getVectorNumElements();
+
+ // Extract and apply reduction ops in ascending order:
+ // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ...) + Scl[VF-1]
+ Value *Result = Acc;
+ for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {
+ Value *Ext =
+ Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx));
+
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
+ Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
+ "bin.rdx");
+ } else {
+ assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
+ "Invalid min/max");
+ Result = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, Result,
+ Ext);
+ }
+
+ if (!RedOps.empty())
+ propagateIRFlags(Result, RedOps);
+ }
+
+ return Result;
+}
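A minimal sketch of a call site for getOrderedReduction (illustrative; Builder, Acc, and Src are assumed to be set up by the caller, with Src of vector type <4 x float>):

    // Emits (((Acc + Src[0]) + Src[1]) + Src[2]) + Src[3], preserving the
    // strict left-to-right association that in-order FP reductions require.
    Value *Sum = llvm::getOrderedReduction(
        Builder, /*Acc=*/Acc, /*Src=*/Src, Instruction::FAdd,
        RecurrenceDescriptor::MRK_Invalid, /*RedOps=*/None);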
+
// Helper to generate a log2 shuffle reduction.
Value *
llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index 29756d9dab7f..abbcd5f9e3b8 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -140,9 +140,12 @@ void LoopVersioning::addPHINodes(
if (!PN) {
PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver",
&PHIBlock->front());
- for (auto *User : Inst->users())
- if (!VersionedLoop->contains(cast<Instruction>(User)->getParent()))
- User->replaceUsesOfWith(Inst, PN);
+ SmallVector<User*, 8> UsersToUpdate;
+ for (User *U : Inst->users())
+ if (!VersionedLoop->contains(cast<Instruction>(U)->getParent()))
+ UsersToUpdate.push_back(U);
+ for (User *U : UsersToUpdate)
+ U->replaceUsesOfWith(Inst, PN);
PN->addIncoming(Inst, VersionedLoop->getExitingBlock());
}
}
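The rewrite above is an instance of the collect-then-mutate idiom: replaceUsesOfWith edits Inst's use-list, so calling it while range-iterating users() of the same value can invalidate the iterator. Distilled (shouldRewrite and Replacement are placeholders):

    SmallVector<User *, 8> Worklist;
    for (User *U : Inst->users())
      if (shouldRewrite(U))      // decide without mutating the use-list
        Worklist.push_back(U);
    for (User *U : Worklist)     // now it is safe to mutate
      U->replaceUsesOfWith(Inst, Replacement);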
@@ -248,7 +251,7 @@ void LoopVersioning::annotateInstWithNoAlias(Instruction *VersionedInst,
}
namespace {
-/// \brief Also expose this is a pass. Currently this is only used for
+/// Also expose this as a pass. Currently this is only used for
/// unit-testing. It adds all memchecks necessary to remove all may-aliasing
/// array accesses from the loop.
class LoopVersioningPass : public FunctionPass {
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
index ee84541e526d..c852d538b0d1 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -21,7 +21,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
using namespace llvm;
#define DEBUG_TYPE "lowerinvoke"
@@ -48,10 +48,12 @@ static bool runImpl(Function &F) {
bool Changed = false;
for (BasicBlock &BB : F)
if (InvokeInst *II = dyn_cast<InvokeInst>(BB.getTerminator())) {
- SmallVector<Value *, 16> CallArgs(II->op_begin(), II->op_end() - 3);
+ SmallVector<Value *, 16> CallArgs(II->arg_begin(), II->arg_end());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ II->getOperandBundlesAsDefs(OpBundles);
// Insert a normal call instruction...
CallInst *NewCall =
- CallInst::Create(II->getCalledValue(), CallArgs, "", II);
+ CallInst::Create(II->getCalledValue(), CallArgs, OpBundles, "", II);
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
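For context on the old slice: an invoke's operand list ends with its normal destination, unwind destination, and callee, so op_begin()..op_end() - 3 covered exactly the call arguments only when no operand bundles were present. Iterating arg_begin()/arg_end() and re-attaching the bundles explicitly keeps the lowering correct for bundled invokes as well.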
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 57dc225e9dab..03006ef3a2d3 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -409,8 +409,8 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
/* SrcAddr */ Memcpy->getRawSource(),
/* DstAddr */ Memcpy->getRawDest(),
/* CopyLen */ CI,
- /* SrcAlign */ Memcpy->getAlignment(),
- /* DestAlign */ Memcpy->getAlignment(),
+ /* SrcAlign */ Memcpy->getSourceAlignment(),
+ /* DestAlign */ Memcpy->getDestAlignment(),
/* SrcIsVolatile */ Memcpy->isVolatile(),
/* DstIsVolatile */ Memcpy->isVolatile(),
/* TargetTransformInfo */ TTI);
@@ -419,8 +419,8 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
/* SrcAddr */ Memcpy->getRawSource(),
/* DstAddr */ Memcpy->getRawDest(),
/* CopyLen */ Memcpy->getLength(),
- /* SrcAlign */ Memcpy->getAlignment(),
- /* DestAlign */ Memcpy->getAlignment(),
+ /* SrcAlign */ Memcpy->getSourceAlignment(),
+ /* DestAlign */ Memcpy->getDestAlignment(),
/* SrcIsVolatile */ Memcpy->isVolatile(),
/* DstIsVolatile */ Memcpy->isVolatile(),
 /* TargetTransformInfo */ TTI);
@@ -432,8 +432,8 @@ void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
/* SrcAddr */ Memmove->getRawSource(),
/* DstAddr */ Memmove->getRawDest(),
/* CopyLen */ Memmove->getLength(),
- /* SrcAlign */ Memmove->getAlignment(),
- /* DestAlign */ Memmove->getAlignment(),
+ /* SrcAlign */ Memmove->getSourceAlignment(),
+ /* DestAlign */ Memmove->getDestAlignment(),
/* SrcIsVolatile */ Memmove->isVolatile(),
/* DstIsVolatile */ Memmove->isVolatile());
}
@@ -443,6 +443,6 @@ void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
/* DstAddr */ Memset->getRawDest(),
/* CopyLen */ Memset->getLength(),
/* SetValue */ Memset->getValue(),
- /* Alignment */ Memset->getAlignment(),
+ /* Alignment */ Memset->getDestAlignment(),
Memset->isVolatile());
}
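As a hedged illustration of what the new accessors enable: for a call such as @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %dst, i8* align 16 %src, i64 %n, i1 false), the lowered loop can now load from %src at alignment 16 while storing to %dst at alignment 4, instead of clamping both sides to a single shared alignment.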
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 344cb35df986..e99ecfef19cd 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -29,7 +29,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <cassert>
@@ -74,7 +74,7 @@ namespace {
LowerSwitch() : FunctionPass(ID) {
initializeLowerSwitchPass(*PassRegistry::getPassRegistry());
- }
+ }
bool runOnFunction(Function &F) override;
@@ -155,11 +155,8 @@ bool LowerSwitch::runOnFunction(Function &F) {
}
/// Used for debugging purposes.
-static raw_ostream& operator<<(raw_ostream &O,
- const LowerSwitch::CaseVector &C)
- LLVM_ATTRIBUTE_USED;
-
-static raw_ostream& operator<<(raw_ostream &O,
+LLVM_ATTRIBUTE_USED
+static raw_ostream &operator<<(raw_ostream &O,
const LowerSwitch::CaseVector &C) {
O << "[";
@@ -172,7 +169,7 @@ static raw_ostream& operator<<(raw_ostream &O,
return O << "]";
}
-/// \brief Update the first occurrence of the "switch statement" BB in the PHI
+/// Update the first occurrence of the "switch statement" BB in the PHI
/// node with the "new" BB. The other occurrences will:
///
/// 1) Be updated by subsequent calls to this function. Switch statements may
@@ -245,14 +242,13 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
unsigned Mid = Size / 2;
std::vector<CaseRange> LHS(Begin, Begin + Mid);
- DEBUG(dbgs() << "LHS: " << LHS << "\n");
+ LLVM_DEBUG(dbgs() << "LHS: " << LHS << "\n");
std::vector<CaseRange> RHS(Begin + Mid, End);
- DEBUG(dbgs() << "RHS: " << RHS << "\n");
+ LLVM_DEBUG(dbgs() << "RHS: " << RHS << "\n");
CaseRange &Pivot = *(Begin + Mid);
- DEBUG(dbgs() << "Pivot ==> "
- << Pivot.Low->getValue()
- << " -" << Pivot.High->getValue() << "\n");
+ LLVM_DEBUG(dbgs() << "Pivot ==> " << Pivot.Low->getValue() << " -"
+ << Pivot.High->getValue() << "\n");
// NewLowerBound here should never be the integer minimal value.
// This is because it is computed from a case range that is never
@@ -274,20 +270,14 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
NewUpperBound = LHS.back().High;
}
- DEBUG(dbgs() << "LHS Bounds ==> ";
- if (LowerBound) {
- dbgs() << LowerBound->getSExtValue();
- } else {
- dbgs() << "NONE";
- }
- dbgs() << " - " << NewUpperBound->getSExtValue() << "\n";
- dbgs() << "RHS Bounds ==> ";
- dbgs() << NewLowerBound->getSExtValue() << " - ";
- if (UpperBound) {
- dbgs() << UpperBound->getSExtValue() << "\n";
- } else {
- dbgs() << "NONE\n";
- });
+ LLVM_DEBUG(dbgs() << "LHS Bounds ==> "; if (LowerBound) {
+ dbgs() << LowerBound->getSExtValue();
+ } else { dbgs() << "NONE"; } dbgs() << " - "
+ << NewUpperBound->getSExtValue() << "\n";
+ dbgs() << "RHS Bounds ==> ";
+ dbgs() << NewLowerBound->getSExtValue() << " - "; if (UpperBound) {
+ dbgs() << UpperBound->getSExtValue() << "\n";
+ } else { dbgs() << "NONE\n"; });
// Create a new node that checks if the value is < pivot. Go to the
// left branch if it is and right branch if not.
@@ -337,7 +327,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
} else if (Leaf.Low->isZero()) {
// Val >= 0 && Val <= Hi --> Val <=u Hi
Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
- "SwitchLeaf");
+ "SwitchLeaf");
} else {
// Emit V-Lo <=u Hi-Lo
Constant* NegLo = ConstantExpr::getNeg(Leaf.Low);
@@ -364,7 +354,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
for (uint64_t j = 0; j < Range; ++j) {
PN->removeIncomingValue(OrigBlock);
}
-
+
int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
assert(BlockIdx != -1 && "Switch didn't go to this successor??");
PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf);
@@ -382,7 +372,7 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
Cases.push_back(CaseRange(Case.getCaseValue(), Case.getCaseValue(),
Case.getCaseSuccessor()));
- std::sort(Cases.begin(), Cases.end(), CaseCmp());
+ llvm::sort(Cases.begin(), Cases.end(), CaseCmp());
// Merge case into clusters
if (Cases.size() >= 2) {
@@ -443,9 +433,9 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
// Prepare cases vector.
CaseVector Cases;
unsigned numCmps = Clusterify(Cases, SI);
- DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
- << ". Total compares: " << numCmps << "\n");
- DEBUG(dbgs() << "Cases: " << Cases << "\n");
+ LLVM_DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << "\n");
+ LLVM_DEBUG(dbgs() << "Cases: " << Cases << "\n");
(void)numCmps;
ConstantInt *LowerBound = nullptr;
@@ -505,6 +495,10 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
}
#endif
+ // As the default block in the switch is unreachable, update the PHI nodes
+ // (remove the entry to the default block) to reflect this.
+ Default->removePredecessor(OrigBlock);
+
// Use the most popular block as the new default, reducing the number of
// cases.
assert(MaxPop > 0 && PopSucc);
@@ -518,29 +512,33 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
if (Cases.empty()) {
BranchInst::Create(Default, CurBlock);
SI->eraseFromParent();
+ // As all the cases have been replaced with a single branch, only keep
+ // one entry in the PHI nodes.
+ for (unsigned I = 0; I < (MaxPop - 1); ++I)
+ PopSucc->removePredecessor(OrigBlock);
return;
}
}
+ unsigned NrOfDefaults = (SI->getDefaultDest() == Default) ? 1 : 0;
+ for (const auto &Case : SI->cases())
+ if (Case.getCaseSuccessor() == Default)
+ NrOfDefaults++;
+
// Create a new, empty default block so that the new hierarchy of
// if-then statements go to this and the PHI nodes are happy.
BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
F->getBasicBlockList().insert(Default->getIterator(), NewDefault);
BranchInst::Create(Default, NewDefault);
- // If there is an entry in any PHI nodes for the default edge, make sure
- // to update them as well.
- for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
- assert(BlockIdx != -1 && "Switch didn't go to this successor??");
- PN->setIncomingBlock((unsigned)BlockIdx, NewDefault);
- }
-
BasicBlock *SwitchBlock =
switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val,
OrigBlock, OrigBlock, NewDefault, UnreachableRanges);
+ // If there are entries in any PHI nodes for the default edge, make sure
+ // to update them as well.
+ fixPhis(Default, OrigBlock, NewDefault, NrOfDefaults);
+
// Branch to our shiny new if-then stuff...
BranchInst::Create(SwitchBlock, OrigBlock);
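A reading of the reordering above: the PHI fixup for the default edge is deferred until after switchConvert, so that fixPhis can redirect all NrOfDefaults incoming entries from OrigBlock through NewDefault in a single pass, rather than patching only the first occurrence eagerly as the deleted loop did.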
diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
index 29f289b62da0..23145e584751 100644
--- a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
@@ -22,7 +22,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <vector>
diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
index 0f7bd76c03ca..323f2552ca80 100644
--- a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -29,7 +29,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/Pass.h"
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp
index dc780542ce68..6d0b96f6aa8a 100644
--- a/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp
@@ -14,19 +14,38 @@
#include "llvm/Transforms/Utils/OrderedInstructions.h"
using namespace llvm;
+bool OrderedInstructions::localDominates(const Instruction *InstA,
+ const Instruction *InstB) const {
+ assert(InstA->getParent() == InstB->getParent() &&
+ "Instructions must be in the same basic block");
+
+ const BasicBlock *IBB = InstA->getParent();
+ auto OBB = OBBMap.find(IBB);
+ if (OBB == OBBMap.end())
+ OBB = OBBMap.insert({IBB, make_unique<OrderedBasicBlock>(IBB)}).first;
+ return OBB->second->dominates(InstA, InstB);
+}
+
 /// Given 2 instructions, use OrderedBasicBlock to check for the dominance
 /// relation if the instructions are in the same basic block; otherwise, use
 /// the dominator tree.
bool OrderedInstructions::dominates(const Instruction *InstA,
const Instruction *InstB) const {
- const BasicBlock *IBB = InstA->getParent();
// Use ordered basic block to do dominance check in case the 2 instructions
// are in the same basic block.
- if (IBB == InstB->getParent()) {
- auto OBB = OBBMap.find(IBB);
- if (OBB == OBBMap.end())
- OBB = OBBMap.insert({IBB, make_unique<OrderedBasicBlock>(IBB)}).first;
- return OBB->second->dominates(InstA, InstB);
- }
+ if (InstA->getParent() == InstB->getParent())
+ return localDominates(InstA, InstB);
return DT->dominates(InstA->getParent(), InstB->getParent());
}
+
+bool OrderedInstructions::dfsBefore(const Instruction *InstA,
+ const Instruction *InstB) const {
+ // Use ordered basic block in case the 2 instructions are in the same basic
+ // block.
+ if (InstA->getParent() == InstB->getParent())
+ return localDominates(InstA, InstB);
+
+ DomTreeNode *DA = DT->getNode(InstA->getParent());
+ DomTreeNode *DB = DT->getNode(InstB->getParent());
+ return DA->getDFSNumIn() < DB->getDFSNumIn();
+}
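A brief sketch of the intended division of labor between the two queries (illustrative; DT points to a valid DominatorTree whose DFS numbers are current):

    OrderedInstructions OI(&DT);
    bool Dom = OI.dominates(InstA, InstB); // true dominance: may InstB use InstA?
    bool Ord = OI.dfsBefore(InstA, InstB); // deterministic ordering for sorting
                                           // purposes only, not a dominance proof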
diff --git a/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index d47be6ea566b..2923977b791a 100644
--- a/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
@@ -24,6 +25,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -32,7 +34,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/OrderedInstructions.h"
#include <algorithm>
#define DEBUG_TYPE "predicateinfo"
@@ -118,7 +120,7 @@ static bool valueComesBefore(OrderedInstructions &OI, const Value *A,
return false;
if (ArgA && ArgB)
return ArgA->getArgNo() < ArgB->getArgNo();
- return OI.dominates(cast<Instruction>(A), cast<Instruction>(B));
+ return OI.dfsBefore(cast<Instruction>(A), cast<Instruction>(B));
}
// This compares ValueDFS structures, creating OrderedBasicBlocks where
@@ -479,6 +481,19 @@ void PredicateInfo::buildPredicateInfo() {
renameUses(OpsToRename);
}
+// Create an ssa_copy declaration with custom mangling, because
+// Intrinsic::getDeclaration does not handle overloaded unnamed types properly:
+// all unnamed types get mangled to the same string. We use the pointer
+// to the type as the name here, as it guarantees unique names for different
+// types, and we remove the declarations when destroying PredicateInfo.
+// It is a workaround for PR38117, because solving it in a fully general way is
+// tricky (FIXME).
+static Function *getCopyDeclaration(Module *M, Type *Ty) {
+ std::string Name = "llvm.ssa.copy." + utostr((uintptr_t) Ty);
+ return cast<Function>(M->getOrInsertFunction(
+ Name, getType(M->getContext(), Intrinsic::ssa_copy, Ty)));
+}
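Illustratively, the collision being worked around: two distinct literal (unnamed) struct types, say { i32 } and { i8 }, would both be mangled to the same declaration name by Intrinsic::getDeclaration, whereas keying the name on the Type pointer as above yields a separate llvm.ssa.copy.<pointer-value> declaration per type instance (the numeric suffix is simply whatever the pointer value happens to be).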
+
// Given the renaming stack, make all the operands currently on the stack real
// by inserting them into the IR. Return the last operation's value.
Value *PredicateInfo::materializeStack(unsigned int &Counter,
@@ -507,8 +522,9 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
// order in the case of multiple predicateinfo in the same block.
if (isa<PredicateWithEdge>(ValInfo)) {
IRBuilder<> B(getBranchTerminator(ValInfo));
- Function *IF = Intrinsic::getDeclaration(
- F.getParent(), Intrinsic::ssa_copy, Op->getType());
+ Function *IF = getCopyDeclaration(F.getParent(), Op->getType());
+ if (IF->user_begin() == IF->user_end())
+ CreatedDeclarations.insert(IF);
CallInst *PIC =
B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
PredicateMap.insert({PIC, ValInfo});
@@ -518,8 +534,9 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
assert(PAssume &&
"Should not have gotten here without it being an assume");
IRBuilder<> B(PAssume->AssumeInst);
- Function *IF = Intrinsic::getDeclaration(
- F.getParent(), Intrinsic::ssa_copy, Op->getType());
+ Function *IF = getCopyDeclaration(F.getParent(), Op->getType());
+ if (IF->user_begin() == IF->user_end())
+ CreatedDeclarations.insert(IF);
CallInst *PIC = B.CreateCall(IF, Op);
PredicateMap.insert({PIC, ValInfo});
Result.Def = PIC;
@@ -553,10 +570,11 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
auto Comparator = [&](const Value *A, const Value *B) {
return valueComesBefore(OI, A, B);
};
- std::sort(OpsToRename.begin(), OpsToRename.end(), Comparator);
+ llvm::sort(OpsToRename.begin(), OpsToRename.end(), Comparator);
ValueDFS_Compare Compare(OI);
// Compute liveness, and rename in O(uses) per Op.
for (auto *Op : OpsToRename) {
+ LLVM_DEBUG(dbgs() << "Visiting " << *Op << "\n");
unsigned Counter = 0;
SmallVector<ValueDFS, 16> OrderedUses;
const auto &ValueInfo = getValueInfo(Op);
@@ -625,15 +643,15 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
// we want to.
bool PossibleCopy = VD.PInfo != nullptr;
if (RenameStack.empty()) {
- DEBUG(dbgs() << "Rename Stack is empty\n");
+ LLVM_DEBUG(dbgs() << "Rename Stack is empty\n");
} else {
- DEBUG(dbgs() << "Rename Stack Top DFS numbers are ("
- << RenameStack.back().DFSIn << ","
- << RenameStack.back().DFSOut << ")\n");
+ LLVM_DEBUG(dbgs() << "Rename Stack Top DFS numbers are ("
+ << RenameStack.back().DFSIn << ","
+ << RenameStack.back().DFSOut << ")\n");
}
- DEBUG(dbgs() << "Current DFS numbers are (" << VD.DFSIn << ","
- << VD.DFSOut << ")\n");
+ LLVM_DEBUG(dbgs() << "Current DFS numbers are (" << VD.DFSIn << ","
+ << VD.DFSOut << ")\n");
bool ShouldPush = (VD.Def || PossibleCopy);
bool OutOfScope = !stackIsInScope(RenameStack, VD);
@@ -652,7 +670,7 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
if (VD.Def || PossibleCopy)
continue;
if (!DebugCounter::shouldExecute(RenameCounter)) {
- DEBUG(dbgs() << "Skipping execution due to debug counter\n");
+ LLVM_DEBUG(dbgs() << "Skipping execution due to debug counter\n");
continue;
}
ValueDFS &Result = RenameStack.back();
@@ -663,8 +681,9 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
if (!Result.Def)
Result.Def = materializeStack(Counter, RenameStack, Op);
- DEBUG(dbgs() << "Found replacement " << *Result.Def << " for "
- << *VD.U->get() << " in " << *(VD.U->getUser()) << "\n");
+ LLVM_DEBUG(dbgs() << "Found replacement " << *Result.Def << " for "
+ << *VD.U->get() << " in " << *(VD.U->getUser())
+ << "\n");
assert(DT.dominates(cast<Instruction>(Result.Def), *VD.U) &&
"Predicateinfo def should have dominated this use");
VD.U->set(Result.Def);
@@ -702,7 +721,22 @@ PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT,
buildPredicateInfo();
}
-PredicateInfo::~PredicateInfo() {}
+// Remove all declarations we created. The PredicateInfo consumers are
+// responsible for removing the ssa_copy calls created.
+PredicateInfo::~PredicateInfo() {
+ // Collect function pointers in a set first, as SmallSet uses a SmallVector
+ // internally and we have to remove the asserting value handles first.
+ SmallPtrSet<Function *, 20> FunctionPtrs;
+ for (auto &F : CreatedDeclarations)
+ FunctionPtrs.insert(&*F);
+ CreatedDeclarations.clear();
+
+ for (Function *F : FunctionPtrs) {
+ assert(F->user_begin() == F->user_end() &&
+ "PredicateInfo consumer did not remove all SSA copies.");
+ F->eraseFromParent();
+ }
+}
void PredicateInfo::verifyPredicateInfo() const {}
@@ -720,6 +754,20 @@ void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AssumptionCacheTracker>();
}
+// Replace ssa_copy calls created by PredicateInfo with their operand.
+static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) {
+ for (auto I = inst_begin(F), E = inst_end(F); I != E;) {
+ Instruction *Inst = &*I++;
+ const auto *PI = PredInfo.getPredicateInfoFor(Inst);
+ auto *II = dyn_cast<IntrinsicInst>(Inst);
+ if (!PI || !II || II->getIntrinsicID() != Intrinsic::ssa_copy)
+ continue;
+
+ Inst->replaceAllUsesWith(II->getOperand(0));
+ Inst->eraseFromParent();
+ }
+}
+
bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -727,6 +775,8 @@ bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
PredInfo->print(dbgs());
if (VerifyPredicateInfo)
PredInfo->verifyPredicateInfo();
+
+ replaceCreatedSSACopys(*PredInfo, F);
return false;
}
@@ -735,12 +785,14 @@ PreservedAnalyses PredicateInfoPrinterPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
OS << "PredicateInfo for function: " << F.getName() << "\n";
- make_unique<PredicateInfo>(F, DT, AC)->print(OS);
+ auto PredInfo = make_unique<PredicateInfo>(F, DT, AC);
+ PredInfo->print(OS);
+ replaceCreatedSSACopys(*PredInfo, F);
return PreservedAnalyses::all();
}
-/// \brief An assembly annotator class to print PredicateInfo information in
+/// An assembly annotator class to print PredicateInfo information in
/// comments.
class PredicateInfoAnnotatedWriter : public AssemblyAnnotationWriter {
friend class PredicateInfo;
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index fcd3bd08482a..86e15bbd7f22 100644
--- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -45,7 +46,6 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
@@ -164,26 +164,27 @@ struct AllocaInfo {
}
};
-// Data package used by RenamePass()
-class RenamePassData {
-public:
+/// Data package used by RenamePass().
+struct RenamePassData {
using ValVector = std::vector<Value *>;
+ using LocationVector = std::vector<DebugLoc>;
- RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V)
- : BB(B), Pred(P), Values(std::move(V)) {}
+ RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V, LocationVector L)
+ : BB(B), Pred(P), Values(std::move(V)), Locations(std::move(L)) {}
BasicBlock *BB;
BasicBlock *Pred;
ValVector Values;
+ LocationVector Locations;
};
-/// \brief This assigns and keeps a per-bb relative ordering of load/store
+/// This assigns and keeps a per-bb relative ordering of load/store
/// instructions in the block that directly load or store an alloca.
///
/// This functionality is important because it avoids scanning large basic
/// blocks multiple times when promoting many allocas in the same block.
class LargeBlockInfo {
- /// \brief For each instruction that we track, keep the index of the
+ /// For each instruction that we track, keep the index of the
/// instruction.
///
/// The index starts out as the number of the instruction from the start of
@@ -242,7 +243,7 @@ struct PromoteMem2Reg {
/// Reverse mapping of Allocas.
DenseMap<AllocaInst *, unsigned> AllocaLookup;
- /// \brief The PhiNodes we're adding.
+ /// The PhiNodes we're adding.
///
/// That map is used to simplify some Phi nodes as we iterate over it, so
/// it should have deterministic iterators. We could use a MapVector, but
@@ -294,7 +295,7 @@ private:
unsigned getNumPreds(const BasicBlock *BB) {
unsigned &NP = BBNumPreds[BB];
if (NP == 0)
- NP = std::distance(pred_begin(BB), pred_end(BB)) + 1;
+ NP = pred_size(BB) + 1;
return NP - 1;
}
@@ -303,6 +304,7 @@ private:
SmallPtrSetImpl<BasicBlock *> &LiveInBlocks);
void RenamePass(BasicBlock *BB, BasicBlock *Pred,
RenamePassData::ValVector &IncVals,
+ RenamePassData::LocationVector &IncLocs,
std::vector<RenamePassData> &Worklist);
bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
};
@@ -345,7 +347,7 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
}
}
-/// \brief Rewrite as many loads as possible given a single store.
+/// Rewrite as many loads as possible given a single store.
///
/// When there is only a single store, we can use the domtree to trivially
/// replace all of the dominated loads with the stored value. Do so, and return
@@ -475,7 +477,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
// Sort the stores by their index, making it efficient to do a lookup with a
// binary search.
- std::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first());
+ llvm::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first());
// Walk all of the loads from this alloca, replacing them with the nearest
// store above them, if any.
@@ -509,6 +511,11 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
!isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
addAssumeNonNull(AC, LI);
+ // If the replacement value is the load, this must occur in unreachable
+ // code.
+ if (ReplVal == LI)
+ ReplVal = UndefValue::get(LI->getType());
+
LI->replaceAllUsesWith(ReplVal);
}
@@ -631,10 +638,10 @@ void PromoteMem2Reg::run() {
SmallVector<BasicBlock *, 32> PHIBlocks;
IDF.calculate(PHIBlocks);
if (PHIBlocks.size() > 1)
- std::sort(PHIBlocks.begin(), PHIBlocks.end(),
- [this](BasicBlock *A, BasicBlock *B) {
- return BBNumbers.lookup(A) < BBNumbers.lookup(B);
- });
+ llvm::sort(PHIBlocks.begin(), PHIBlocks.end(),
+ [this](BasicBlock *A, BasicBlock *B) {
+ return BBNumbers.lookup(A) < BBNumbers.lookup(B);
+ });
unsigned CurrentVersion = 0;
for (BasicBlock *BB : PHIBlocks)
@@ -653,15 +660,20 @@ void PromoteMem2Reg::run() {
for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());
+ // When handling debug info, treat all incoming values as if they have unknown
+ // locations until proven otherwise.
+ RenamePassData::LocationVector Locations(Allocas.size());
+
// Walks all basic blocks in the function performing the SSA rename algorithm
// and inserting the phi nodes we marked as necessary
std::vector<RenamePassData> RenamePassWorkList;
- RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values));
+ RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values),
+ std::move(Locations));
do {
RenamePassData RPD = std::move(RenamePassWorkList.back());
RenamePassWorkList.pop_back();
// RenamePass may add new worklist entries.
- RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
+ RenamePass(RPD.BB, RPD.Pred, RPD.Values, RPD.Locations, RenamePassWorkList);
} while (!RenamePassWorkList.empty());
// The renamer uses the Visited set to avoid infinite loops. Clear it now.
@@ -740,7 +752,7 @@ void PromoteMem2Reg::run() {
// Ok, now we know that all of the PHI nodes are missing entries for some
// basic blocks. Start by sorting the incoming predecessors for efficient
// access.
- std::sort(Preds.begin(), Preds.end());
+ llvm::sort(Preds.begin(), Preds.end());
// Now we loop through all BB's which have entries in SomePHI and remove
// them from the Preds list.
@@ -772,7 +784,7 @@ void PromoteMem2Reg::run() {
NewPhiNodes.clear();
}
-/// \brief Determine which blocks the value is live in.
+/// Determine which blocks the value is live in.
///
/// These are blocks which lead to uses. Knowing this allows us to avoid
/// inserting PHI nodes into blocks which don't lead to uses (thus, the
@@ -846,7 +858,7 @@ void PromoteMem2Reg::ComputeLiveInBlocks(
}
}
-/// \brief Queue a phi-node to be added to a basic-block for a specific Alloca.
+/// Queue a phi-node to be added to a basic-block for a specific Alloca.
///
/// Returns true if there wasn't already a phi-node for that variable
bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
@@ -868,13 +880,24 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
return true;
}
-/// \brief Recursively traverse the CFG of the function, renaming loads and
+/// Update the debug location of a phi. \p ApplyMergedLoc indicates whether to
+/// create a merged location incorporating \p DL, or to set \p DL directly.
+static void updateForIncomingValueLocation(PHINode *PN, DebugLoc DL,
+ bool ApplyMergedLoc) {
+ if (ApplyMergedLoc)
+ PN->applyMergedLocation(PN->getDebugLoc(), DL);
+ else
+ PN->setDebugLoc(DL);
+}
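The helper above encodes a simple policy: the first incoming value stamps its store's debug location onto the phi directly, and every later incoming value merges its location in, so a phi fed by stores from several predecessors ends up with a merged (possibly empty) location rather than arbitrarily inheriting one predecessor's line.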
+
+/// Recursively traverse the CFG of the function, renaming loads and
/// stores to the allocas which we are promoting.
///
/// IncomingVals indicates what value each Alloca contains on exit from the
/// predecessor block Pred.
void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
RenamePassData::ValVector &IncomingVals,
+ RenamePassData::LocationVector &IncomingLocs,
std::vector<RenamePassData> &Worklist) {
NextIteration:
// If we are inserting any phi nodes into this BB, they will already be in the
@@ -899,6 +922,10 @@ NextIteration:
do {
unsigned AllocaNo = PhiToAllocaMap[APN];
+ // Update the location of the phi node.
+ updateForIncomingValueLocation(APN, IncomingLocs[AllocaNo],
+ APN->getNumIncomingValues() > 0);
+
// Add N incoming values to the PHI node.
for (unsigned i = 0; i != NumEdges; ++i)
APN->addIncoming(IncomingVals[AllocaNo], Pred);
@@ -960,8 +987,11 @@ NextIteration:
continue;
// what value were we writing?
- IncomingVals[ai->second] = SI->getOperand(0);
+ unsigned AllocaNo = ai->second;
+ IncomingVals[AllocaNo] = SI->getOperand(0);
+
// Record debuginfo for the store before removing it.
+ IncomingLocs[AllocaNo] = SI->getDebugLoc();
for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[ai->second])
ConvertDebugDeclareToDebugValue(DII, SI, DIB);
BB->getInstList().erase(SI);
@@ -984,7 +1014,7 @@ NextIteration:
for (; I != E; ++I)
if (VisitedSuccs.insert(*I).second)
- Worklist.emplace_back(*I, Pred, IncomingVals);
+ Worklist.emplace_back(*I, Pred, IncomingVals, IncomingLocs);
goto NextIteration;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index b2231d68a301..ca184ed7c4e3 100644
--- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -178,7 +178,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// If the client wants to know about all new instructions, tell it.
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
- DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ LLVM_DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
return InsertedPHI;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp
new file mode 100644
index 000000000000..397bac2940a4
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp
@@ -0,0 +1,191 @@
+//===- SSAUpdaterBulk.cpp - Unstructured SSA Update Tool ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SSAUpdaterBulk class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ssaupdaterbulk"
+
+/// Helper function for finding a block which should have a value for the given
+/// user. For PHI-nodes this block is the corresponding predecessor, for other
+/// instructions it's their parent block.
+static BasicBlock *getUserBB(Use *U) {
+ auto *User = cast<Instruction>(U->getUser());
+
+ if (auto *UserPN = dyn_cast<PHINode>(User))
+ return UserPN->getIncomingBlock(*U);
+ else
+ return User->getParent();
+}
+
+/// Add a new variable to the SSA rewriter. This needs to be called before
+/// AddAvailableValue or AddUse calls.
+unsigned SSAUpdaterBulk::AddVariable(StringRef Name, Type *Ty) {
+ unsigned Var = Rewrites.size();
+ LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var << ": initialized with Ty = "
+ << *Ty << ", Name = " << Name << "\n");
+ RewriteInfo RI(Name, Ty);
+ Rewrites.push_back(RI);
+ return Var;
+}
+
+/// Indicate that a rewritten value for the given variable is available in the
+/// specified block.
+void SSAUpdaterBulk::AddAvailableValue(unsigned Var, BasicBlock *BB, Value *V) {
+ assert(Var < Rewrites.size() && "Variable not found!");
+ LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var
+ << ": added new available value" << *V << " in "
+ << BB->getName() << "\n");
+ Rewrites[Var].Defines[BB] = V;
+}
+
+/// Record a use of the symbolic value. This use will be updated with a
+/// rewritten value when RewriteAllUses is called.
+void SSAUpdaterBulk::AddUse(unsigned Var, Use *U) {
+ assert(Var < Rewrites.size() && "Variable not found!");
+ LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var << ": added a use" << *U->get()
+ << " in " << getUserBB(U)->getName() << "\n");
+ Rewrites[Var].Uses.push_back(U);
+}
+
+/// Return true if the SSAUpdater already has a value for the specified variable
+/// in the specified block.
+bool SSAUpdaterBulk::HasValueForBlock(unsigned Var, BasicBlock *BB) {
+ return (Var < Rewrites.size()) ? Rewrites[Var].Defines.count(BB) : false;
+}
+
+// Compute the value at the given block BB. We should either already know it,
+// or be able to reach it recursively by walking up the dominator tree.
+Value *SSAUpdaterBulk::computeValueAt(BasicBlock *BB, RewriteInfo &R,
+ DominatorTree *DT) {
+ if (!R.Defines.count(BB)) {
+ if (DT->isReachableFromEntry(BB) && PredCache.get(BB).size()) {
+ BasicBlock *IDom = DT->getNode(BB)->getIDom()->getBlock();
+ Value *V = computeValueAt(IDom, R, DT);
+ R.Defines[BB] = V;
+ } else
+ R.Defines[BB] = UndefValue::get(R.Ty);
+ }
+ return R.Defines[BB];
+}
+
+/// Given sets of UsingBlocks and DefBlocks, compute the set of LiveInBlocks.
+/// This is basically a subgraph limited by DefBlocks and UsingBlocks.
+static void
+ComputeLiveInBlocks(const SmallPtrSetImpl<BasicBlock *> &UsingBlocks,
+ const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
+ SmallPtrSetImpl<BasicBlock *> &LiveInBlocks,
+ PredIteratorCache &PredCache) {
+ // To determine liveness, we must iterate through the predecessors of blocks
+ // where the def is live. Blocks are added to the worklist if we need to
+ // check their predecessors. Start with all the using blocks.
+ SmallVector<BasicBlock *, 64> LiveInBlockWorklist(UsingBlocks.begin(),
+ UsingBlocks.end());
+
+  // Now that we have a set of blocks where the phi is live-in, recursively add
+  // their predecessors until we find the full region where the value is live.
+ while (!LiveInBlockWorklist.empty()) {
+ BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
+
+ // The block really is live in here, insert it into the set. If already in
+ // the set, then it has already been processed.
+ if (!LiveInBlocks.insert(BB).second)
+ continue;
+
+    // Since the value is live into BB, it is either defined in a predecessor
+    // or live into it too. Add the preds to the worklist unless they are
+    // defining blocks.
+ for (BasicBlock *P : PredCache.get(BB)) {
+ // The value is not live into a predecessor if it defines the value.
+ if (DefBlocks.count(P))
+ continue;
+
+ // Otherwise it is, add to the worklist.
+ LiveInBlockWorklist.push_back(P);
+ }
+ }
+}
+
+/// Perform all the necessary updates, including insertion of new PHI nodes and
+/// updates of the requested uses.
+void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT,
+ SmallVectorImpl<PHINode *> *InsertedPHIs) {
+ for (auto &R : Rewrites) {
+ // Compute locations for new phi-nodes.
+ // For that we need to initialize DefBlocks from definitions in R.Defines,
+ // UsingBlocks from uses in R.Uses, then compute LiveInBlocks, and then use
+ // this set for computing iterated dominance frontier (IDF).
+ // The IDF blocks are the blocks where we need to insert new phi-nodes.
+ ForwardIDFCalculator IDF(*DT);
+ LLVM_DEBUG(dbgs() << "SSAUpdater: rewriting " << R.Uses.size()
+ << " use(s)\n");
+
+ SmallPtrSet<BasicBlock *, 2> DefBlocks;
+ for (auto &Def : R.Defines)
+ DefBlocks.insert(Def.first);
+ IDF.setDefiningBlocks(DefBlocks);
+
+ SmallPtrSet<BasicBlock *, 2> UsingBlocks;
+ for (Use *U : R.Uses)
+ UsingBlocks.insert(getUserBB(U));
+
+ SmallVector<BasicBlock *, 32> IDFBlocks;
+ SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
+ ComputeLiveInBlocks(UsingBlocks, DefBlocks, LiveInBlocks, PredCache);
+ IDF.resetLiveInBlocks();
+ IDF.setLiveInBlocks(LiveInBlocks);
+ IDF.calculate(IDFBlocks);
+
+ // We've computed IDF, now insert new phi-nodes there.
+ SmallVector<PHINode *, 4> InsertedPHIsForVar;
+ for (auto *FrontierBB : IDFBlocks) {
+ IRBuilder<> B(FrontierBB, FrontierBB->begin());
+ PHINode *PN = B.CreatePHI(R.Ty, 0, R.Name);
+ R.Defines[FrontierBB] = PN;
+ InsertedPHIsForVar.push_back(PN);
+ if (InsertedPHIs)
+ InsertedPHIs->push_back(PN);
+ }
+
+ // Fill in arguments of the inserted PHIs.
+ for (auto *PN : InsertedPHIsForVar) {
+ BasicBlock *PBB = PN->getParent();
+ for (BasicBlock *Pred : PredCache.get(PBB))
+ PN->addIncoming(computeValueAt(Pred, R, DT), Pred);
+ }
+
+ // Rewrite actual uses with the inserted definitions.
+ SmallPtrSet<Use *, 4> ProcessedUses;
+ for (Use *U : R.Uses) {
+ if (!ProcessedUses.insert(U).second)
+ continue;
+ Value *V = computeValueAt(getUserBB(U), R, DT);
+ Value *OldVal = U->get();
+ assert(OldVal && "Invalid use!");
+      // Notify the users of the existing value that it is being replaced.
+ if (OldVal != V && OldVal->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(OldVal, V);
+ LLVM_DEBUG(dbgs() << "SSAUpdater: replacing " << *OldVal << " with " << *V
+ << "\n");
+ U->set(V);
+ }
+ }
+}
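+
+// A minimal usage sketch of the API above (illustrative only; Ty, DefBB,
+// DefVal, TheUse and DT stand in for the caller's own objects):
+//
+//   SSAUpdaterBulk Updater;
+//   unsigned Var = Updater.AddVariable("x", Ty);
+//   Updater.AddAvailableValue(Var, DefBB, DefVal);
+//   Updater.AddUse(Var, TheUse);            // TheUse is a Use *.
+//   SmallVector<PHINode *, 8> NewPHIs;
+//   Updater.RewriteAllUses(&DT, &NewPHIs);  // PHIs are created at the IDF.
+//
+// Uses reached by no definition are rewritten to undef (see computeValueAt).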
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 7c195788e416..c87b5c16ffce 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -19,7 +19,6 @@
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
@@ -28,6 +27,7 @@
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -66,7 +66,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
@@ -688,9 +687,7 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
// Do not permit merging of large switch instructions into their
// predecessors unless there is only one predecessor.
- if (SI->getNumSuccessors() * std::distance(pred_begin(SI->getParent()),
- pred_end(SI->getParent())) <=
- 128)
+ if (SI->getNumSuccessors() * pred_size(SI->getParent()) <= 128)
CV = SI->getCondition();
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
if (BI->isConditional() && BI->getCondition()->hasOneUse())
@@ -847,9 +844,9 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
// Remove PHI node entries for the dead edge.
ThisCases[0].Dest->removePredecessor(TI->getParent());
- DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
- << "Through successor TI: " << *TI << "Leaving: " << *NI
- << "\n");
+ LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI
+ << "\n");
EraseTerminatorInstAndDCECond(TI);
return true;
@@ -861,8 +858,8 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
DeadCases.insert(PredCases[i].Value);
- DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
- << "Through successor TI: " << *TI);
+ LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI);
// Collect branch weights into a vector.
SmallVector<uint32_t, 8> Weights;
@@ -888,7 +885,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
if (HasWeight && Weights.size() >= 2)
setBranchWeights(SI, Weights);
- DEBUG(dbgs() << "Leaving: " << *TI << "\n");
+ LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
return true;
}
@@ -929,9 +926,9 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
Instruction *NI = Builder.CreateBr(TheRealDest);
(void)NI;
- DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
- << "Through successor TI: " << *TI << "Leaving: " << *NI
- << "\n");
+ LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI
+ << "\n");
EraseTerminatorInstAndDCECond(TI);
return true;
@@ -1290,31 +1287,44 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
return Changed;
- // For a normal instruction, we just move one to right before the branch,
- // then replace all uses of the other with the first. Finally, we remove
- // the now redundant second instruction.
- BIParent->getInstList().splice(BI->getIterator(), BB1->getInstList(), I1);
- if (!I2->use_empty())
- I2->replaceAllUsesWith(I1);
- I1->andIRFlags(I2);
- unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
- LLVMContext::MD_range,
- LLVMContext::MD_fpmath,
- LLVMContext::MD_invariant_load,
- LLVMContext::MD_nonnull,
- LLVMContext::MD_invariant_group,
- LLVMContext::MD_align,
- LLVMContext::MD_dereferenceable,
- LLVMContext::MD_dereferenceable_or_null,
- LLVMContext::MD_mem_parallel_loop_access};
- combineMetadata(I1, I2, KnownIDs);
-
- // I1 and I2 are being combined into a single instruction. Its debug
- // location is the merged locations of the original instructions.
- I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
-
- I2->eraseFromParent();
- Changed = true;
+ if (isa<DbgInfoIntrinsic>(I1) || isa<DbgInfoIntrinsic>(I2)) {
+      assert(isa<DbgInfoIntrinsic>(I1) && isa<DbgInfoIntrinsic>(I2));
+ // The debug location is an integral part of a debug info intrinsic
+ // and can't be separated from it or replaced. Instead of attempting
+ // to merge locations, simply hoist both copies of the intrinsic.
+ BIParent->getInstList().splice(BI->getIterator(),
+ BB1->getInstList(), I1);
+ BIParent->getInstList().splice(BI->getIterator(),
+ BB2->getInstList(), I2);
+ Changed = true;
+ } else {
+      // For a normal instruction, we simply move one to just before the branch,
+ // then replace all uses of the other with the first. Finally, we remove
+ // the now redundant second instruction.
+ BIParent->getInstList().splice(BI->getIterator(),
+ BB1->getInstList(), I1);
+ if (!I2->use_empty())
+ I2->replaceAllUsesWith(I1);
+ I1->andIRFlags(I2);
+ unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
+ LLVMContext::MD_range,
+ LLVMContext::MD_fpmath,
+ LLVMContext::MD_invariant_load,
+ LLVMContext::MD_nonnull,
+ LLVMContext::MD_invariant_group,
+ LLVMContext::MD_align,
+ LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null,
+ LLVMContext::MD_mem_parallel_loop_access};
+ combineMetadata(I1, I2, KnownIDs);
+
+ // I1 and I2 are being combined into a single instruction. Its debug
+ // location is the merged locations of the original instructions.
+ I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
+
+ I2->eraseFromParent();
+ Changed = true;
+ }
I1 = &*BB1_Itr++;
I2 = &*BB2_Itr++;
@@ -1728,7 +1738,8 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
LockstepReverseIterator LRI(UnconditionalPreds);
while (LRI.isValid() &&
canSinkInstructions(*LRI, PHIOperands)) {
- DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0] << "\n");
+ LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
+ << "\n");
InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
++ScanIdx;
--LRI;
@@ -1740,7 +1751,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
for (auto *V : PHIOperands[I])
if (InstructionsToSink.count(V) == 0)
++NumPHIdValues;
- DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
+ LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
NumPHIInsts++;
@@ -1768,7 +1779,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
if (!Profitable)
return false;
- DEBUG(dbgs() << "SINK: Splitting edge\n");
+ LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
// We have a conditional edge and we're going to sink some instructions.
// Insert a new block postdominating all blocks we're going to sink from.
if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split"))
@@ -1790,16 +1801,17 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
// and never actually sink it which means we produce more PHIs than intended.
// This is unlikely in practice though.
for (unsigned SinkIdx = 0; SinkIdx != ScanIdx; ++SinkIdx) {
- DEBUG(dbgs() << "SINK: Sink: "
- << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
- << "\n");
+ LLVM_DEBUG(dbgs() << "SINK: Sink: "
+ << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
+ << "\n");
// Because we've sunk every instruction in turn, the current instruction to
// sink is always at index 0.
LRI.reset();
if (!ProfitableToSinkInstruction(LRI)) {
// Too many PHIs would be created.
- DEBUG(dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
+ LLVM_DEBUG(
+ dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
break;
}
@@ -1811,7 +1823,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
return Changed;
}
-/// \brief Determine if we can hoist sink a sole store instruction out of a
+/// Determine if we can hoist a sole store instruction out of a
/// conditional block.
///
/// We are looking for code like the following:
@@ -1851,12 +1863,9 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
// Look for a store to the same pointer in BrBB.
unsigned MaxNumInstToLookAt = 9;
- for (Instruction &CurI : reverse(*BrBB)) {
+ for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug())) {
if (!MaxNumInstToLookAt)
break;
- // Skip debug info.
- if (isa<DbgInfoIntrinsic>(CurI))
- continue;
--MaxNumInstToLookAt;
// Could be calling an instruction that affects memory like free().
@@ -1875,7 +1884,7 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
return nullptr;
}
-/// \brief Speculate a conditional basic block flattening the CFG.
+/// Speculate a conditional basic block flattening the CFG.
///
/// Note that this is a very risky transform currently. Speculating
/// instructions like this is most often not desirable. Instead, there is an MI
@@ -2045,7 +2054,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
return false;
// If we get here, we can hoist the instruction and if-convert.
- DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
+ LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
// Insert a select of the value of the speculated store.
if (SpeculatedStoreValue) {
@@ -2106,19 +2115,16 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
/// Return true if we can thread a branch across this block.
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
- BranchInst *BI = cast<BranchInst>(BB->getTerminator());
unsigned Size = 0;
- for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
- if (isa<DbgInfoIntrinsic>(BBI))
- continue;
+ for (Instruction &I : BB->instructionsWithoutDebug()) {
if (Size > 10)
return false; // Don't clone large BB's.
++Size;
// We can only support instructions that do not define values that are
// live outside of the current basic block.
- for (User *U : BBI->users()) {
+ for (User *U : I.users()) {
Instruction *UI = cast<Instruction>(U);
if (UI->getParent() != BB || isa<PHINode>(UI))
return false;
@@ -2260,6 +2266,10 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// dependence information for this check, but simplifycfg can't keep it up
// to date, and this catches most of the cases we care about anyway.
BasicBlock *BB = PN->getParent();
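+  // Skip this transform in functions marked for fuzzing: as we read the
+  // OptForFuzzing attribute, coverage-guided fuzzers rely on observing the
+  // distinct branches that if-conversion would fold away.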
+ const Function *Fn = BB->getParent();
+ if (Fn && Fn->hasFnAttribute(Attribute::OptForFuzzing))
+ return false;
+
BasicBlock *IfTrue, *IfFalse;
Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
if (!IfCond ||
@@ -2350,8 +2360,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
}
}
- DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
- << IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond
+ << " T: " << IfTrue->getName()
+ << " F: " << IfFalse->getName() << "\n");
// If we can still promote the PHI nodes after this gauntlet of tests,
// do all of the PHI's now.
@@ -2475,9 +2486,9 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
(void)RI;
- DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
- << "\n " << *BI << "NewRet = " << *RI
- << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: " << *FalseSucc);
+ LLVM_DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
+ << "\n " << *BI << "NewRet = " << *RI << "TRUEBLOCK: "
+ << *TrueSucc << "FALSEBLOCK: " << *FalseSucc);
EraseTerminatorInstAndDCECond(BI);
@@ -2486,7 +2497,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
/// Return true if the given instruction is available
/// in its predecessor block. If yes, the instruction will be removed.
-static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
+static bool tryCSEWithPredecessor(Instruction *Inst, BasicBlock *PB) {
if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst))
return false;
for (Instruction &I : *PB) {
@@ -2543,14 +2554,16 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
if (PBI->isConditional() &&
(BI->getSuccessor(0) == PBI->getSuccessor(0) ||
BI->getSuccessor(0) == PBI->getSuccessor(1))) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
+ for (auto I = BB->instructionsWithoutDebug().begin(),
+ E = BB->instructionsWithoutDebug().end();
+ I != E;) {
Instruction *Curr = &*I++;
if (isa<CmpInst>(Curr)) {
Cond = Curr;
break;
}
// Quit if we can't remove this instruction.
- if (!checkCSEInPredecessor(Curr, PB))
+ if (!tryCSEWithPredecessor(Curr, PB))
return false;
}
}
@@ -2650,7 +2663,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
continue;
}
- DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
+ LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
IRBuilder<> Builder(PBI);
// If we need to invert the condition in the pred block to match, do so now.
@@ -2860,7 +2873,7 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
if (!AlternativeV)
break;
- assert(std::distance(pred_begin(Succ), pred_end(Succ)) == 2);
+ assert(pred_size(Succ) == 2);
auto PredI = pred_begin(Succ);
BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
@@ -2903,14 +2916,13 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
// instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
// thread this store.
unsigned N = 0;
- for (auto &I : *BB) {
+ for (auto &I : BB->instructionsWithoutDebug()) {
// Cheap instructions viable for folding.
if (isa<BinaryOperator>(I) || isa<GetElementPtrInst>(I) ||
isa<StoreInst>(I))
++N;
// Free instructions.
- else if (isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
- IsaBitcastOfPointerType(I))
+ else if (isa<TerminatorInst>(I) || IsaBitcastOfPointerType(I))
continue;
else
return false;
@@ -2965,6 +2977,21 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
if (&*I != PStore && I->mayReadOrWriteMemory())
return false;
+ // If PostBB has more than two predecessors, we need to split it so we can
+ // sink the store.
+ if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
+ // We know that QFB's only successor is PostBB. And QFB has a single
+ // predecessor. If QTB exists, then its only successor is also PostBB.
+ // If QTB does not exist, then QFB's only predecessor has a conditional
+ // branch to QFB and PostBB.
+ BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
+    BasicBlock *NewBB = SplitBlockPredecessors(PostBB, {QFB, TruePred},
+ "condstore.split");
+ if (!NewBB)
+ return false;
+ PostBB = NewBB;
+ }
+
// OK, we're going to sink the stores to PostBB. The store has to be
// conditional though, so first create the predicate.
Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
@@ -3100,7 +3127,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
(QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
return false;
- if (!PostBB->hasNUses(2) || !QBI->getParent()->hasNUses(2))
+ if (!QBI->getParent()->hasNUses(2))
return false;
// OK, this is a sequence of two diamonds or triangles.
@@ -3200,11 +3227,9 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// If this is a conditional branch in an empty block, and if any
// predecessors are a conditional branch to one of our destinations,
// fold the conditions into logical ops and one cond br.
- BasicBlock::iterator BBI = BB->begin();
+
// Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(BBI))
- ++BBI;
- if (&*BBI != BI)
+ if (&*BB->instructionsWithoutDebug().begin() != BI)
return false;
int PBIOp, BIOp;
@@ -3261,8 +3286,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// Finally, if everything is ok, fold the branches to logical ops.
BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
- DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
- << "AND: " << *BI->getParent());
+ LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
+ << "AND: " << *BI->getParent());
// If OtherDest *is* BB, then BB is a basic block with a single conditional
// branch in it, where one edge (OtherDest) goes back to itself but the other
@@ -3280,7 +3305,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
OtherDest = InfLoopBlock;
}
- DEBUG(dbgs() << *PBI->getParent()->getParent());
+ LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
// BI may have other predecessors. Because of this, we leave
// it alone, but modify PBI.
@@ -3364,8 +3389,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
}
}
- DEBUG(dbgs() << "INTO: " << *PBI->getParent());
- DEBUG(dbgs() << *PBI->getParent()->getParent());
+ LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
+ LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
// This basic block is probably dead. We know it has at least
// one fewer predecessor.
@@ -3665,9 +3690,9 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
BasicBlock *BB = BI->getParent();
- DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
- << " cases into SWITCH. BB is:\n"
- << *BB);
+ LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
+ << " cases into SWITCH. BB is:\n"
+ << *BB);
// If there are any extra values that couldn't be folded into the switch
// then we evaluate them with an explicit branch first. Split the block
@@ -3690,8 +3715,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
// for the edge we just added.
AddPredecessorToBlock(EdgeBB, BB, NewBB);
- DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
- << "\nEXTRABB = " << *BB);
+ LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
+ << "\nEXTRABB = " << *BB);
BB = NewBB;
}
@@ -3722,7 +3747,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
// Erase the old branch instruction.
EraseTerminatorInstAndDCECond(BI);
- DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
+ LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
return true;
}
@@ -3873,6 +3898,7 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI) {
switch (IntrinsicID) {
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
+ case Intrinsic::dbg_label:
case Intrinsic::lifetime_end:
break;
default:
@@ -4049,8 +4075,8 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
if (!UncondBranchPreds.empty() && DupRet) {
while (!UncondBranchPreds.empty()) {
BasicBlock *Pred = UncondBranchPreds.pop_back_val();
- DEBUG(dbgs() << "FOLDING: " << *BB
- << "INTO UNCOND BRANCH PRED: " << *Pred);
+ LLVM_DEBUG(dbgs() << "FOLDING: " << *BB
+ << "INTO UNCOND BRANCH PRED: " << *Pred);
(void)FoldReturnIntoUncondBranch(RI, BB, Pred);
}
@@ -4374,7 +4400,8 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
(CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
DeadCases.push_back(Case.getCaseValue());
- DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n");
+ LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
+ << " is dead.\n");
}
}
@@ -4390,7 +4417,7 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
if (HasDefault && DeadCases.empty() &&
NumUnknownBits < 64 /* avoid overflow */ &&
SI->getNumCases() == (1ULL << NumUnknownBits)) {
- DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
+ LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
BasicBlock *NewDefault =
SplitBlockPredecessors(SI->getDefaultDest(), SI->getParent(), "");
SI->setDefaultDest(&*NewDefault);
@@ -4607,24 +4634,20 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
// which we can constant-propagate the CaseVal, continue to its successor.
SmallDenseMap<Value *, Constant *> ConstantPool;
ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
- for (BasicBlock::iterator I = CaseDest->begin(), E = CaseDest->end(); I != E;
- ++I) {
- if (TerminatorInst *T = dyn_cast<TerminatorInst>(I)) {
+  for (Instruction &I : CaseDest->instructionsWithoutDebug()) {
+ if (TerminatorInst *T = dyn_cast<TerminatorInst>(&I)) {
// If the terminator is a simple branch, continue to the next block.
if (T->getNumSuccessors() != 1 || T->isExceptional())
return false;
Pred = CaseDest;
CaseDest = T->getSuccessor(0);
- } else if (isa<DbgInfoIntrinsic>(I)) {
- // Skip debug intrinsic.
- continue;
- } else if (Constant *C = ConstantFold(&*I, DL, ConstantPool)) {
+ } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {
// Instruction is side-effect free and constant.
// If the instruction has uses outside this block or a phi node slot for
// the block, it is not safe to bypass the instruction since it would then
// no longer dominate all its uses.
- for (auto &Use : I->uses()) {
+ for (auto &Use : I.uses()) {
User *User = Use.getUser();
if (Instruction *I = dyn_cast<Instruction>(User))
if (I->getParent() == CaseDest)
@@ -4635,7 +4658,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
return false;
}
- ConstantPool.insert(std::make_pair(&*I, C));
+ ConstantPool.insert(std::make_pair(&I, C));
} else {
break;
}
@@ -4670,30 +4693,31 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
}
// Helper function used to add CaseVal to the list of cases that generate
-// Result.
-static void MapCaseToResult(ConstantInt *CaseVal,
- SwitchCaseResultVectorTy &UniqueResults,
- Constant *Result) {
+// Result. Returns the updated number of cases that generate this result.
+static uintptr_t MapCaseToResult(ConstantInt *CaseVal,
+ SwitchCaseResultVectorTy &UniqueResults,
+ Constant *Result) {
for (auto &I : UniqueResults) {
if (I.first == Result) {
I.second.push_back(CaseVal);
- return;
+ return I.second.size();
}
}
UniqueResults.push_back(
std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
+ return 1;
}
// Helper function that initializes a map containing
// results for the PHI node of the common destination block for a switch
// instruction. Returns false if multiple PHI nodes have been found or if
// there is not a common destination block for the switch.
-static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
- BasicBlock *&CommonDest,
- SwitchCaseResultVectorTy &UniqueResults,
- Constant *&DefaultResult,
- const DataLayout &DL,
- const TargetTransformInfo &TTI) {
+static bool
+InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest,
+ SwitchCaseResultVectorTy &UniqueResults,
+ Constant *&DefaultResult, const DataLayout &DL,
+ const TargetTransformInfo &TTI,
+ uintptr_t MaxUniqueResults, uintptr_t MaxCasesPerResult) {
for (auto &I : SI->cases()) {
ConstantInt *CaseVal = I.getCaseValue();
@@ -4703,10 +4727,21 @@ static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
DL, TTI))
return false;
- // Only one value per case is permitted
+ // Only one value per case is permitted.
if (Results.size() > 1)
return false;
- MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
+
+ // Add the case->result mapping to UniqueResults.
+ const uintptr_t NumCasesForResult =
+ MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
+
+ // Early out if there are too many cases for this result.
+ if (NumCasesForResult > MaxCasesPerResult)
+ return false;
+
+ // Early out if there are too many unique results.
+ if (UniqueResults.size() > MaxUniqueResults)
+ return false;
// Check the PHI consistency.
if (!PHI)
@@ -4806,7 +4841,7 @@ static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
SwitchCaseResultVectorTy UniqueResults;
// Collect all the cases that will deliver the same value from the switch.
if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
- DL, TTI))
+ DL, TTI, 2, 1))
return false;
  // Selects choose between at most two values.
if (UniqueResults.size() != 2)
@@ -5384,8 +5419,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
}
bool ReturnedEarly = false;
- for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
- PHINode *PHI = PHIs[I];
+ for (PHINode *PHI : PHIs) {
const ResultListTy &ResultList = ResultLists[PHI];
// If using a bitmask, use any value to fill the lookup table holes.
@@ -5475,7 +5509,7 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
SmallVector<int64_t,4> Values;
for (auto &C : SI->cases())
Values.push_back(C.getCaseValue()->getValue().getSExtValue());
- std::sort(Values.begin(), Values.end());
+ llvm::sort(Values.begin(), Values.end());
// If the switch is already dense, there's nothing useful to do here.
if (isSwitchDense(Values))
@@ -5558,11 +5592,7 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// If the block only contains the switch, see if we can fold the block
// away into any preds.
- BasicBlock::iterator BBI = BB->begin();
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(BBI))
- ++BBI;
- if (SI == &*BBI)
+ if (SI == &*BB->instructionsWithoutDebug().begin())
if (FoldValueComparisonIntoPredecessors(SI, Builder))
return simplifyCFG(BB, TTI, Options) | true;
}
@@ -5649,7 +5679,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
/// any transform which might inhibit optimization (such as our ability to
/// specialize a particular handler via tail commoning). We do this by not
/// merging any blocks which require us to introduce a phi. Since the same
-/// values are flowing through both blocks, we don't loose any ability to
+/// values are flowing through both blocks, we don't lose any ability to
/// specialize. If anything, we make such specialization more likely.
///
/// TODO - This transformation could remove entries from a phi in the target
@@ -5679,7 +5709,7 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
// We've found an identical block. Update our predecessors to take that
// path instead and make ourselves dead.
- SmallSet<BasicBlock *, 16> Preds;
+ SmallPtrSet<BasicBlock *, 16> Preds;
Preds.insert(pred_begin(BB), pred_end(BB));
for (BasicBlock *Pred : Preds) {
InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
@@ -5697,7 +5727,7 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
Inst.eraseFromParent();
}
- SmallSet<BasicBlock *, 16> Succs;
+ SmallPtrSet<BasicBlock *, 16> Succs;
Succs.insert(succ_begin(BB), succ_end(BB));
for (BasicBlock *Succ : Succs) {
Succ->removePredecessor(BB);
@@ -5721,9 +5751,12 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
// header. (This is for early invocations before loop simplify and
// vectorization to keep canonical loop forms for nested loops. These blocks
// can be eliminated when the pass is invoked later in the back-end.)
+  // Note that if BB has only one predecessor then we do not introduce a new
+  // backedge, so we can eliminate BB.
bool NeedCanonicalLoop =
Options.NeedCanonicalLoop &&
- (LoopHeaders && (LoopHeaders->count(BB) || LoopHeaders->count(Succ)));
+ (LoopHeaders && pred_size(BB) > 1 &&
+ (LoopHeaders->count(BB) || LoopHeaders->count(Succ)));
BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
!NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB))
@@ -5771,6 +5804,9 @@ static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
BasicBlock *BB = BI->getParent();
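+  // As in FoldTwoEntryPHINode: keep branches intact in functions marked
+  // OptForFuzzing so fuzzers retain their coverage signal.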
+ const Function *Fn = BB->getParent();
+ if (Fn && Fn->hasFnAttribute(Attribute::OptForFuzzing))
+ return false;
// Conditional branch
if (isValueEqualityComparison(BI)) {
@@ -5783,18 +5819,12 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// This block must be empty, except for the setcond inst, if it exists.
// Ignore dbg intrinsics.
- BasicBlock::iterator I = BB->begin();
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(I))
- ++I;
+ auto I = BB->instructionsWithoutDebug().begin();
if (&*I == BI) {
if (FoldValueComparisonIntoPredecessors(BI, Builder))
return simplifyCFG(BB, TTI, Options) | true;
} else if (&*I == cast<Instruction>(BI->getCondition())) {
++I;
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(I))
- ++I;
if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
return simplifyCFG(BB, TTI, Options) | true;
}
@@ -5920,17 +5950,20 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
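+    // As we understand it, NullPointerIsDefined() reports the cases where a
+    // null pointer is a legitimate, dereferenceable address (e.g. the
+    // null-pointer-is-valid attribute or a non-zero address space), so the
+    // three checks below stay conservative there.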
// Load from null is undefined.
if (LoadInst *LI = dyn_cast<LoadInst>(Use))
if (!LI->isVolatile())
- return LI->getPointerAddressSpace() == 0;
+ return !NullPointerIsDefined(LI->getFunction(),
+ LI->getPointerAddressSpace());
// Store to null is undefined.
if (StoreInst *SI = dyn_cast<StoreInst>(Use))
if (!SI->isVolatile())
- return SI->getPointerAddressSpace() == 0 &&
+ return (!NullPointerIsDefined(SI->getFunction(),
+ SI->getPointerAddressSpace())) &&
SI->getPointerOperand() == I;
// A call to null is undefined.
if (auto CS = CallSite(Use))
- return CS.getCalledValue() == I;
+ return !NullPointerIsDefined(CS->getFunction()) &&
+ CS.getCalledValue() == I;
}
return false;
}
@@ -5971,7 +6004,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
// or that just have themself as a predecessor. These are unreachable.
if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
BB->getSinglePredecessor() == BB) {
- DEBUG(dbgs() << "Removing BB: \n" << *BB);
+ LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
DeleteDeadBlock(BB);
return true;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index ad1faea0a7ae..e381fbc34ab4 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -80,6 +81,7 @@ namespace {
bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
bool eliminateOverflowIntrinsic(CallInst *CI);
+ bool eliminateTrunc(TruncInst *TI);
bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
bool makeIVComparisonInvariant(ICmpInst *ICmp, Value *IVOperand);
void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
@@ -147,8 +149,8 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
if (SE->getSCEV(UseInst) != FoldedExpr)
return nullptr;
- DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
- << " -> " << *UseInst << '\n');
+ LLVM_DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
+ << " -> " << *UseInst << '\n');
UseInst->setOperand(OperIdx, IVSrc);
assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");
@@ -221,7 +223,7 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
// for now.
return false;
- DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n');
+ LLVM_DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n');
ICmp->setPredicate(InvariantPredicate);
ICmp->setOperand(0, NewLHS);
ICmp->setOperand(1, NewRHS);
@@ -252,11 +254,11 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
if (SE->isKnownPredicate(Pred, S, X)) {
ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
DeadInsts.emplace_back(ICmp);
- DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+ LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
} else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) {
ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
DeadInsts.emplace_back(ICmp);
- DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+ LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
} else if (makeIVComparisonInvariant(ICmp, IVOperand)) {
// fallthrough to end of function
} else if (ICmpInst::isSigned(OriginalPred) &&
@@ -267,7 +269,8 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
// we turn the instruction's predicate to its unsigned version. Note that
// we cannot rely on Pred here unless we check if we have swapped it.
assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
- DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp << '\n');
+ LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
+ << '\n');
ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
} else
return;
@@ -293,7 +296,7 @@ bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) {
SDiv->getName() + ".udiv", SDiv);
UDiv->setIsExact(SDiv->isExact());
SDiv->replaceAllUsesWith(UDiv);
- DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n');
+ LLVM_DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n');
++NumSimplifiedSDiv;
Changed = true;
DeadInsts.push_back(SDiv);
@@ -309,7 +312,7 @@ void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) {
auto *URem = BinaryOperator::Create(BinaryOperator::URem, N, D,
Rem->getName() + ".urem", Rem);
Rem->replaceAllUsesWith(URem);
- DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n');
+ LLVM_DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n');
++NumSimplifiedSRem;
Changed = true;
DeadInsts.emplace_back(Rem);
@@ -318,7 +321,7 @@ void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) {
// i % n --> i if i is in [0,n).
void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator *Rem) {
Rem->replaceAllUsesWith(Rem->getOperand(0));
- DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
+ LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
++NumElimRem;
Changed = true;
DeadInsts.emplace_back(Rem);
@@ -332,7 +335,7 @@ void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) {
SelectInst *Sel =
SelectInst::Create(ICmp, ConstantInt::get(T, 0), N, "iv.rem", Rem);
Rem->replaceAllUsesWith(Sel);
- DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
+ LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
++NumElimRem;
Changed = true;
DeadInsts.emplace_back(Rem);
@@ -492,6 +495,118 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
return true;
}
+bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
+ // It is always legal to replace
+ // icmp <pred> i32 trunc(iv), n
+ // with
+ // icmp <pred> i64 sext(trunc(iv)), sext(n), if pred is signed predicate.
+  //   icmp <pred> i64 sext(trunc(iv)), sext(n), if pred is a signed predicate.
+  // Or with
+  //   icmp <pred> i64 zext(trunc(iv)), zext(n), if pred is an unsigned predicate.
+ //
+ // If we can prove that iv == sext(trunc(iv)) or iv == zext(trunc(iv)) for
+ // every comparison which uses trunc, it means that we can replace each of
+ // them with comparison of iv against sext/zext(n). We no longer need trunc
+ // after that.
+ //
+ // TODO: Should we do this if we can widen *some* comparisons, but not all
+ // of them? Sometimes it is enough to enable other optimizations, but the
+ // trunc instruction will stay in the loop.
+ Value *IV = TI->getOperand(0);
+ Type *IVTy = IV->getType();
+ const SCEV *IVSCEV = SE->getSCEV(IV);
+ const SCEV *TISCEV = SE->getSCEV(TI);
+
+  // Check if iv == zext(trunc(iv)) and if iv == sext(trunc(iv)). If so, we can
+  // get rid of the trunc.
+ bool DoesSExtCollapse = false;
+ bool DoesZExtCollapse = false;
+ if (IVSCEV == SE->getSignExtendExpr(TISCEV, IVTy))
+ DoesSExtCollapse = true;
+ if (IVSCEV == SE->getZeroExtendExpr(TISCEV, IVTy))
+ DoesZExtCollapse = true;
+
+  // If neither the sext nor the zext collapses, no transform is profitable.
+  // Bail.
+ if (!DoesSExtCollapse && !DoesZExtCollapse)
+ return false;
+
+ // Collect users of the trunc that look like comparisons against invariants.
+ // Bail if we find something different.
+ SmallVector<ICmpInst *, 4> ICmpUsers;
+ for (auto *U : TI->users()) {
+ // We don't care about users in unreachable blocks.
+ if (isa<Instruction>(U) &&
+ !DT->isReachableFromEntry(cast<Instruction>(U)->getParent()))
+ continue;
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) {
+ if (ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) {
+ assert(L->contains(ICI->getParent()) && "LCSSA form broken?");
+ // If we cannot get rid of trunc, bail.
+ if (ICI->isSigned() && !DoesSExtCollapse)
+ return false;
+ if (ICI->isUnsigned() && !DoesZExtCollapse)
+ return false;
+ // For equality, either signed or unsigned works.
+ ICmpUsers.push_back(ICI);
+ } else
+ return false;
+ } else
+ return false;
+ }
+
+ auto CanUseZExt = [&](ICmpInst *ICI) {
+ // Unsigned comparison can be widened as unsigned.
+ if (ICI->isUnsigned())
+ return true;
+ // Is it profitable to do zext?
+ if (!DoesZExtCollapse)
+ return false;
+ // For equality, we can safely zext both parts.
+ if (ICI->isEquality())
+ return true;
+    // Otherwise we can only use zext when comparing two non-negative or two
+    // negative values. But in practice, we will never pass the
+    // DoesZExtCollapse check for a negative value, because zext(trunc(x)) is
+    // non-negative. So it only makes sense to check for non-negativity here.
+ const SCEV *SCEVOP1 = SE->getSCEV(ICI->getOperand(0));
+ const SCEV *SCEVOP2 = SE->getSCEV(ICI->getOperand(1));
+ return SE->isKnownNonNegative(SCEVOP1) && SE->isKnownNonNegative(SCEVOP2);
+ };
+ // Replace all comparisons against trunc with comparisons against IV.
+ for (auto *ICI : ICmpUsers) {
+ auto *Op1 = ICI->getOperand(1);
+ Instruction *Ext = nullptr;
+    // For a signed/unsigned predicate, replace the old comparison with a
+    // comparison of the IV itself against the sext/zext of the invariant
+    // argument. If we can use either sext or zext (i.e. we are dealing with an
+    // equality predicate), then prefer zext as the more canonical form.
+ // TODO: If we see a signed comparison which can be turned into unsigned,
+ // we can do it here for canonicalization purposes.
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ if (CanUseZExt(ICI)) {
+ assert(DoesZExtCollapse && "Unprofitable zext?");
+ Ext = new ZExtInst(Op1, IVTy, "zext", ICI);
+ Pred = ICmpInst::getUnsignedPredicate(Pred);
+ } else {
+ assert(DoesSExtCollapse && "Unprofitable sext?");
+ Ext = new SExtInst(Op1, IVTy, "sext", ICI);
+ assert(Pred == ICmpInst::getSignedPredicate(Pred) && "Must be signed!");
+ }
+ bool Changed;
+ L->makeLoopInvariant(Ext, Changed);
+ (void)Changed;
+ ICmpInst *NewICI = new ICmpInst(ICI, Pred, IV, Ext);
+ ICI->replaceAllUsesWith(NewICI);
+ DeadInsts.emplace_back(ICI);
+ }
+
+ // Trunc no longer needed.
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+ DeadInsts.emplace_back(TI);
+ return true;
+}
+
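+// A worked example of the rewrite above (illustrative; the value names are
+// made up, and the real ext gets the generic "sext"/"zext" name):
+//
+//   %t = trunc i64 %iv to i32
+//   %c = icmp slt i32 %t, %n        ; %n is loop-invariant
+//
+// becomes, once SCEV proves %iv == sext(trunc(%iv)):
+//
+//   %n.ext = sext i32 %n to i64     ; made loop-invariant when possible
+//   %c = icmp slt i64 %iv, %n.ext
+//
+// after which the trunc is dead and gets queued in DeadInsts.
+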
/// Eliminate an operation that consumes a simple IV and has no observable
/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable,
/// but UseInst may not be.
@@ -516,6 +631,10 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
if (eliminateOverflowIntrinsic(CI))
return true;
+ if (auto *TI = dyn_cast<TruncInst>(UseInst))
+ if (eliminateTrunc(TI))
+ return true;
+
if (eliminateIdentitySCEV(UseInst, IVOperand))
return true;
@@ -548,8 +667,8 @@ bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) {
auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP);
I->replaceAllUsesWith(Invariant);
- DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I
- << " with loop invariant: " << *S << '\n');
+ LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I
+ << " with loop invariant: " << *S << '\n');
++NumFoldedUser;
Changed = true;
DeadInsts.emplace_back(I);
@@ -589,7 +708,7 @@ bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand))
return false;
- DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
+ LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
UseInst->replaceAllUsesWith(IVOperand);
++NumElimIdentity;
@@ -771,6 +890,15 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
SimpleIVUsers.pop_back_val();
Instruction *UseInst = UseOper.first;
+    // If a user of the IndVar is trivially dead, we prefer to just mark it
+    // dead rather than try some complex analysis or transformation (such as
+    // widening) based on it.
+ // TODO: Propagate TLI and pass it here to handle more cases.
+ if (isInstructionTriviallyDead(UseInst, /* TLI */ nullptr)) {
+ DeadInsts.emplace_back(UseInst);
+ continue;
+ }
+
// Bypass back edges to avoid extra work.
if (UseInst == CurrIV) continue;
@@ -783,7 +911,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
for (unsigned N = 0; IVOperand; ++N) {
assert(N <= Simplified.size() && "runaway iteration");
- Value *NewOper = foldIVUser(UseOper.first, IVOperand);
+ Value *NewOper = foldIVUser(UseInst, IVOperand);
if (!NewOper)
break; // done folding
IVOperand = dyn_cast<Instruction>(NewOper);
@@ -791,12 +919,12 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
if (!IVOperand)
continue;
- if (eliminateIVUser(UseOper.first, IVOperand)) {
+ if (eliminateIVUser(UseInst, IVOperand)) {
pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
continue;
}
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseOper.first)) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseInst)) {
if ((isa<OverflowingBinaryOperator>(BO) &&
strengthenOverflowingOperation(BO, IVOperand)) ||
(isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) {
@@ -806,13 +934,13 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
}
}
- CastInst *Cast = dyn_cast<CastInst>(UseOper.first);
+ CastInst *Cast = dyn_cast<CastInst>(UseInst);
if (V && Cast) {
V->visitCast(Cast);
continue;
}
- if (isSimpleIVUser(UseOper.first, L, SE)) {
- pushIVUsers(UseOper.first, L, Simplified, SimpleIVUsers);
+ if (isSimpleIVUser(UseInst, L, SE)) {
+ pushIVUsers(UseInst, L, Simplified, SimpleIVUsers);
}
}
}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
deleted file mode 100644
index f3d4f2ef38d7..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-//===------ SimplifyInstructions.cpp - Remove redundant instructions ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This is a utility pass used for testing the InstructionSimplify analysis.
-// The analysis is applied to every instruction, and if it simplifies then the
-// instruction is replaced by the simplification. If you are looking for a pass
-// that performs serious instruction folding, use the instcombine pass instead.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/SimplifyInstructions.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "instsimplify"
-
-STATISTIC(NumSimplified, "Number of redundant instructions removed");
-
-static bool runImpl(Function &F, const SimplifyQuery &SQ,
- OptimizationRemarkEmitter *ORE) {
- SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
- bool Changed = false;
-
- do {
- for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
- // Here be subtlety: the iterator must be incremented before the loop
- // body (not sure why), so a range-for loop won't work here.
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
- Instruction *I = &*BI++;
- // The first time through the loop ToSimplify is empty and we try to
- // simplify all instructions. On later iterations ToSimplify is not
- // empty and we only bother simplifying instructions that are in it.
- if (!ToSimplify->empty() && !ToSimplify->count(I))
- continue;
-
- // Don't waste time simplifying unused instructions.
- if (!I->use_empty()) {
- if (Value *V = SimplifyInstruction(I, SQ, ORE)) {
- // Mark all uses for resimplification next time round the loop.
- for (User *U : I->users())
- Next->insert(cast<Instruction>(U));
- I->replaceAllUsesWith(V);
- ++NumSimplified;
- Changed = true;
- }
- }
- if (RecursivelyDeleteTriviallyDeadInstructions(I, SQ.TLI)) {
- // RecursivelyDeleteTriviallyDeadInstruction can remove more than one
- // instruction, so simply incrementing the iterator does not work.
- // When instructions get deleted re-iterate instead.
- BI = BB->begin();
- BE = BB->end();
- Changed = true;
- }
- }
- }
-
- // Place the list of instructions to simplify on the next loop iteration
- // into ToSimplify.
- std::swap(ToSimplify, Next);
- Next->clear();
- } while (!ToSimplify->empty());
-
- return Changed;
-}
-
-namespace {
- struct InstSimplifier : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- InstSimplifier() : FunctionPass(ID) {
- initializeInstSimplifierPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- }
-
- /// runOnFunction - Remove instructions that simplify.
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
-
- const DominatorTree *DT =
- &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- AssumptionCache *AC =
- &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- OptimizationRemarkEmitter *ORE =
- &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
- const DataLayout &DL = F.getParent()->getDataLayout();
- const SimplifyQuery SQ(DL, TLI, DT, AC);
- return runImpl(F, SQ, ORE);
- }
- };
-}
-
-char InstSimplifier::ID = 0;
-INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
- "Remove redundant instructions", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_END(InstSimplifier, "instsimplify",
- "Remove redundant instructions", false, false)
-char &llvm::InstructionSimplifierID = InstSimplifier::ID;
-
-// Public interface to the simplify instructions pass.
-FunctionPass *llvm::createInstructionSimplifierPass() {
- return new InstSimplifier();
-}
-
-PreservedAnalyses InstSimplifierPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
- auto &AC = AM.getResult<AssumptionAnalysis>(F);
- auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- const DataLayout &DL = F.getParent()->getDataLayout();
- const SimplifyQuery SQ(DL, &TLI, &DT, &AC);
- bool Changed = runImpl(F, SQ, &ORE);
- if (!Changed)
- return PreservedAnalyses::all();
-
- PreservedAnalyses PA;
- PA.preserveSet<CFGAnalyses>();
- return PA;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 03a1d55ddc30..8c48597fc2e4 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -7,10 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This is a utility pass used for testing the InstructionSimplify analysis.
-// The analysis is applied to every instruction, and if it simplifies then the
-// instruction is replaced by the simplification. If you are looking for a pass
-// that performs serious instruction folding, use the instcombine pass instead.
+// This file implements the library calls simplifier. It does not implement
+// any pass, but can be used by other passes to do simplifications.
//
//===----------------------------------------------------------------------===//
@@ -21,7 +19,9 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -33,7 +33,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
-#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace PatternMatch;
@@ -104,19 +103,51 @@ static bool callHasFloatingPointArgument(const CallInst *CI) {
});
}
-/// \brief Check whether the overloaded unary floating point function
-/// corresponding to \a Ty is available.
-static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
- LibFunc DoubleFn, LibFunc FloatFn,
- LibFunc LongDoubleFn) {
- switch (Ty->getTypeID()) {
- case Type::FloatTyID:
- return TLI->has(FloatFn);
- case Type::DoubleTyID:
- return TLI->has(DoubleFn);
- default:
- return TLI->has(LongDoubleFn);
- }
+static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) {
+ if (Base < 2 || Base > 36)
+ // Handle the special case of base 0, which strtoll auto-detects.
+ if (Base != 0)
+ return nullptr;
+
+ char *End;
+ std::string nptr = Str.str();
+ errno = 0;
+ long long int Result = strtoll(nptr.c_str(), &End, Base);
+ if (errno)
+ return nullptr;
+
+ // If we assume all possible target locales are ASCII supersets, then if
+ // strtoll successfully parses a number on the host, it will also
+ // successfully parse the same way on the target.
+ if (*End != '\0')
+ return nullptr;
+
+ if (!isIntN(CI->getType()->getPrimitiveSizeInBits(), Result))
+ return nullptr;
+
+ return ConstantInt::get(CI->getType(), Result);
+}
+
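A minimal host-side sketch of the fold convertStrToNumber enables, assuming the call's string argument is a compile-time constant (the literal "123" here is illustrative):

#include <cerrno>
#include <cstdio>
#include <cstdlib>

// Mirrors convertStrToNumber: parse on the host exactly as the target's
// strtoll would, which is valid under the ASCII-superset locale
// assumption stated in the comment above.
int main() {
  errno = 0;
  char *End;
  long long Result = strtoll("123", &End, 10);
  if (errno == 0 && *End == '\0')
    std::printf("folds to %lld\n", Result); // prints: folds to 123
  return 0;
}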
+static bool isLocallyOpenedFile(Value *File, CallInst *CI, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ CallInst *FOpen = dyn_cast<CallInst>(File);
+ if (!FOpen)
+ return false;
+
+ Function *InnerCallee = FOpen->getCalledFunction();
+ if (!InnerCallee)
+ return false;
+
+ LibFunc Func;
+ if (!TLI->getLibFunc(*InnerCallee, Func) || !TLI->has(Func) ||
+ Func != LibFunc_fopen)
+ return false;
+
+ inferLibFuncAttributes(*CI->getCalledFunction(), *TLI);
+ if (PointerMayBeCaptured(File, true, true))
+ return false;
+
+ return true;
}
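In source terms, the pattern this predicate enables looks roughly like the sketch below. fputs_unlocked is the glibc extension of the unlocked-stdio family; the file name and functions are illustrative, not the pass's actual output:

#include <cstdio>

// Before: the FILE* comes straight from fopen and never escapes,
// so no other thread can be holding the stream lock.
void locked() {
  FILE *F = std::fopen("out.txt", "w");
  if (!F) return;
  std::fputs("hello", F); // locked stdio call
  std::fclose(F);
}

// After: the simplifier may substitute the unlocked variant.
void unlocked() {
  FILE *F = std::fopen("out.txt", "w");
  if (!F) return;
  fputs_unlocked("hello", F); // safe: F is provably local
  std::fclose(F);
}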
//===----------------------------------------------------------------------===//
@@ -156,9 +187,8 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(CpyDst, Src,
- ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1),
- 1);
+ B.CreateMemCpy(CpyDst, 1, Src, 1,
+ ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1));
return Dst;
}
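The recurring mechanical change in this file is the IRBuilder memcpy/memmove signature: per-operand alignments replace the single trailing alignment argument. A hedged sketch of the updated call shape (the helper name is invented for illustration):

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Emits a byte-aligned memcpy with the new API: one alignment per
// pointer operand instead of one shared trailing alignment.
static Value *emitByteMemCpy(IRBuilder<> &B, Value *Dst, Value *Src,
                             Value *Size) {
  return B.CreateMemCpy(Dst, /*DstAlign=*/1, Src, /*SrcAlign=*/1, Size);
}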
@@ -346,8 +376,8 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(Dst, Src,
- ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), 1);
+ B.CreateMemCpy(Dst, 1, Src, 1,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
return Dst;
}
@@ -371,7 +401,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(Dst, Src, LenV, 1);
+ B.CreateMemCpy(Dst, 1, Src, 1, LenV);
return DstEnd;
}
@@ -388,7 +418,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
--SrcLen;
if (SrcLen == 0) {
- // strncpy(x, "", y) -> memset(x, '\0', y, 1)
+ // strncpy(x, "", y) -> memset(align 1 x, '\0', y)
B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
return Dst;
}
@@ -407,8 +437,8 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
return nullptr;
Type *PT = Callee->getFunctionType()->getParamType(0);
- // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
- B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1);
+ // strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant]
+ B.CreateMemCpy(Dst, 1, Src, 1, ConstantInt::get(DL.getIntPtrType(PT), Len));
return Dst;
}
@@ -508,7 +538,7 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) {
- Module &M = *CI->getParent()->getParent()->getParent();
+ Module &M = *CI->getModule();
unsigned WCharSize = TLI->getWCharSize(M) * 8;
// We cannot perform this optimization without wchar_size metadata.
if (WCharSize == 0)
@@ -816,40 +846,19 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
- // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
- B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1);
+ // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
+ B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
+ CI->getArgOperand(2));
return CI->getArgOperand(0);
}
Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
- // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
- B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1);
+ // memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n)
+ B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
+ CI->getArgOperand(2));
return CI->getArgOperand(0);
}
-// TODO: Does this belong in BuildLibCalls or should all of those similar
-// functions be moved here?
-static Value *emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,
- IRBuilder<> &B, const TargetLibraryInfo &TLI) {
- LibFunc Func;
- if (!TLI.getLibFunc("calloc", Func) || !TLI.has(Func))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- const DataLayout &DL = M->getDataLayout();
- IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext()));
- Value *Calloc = M->getOrInsertFunction("calloc", Attrs, B.getInt8PtrTy(),
- PtrType, PtrType);
- CallInst *CI = B.CreateCall(Calloc, { Num, Size }, "calloc");
-
- if (const auto *F = dyn_cast<Function>(Calloc->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
-}
-
/// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n).
static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
const TargetLibraryInfo &TLI) {
@@ -901,12 +910,19 @@ Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
if (auto *Calloc = foldMallocMemset(CI, B, *TLI))
return Calloc;
- // memset(p, v, n) -> llvm.memset(p, v, n, 1)
+ // memset(p, v, n) -> llvm.memset(align 1 p, v, n)
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
return CI->getArgOperand(0);
}
+Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilder<> &B) {
+ if (isa<ConstantPointerNull>(CI->getArgOperand(0)))
+ return emitMalloc(CI->getArgOperand(1), B, DL, TLI);
+
+ return nullptr;
+}
+
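The fold relies on C's guarantee (C11 7.22.3.5) that realloc with a null pointer behaves as malloc; a minimal source-level illustration:

#include <cstdlib>

int main() {
  void *P = std::realloc(nullptr, 16); // equivalent to std::malloc(16)
  std::free(P);
  return 0;
}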
//===----------------------------------------------------------------------===//
// Math Library Optimizations
//===----------------------------------------------------------------------===//
@@ -1666,12 +1682,12 @@ Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) {
- // abs(x) -> x >s -1 ? x : -x
- Value *Op = CI->getArgOperand(0);
- Value *Pos =
- B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()), "ispos");
- Value *Neg = B.CreateNeg(Op, "neg");
- return B.CreateSelect(Pos, Op, Neg);
+ // abs(x) -> x <s 0 ? -x : x
+ // The negation has 'nsw' because abs of INT_MIN is undefined.
+ Value *X = CI->getArgOperand(0);
+ Value *IsNeg = B.CreateICmpSLT(X, Constant::getNullValue(X->getType()));
+ Value *NegX = B.CreateNSWNeg(X, "neg");
+ return B.CreateSelect(IsNeg, NegX, X);
}
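In C terms the new lowering is the compare/negate/select below; the nsw flag on the negation corresponds to abs(INT_MIN) being undefined:

#include <cstdio>

// Source-level shape of the emitted IR: x < 0 ? -x : x. The negation
// may assume no signed wrap, since abs(INT_MIN) is undefined anyway.
static int absSelect(int X) { return X < 0 ? -X : X; }

int main() { std::printf("%d\n", absSelect(-5)); } // prints 5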
Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) {
@@ -1695,6 +1711,29 @@ Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilder<> &B) {
ConstantInt::get(CI->getType(), 0x7F));
}
+Value *LibCallSimplifier::optimizeAtoi(CallInst *CI, IRBuilder<> &B) {
+ StringRef Str;
+ if (!getConstantStringInfo(CI->getArgOperand(0), Str))
+ return nullptr;
+
+ return convertStrToNumber(CI, Str, 10);
+}
+
+Value *LibCallSimplifier::optimizeStrtol(CallInst *CI, IRBuilder<> &B) {
+ StringRef Str;
+ if (!getConstantStringInfo(CI->getArgOperand(0), Str))
+ return nullptr;
+
+ if (!isa<ConstantPointerNull>(CI->getArgOperand(1)))
+ return nullptr;
+
+ if (ConstantInt *CInt = dyn_cast<ConstantInt>(CI->getArgOperand(2))) {
+ return convertStrToNumber(CI, Str, CInt->getSExtValue());
+ }
+
+ return nullptr;
+}
+
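Taken together with convertStrToNumber, these routines fold only fully-constant calls; a sketch of what does and does not qualify (literals illustrative):

#include <cstdio>
#include <cstdlib>

int main() {
  // Foldable: constant string, constant base, and (for strtol) a null
  // end pointer that the caller can never inspect.
  std::printf("%d %ld\n", std::atoi("42"), std::strtol("ff", nullptr, 16));
  // Not foldable: a non-null end pointer, since the caller may read *End.
  char *End;
  std::printf("%ld %c\n", std::strtol("7x", &End, 10), *End); // 7 x
  return 0;
}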
//===----------------------------------------------------------------------===//
// Formatting and IO Library Call Optimizations
//===----------------------------------------------------------------------===//
@@ -1826,15 +1865,13 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
if (CI->getNumArgOperands() == 2) {
// Make sure there's no % in the constant array. We could try to handle
// %% -> % in the future if we cared.
- for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
- if (FormatStr[i] == '%')
- return nullptr; // we found a format specifier, bail out.
+ if (FormatStr.find('%') != StringRef::npos)
+ return nullptr; // we found a format specifier, bail out.
- // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
- B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ // sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1)
+ B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
ConstantInt::get(DL.getIntPtrType(CI->getContext()),
- FormatStr.size() + 1),
- 1); // Copy the null byte.
+ FormatStr.size() + 1)); // Copy the null byte.
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1868,7 +1905,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
return nullptr;
Value *IncLen =
B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc");
- B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1);
+ B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(2), 1, IncLen);
// The sprintf result is the unincremented number of bytes in the string.
return B.CreateIntCast(Len, CI->getType(), false);
@@ -1897,6 +1934,93 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
+Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) {
+ // Check for a fixed format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(2), FormatStr))
+ return nullptr;
+
+ // Check for a constant size argument.
+ ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (!Size)
+ return nullptr;
+
+ uint64_t N = Size->getZExtValue();
+
+ // If we just have a format string (nothing else crazy) transform it.
+ if (CI->getNumArgOperands() == 3) {
+ // Make sure there's no % in the constant array. We could try to handle
+ // %% -> % in the future if we cared.
+ if (FormatStr.find('%') != StringRef::npos)
+ return nullptr; // we found a format specifier, bail out.
+
+ if (N == 0)
+ return ConstantInt::get(CI->getType(), FormatStr.size());
+ else if (N < FormatStr.size() + 1)
+ return nullptr;
+
+ // snprintf(str, size, fmt) -> llvm.memcpy(align 1 str, align 1 fmt,
+ //                                         strlen(fmt)+1)
+ B.CreateMemCpy(
+ CI->getArgOperand(0), 1, CI->getArgOperand(2), 1,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()),
+ FormatStr.size() + 1)); // Copy the null byte.
+ return ConstantInt::get(CI->getType(), FormatStr.size());
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() == 2 && FormatStr[0] == '%' &&
+ CI->getNumArgOperands() == 4) {
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ if (N == 0)
+ return ConstantInt::get(CI->getType(), 1);
+ else if (N == 1)
+ return nullptr;
+
+ // snprintf(dst, size, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
+ if (!CI->getArgOperand(3)->getType()->isIntegerTy())
+ return nullptr;
+ Value *V = B.CreateTrunc(CI->getArgOperand(3), B.getInt8Ty(), "char");
+ Value *Ptr = castToCStr(CI->getArgOperand(0), B);
+ B.CreateStore(V, Ptr);
+ Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
+ B.CreateStore(B.getInt8(0), Ptr);
+
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ if (FormatStr[1] == 's') {
+ // snprintf(dest, size, "%s", str) -> llvm.memcpy(align 1 dest, align 1 str, len+1)
+ StringRef Str;
+ if (!getConstantStringInfo(CI->getArgOperand(3), Str))
+ return nullptr;
+
+ if (N == 0)
+ return ConstantInt::get(CI->getType(), Str.size());
+ else if (N < Str.size() + 1)
+ return nullptr;
+
+ B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(3), 1,
+ ConstantInt::get(CI->getType(), Str.size() + 1));
+
+ // The snprintf result is the unincremented number of bytes in the string.
+ return ConstantInt::get(CI->getType(), Str.size());
+ }
+ }
+ return nullptr;
+}
+
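A summary of the snprintf cases handled above, written as their source-level equivalents (buffer sizes and strings are illustrative):

#include <cstdio>

int main() {
  char Buf[8];
  // n == 0: nothing is written, only the would-be length is returned.
  int R0 = std::snprintf(nullptr, 0, "hi");       // folds to 2
  // Plain format string that fits: becomes a memcpy of "hi\0".
  int R1 = std::snprintf(Buf, sizeof(Buf), "hi"); // folds to 2
  // "%c": two stores (the char and the nul terminator), result 1.
  int R2 = std::snprintf(Buf, sizeof(Buf), "%c", 'x');
  std::printf("%d %d %d %s\n", R0, R1, R2, Buf);  // 2 2 1 x
  return 0;
}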
+Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilder<> &B) {
+ if (Value *V = optimizeSnPrintFString(CI, B)) {
+ return V;
+ }
+
+ return nullptr;
+}
+
Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {
optimizeErrorReporting(CI, B, 0);
@@ -1913,9 +2037,9 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {
// fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
if (CI->getNumArgOperands() == 2) {
- for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
- if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
- return nullptr; // We found a format specifier.
+ // Could handle %% -> % if we cared.
+ if (FormatStr.find('%') != StringRef::npos)
+ return nullptr; // We found a format specifier.
return emitFWrite(
CI->getArgOperand(1),
@@ -1973,22 +2097,27 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) {
// Get the element size and count.
ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
- if (!SizeC || !CountC)
- return nullptr;
- uint64_t Bytes = SizeC->getZExtValue() * CountC->getZExtValue();
-
- // If this is writing zero records, remove the call (it's a noop).
- if (Bytes == 0)
- return ConstantInt::get(CI->getType(), 0);
-
- // If this is writing one byte, turn it into fputc.
- // This optimisation is only valid, if the return value is unused.
- if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
- Value *Char = B.CreateLoad(castToCStr(CI->getArgOperand(0), B), "char");
- Value *NewCI = emitFPutC(Char, CI->getArgOperand(3), B, TLI);
- return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
+ if (SizeC && CountC) {
+ uint64_t Bytes = SizeC->getZExtValue() * CountC->getZExtValue();
+
+ // If this is writing zero records, remove the call (it's a noop).
+ if (Bytes == 0)
+ return ConstantInt::get(CI->getType(), 0);
+
+ // If this is writing one byte, turn it into fputc.
+ // This optimization is only valid if the return value is unused.
+ if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
+ Value *Char = B.CreateLoad(castToCStr(CI->getArgOperand(0), B), "char");
+ Value *NewCI = emitFPutC(Char, CI->getArgOperand(3), B, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
+ }
}
+ if (isLocallyOpenedFile(CI->getArgOperand(3), CI, B, TLI))
+ return emitFWriteUnlocked(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), CI->getArgOperand(3), B, DL,
+ TLI);
+
return nullptr;
}
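The restructured logic keeps the existing constant-size folds and only then falls back to the unlocked rewrite. In source terms, assuming the file name is illustrative:

#include <cstdio>

int main() {
  FILE *F = std::fopen("out.bin", "w");
  if (!F) return 1;
  char C = 'x';
  std::fwrite(&C, 1, 0, F); // zero records: the call folds to 0
  std::fwrite(&C, 1, 1, F); // one byte, result unused: becomes fputc
  std::fclose(F);
  return 0;
}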
@@ -1997,12 +2126,18 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
// Don't rewrite fputs to fwrite when optimizing for size because fwrite
// takes more arguments and thus requires extra MOVs.
- if (CI->getParent()->getParent()->optForSize())
+ if (CI->getFunction()->optForSize())
return nullptr;
- // We can't optimize if return value is used.
- if (!CI->use_empty())
- return nullptr;
+ // Check whether the return value has any uses.
+ if (!CI->use_empty()) {
+ if (isLocallyOpenedFile(CI->getArgOperand(1), CI, B, TLI))
+ return emitFPutSUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), B,
+ TLI);
+ else
+ // We can't optimize if the return value is used.
+ return nullptr;
+ }
// fputs(s,F) --> fwrite(s,1,strlen(s),F)
uint64_t Len = GetStringLength(CI->getArgOperand(0));
@@ -2016,6 +2151,40 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
CI->getArgOperand(1), B, DL, TLI);
}
+Value *LibCallSimplifier::optimizeFPutc(CallInst *CI, IRBuilder<> &B) {
+ optimizeErrorReporting(CI, B, 1);
+
+ if (isLocallyOpenedFile(CI->getArgOperand(1), CI, B, TLI))
+ return emitFPutCUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), B,
+ TLI);
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeFGetc(CallInst *CI, IRBuilder<> &B) {
+ if (isLocallyOpenedFile(CI->getArgOperand(0), CI, B, TLI))
+ return emitFGetCUnlocked(CI->getArgOperand(0), B, TLI);
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeFGets(CallInst *CI, IRBuilder<> &B) {
+ if (isLocallyOpenedFile(CI->getArgOperand(2), CI, B, TLI))
+ return emitFGetSUnlocked(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI);
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeFRead(CallInst *CI, IRBuilder<> &B) {
+ if (isLocallyOpenedFile(CI->getArgOperand(3), CI, B, TLI))
+ return emitFReadUnlocked(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), CI->getArgOperand(3), B, DL,
+ TLI);
+
+ return nullptr;
+}
+
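The common failure mode for all of these *_unlocked rewrites is capture: once the FILE* escapes, another thread could hold the stream lock. A sketch, where sink is a hypothetical escape point:

#include <cstdio>

void sink(FILE *); // hypothetical: the pointer escapes here

void noRewrite() {
  FILE *F = std::fopen("in.txt", "r");
  if (!F) return;
  sink(F);       // PointerMayBeCaptured(F) is now true
  std::fgetc(F); // must stay the locked variant
  std::fclose(F);
}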
Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
// Check for a constant string.
StringRef Str;
@@ -2099,6 +2268,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return optimizeMemMove(CI, Builder);
case LibFunc_memset:
return optimizeMemSet(CI, Builder);
+ case LibFunc_realloc:
+ return optimizeRealloc(CI, Builder);
case LibFunc_wcslen:
return optimizeWcslen(CI, Builder);
default:
@@ -2290,16 +2461,33 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
return optimizeIsAscii(CI, Builder);
case LibFunc_toascii:
return optimizeToAscii(CI, Builder);
+ case LibFunc_atoi:
+ case LibFunc_atol:
+ case LibFunc_atoll:
+ return optimizeAtoi(CI, Builder);
+ case LibFunc_strtol:
+ case LibFunc_strtoll:
+ return optimizeStrtol(CI, Builder);
case LibFunc_printf:
return optimizePrintF(CI, Builder);
case LibFunc_sprintf:
return optimizeSPrintF(CI, Builder);
+ case LibFunc_snprintf:
+ return optimizeSnPrintF(CI, Builder);
case LibFunc_fprintf:
return optimizeFPrintF(CI, Builder);
case LibFunc_fwrite:
return optimizeFWrite(CI, Builder);
+ case LibFunc_fread:
+ return optimizeFRead(CI, Builder);
case LibFunc_fputs:
return optimizeFPuts(CI, Builder);
+ case LibFunc_fgets:
+ return optimizeFGets(CI, Builder);
+ case LibFunc_fputc:
+ return optimizeFPutc(CI, Builder);
+ case LibFunc_fgetc:
+ return optimizeFGetc(CI, Builder);
case LibFunc_puts:
return optimizePuts(CI, Builder);
case LibFunc_perror:
@@ -2307,8 +2495,6 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
case LibFunc_vfprintf:
case LibFunc_fiprintf:
return optimizeErrorReporting(CI, Builder, 0);
- case LibFunc_fputc:
- return optimizeErrorReporting(CI, Builder, 1);
default:
return nullptr;
}
@@ -2393,8 +2579,8 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
IRBuilder<> &B) {
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
- B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1);
+ B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
+ CI->getArgOperand(2));
return CI->getArgOperand(0);
}
return nullptr;
@@ -2403,8 +2589,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
IRBuilder<> &B) {
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
- B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1);
+ B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
+ CI->getArgOperand(2));
return CI->getArgOperand(0);
}
return nullptr;
diff --git a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
index 968eb0208f43..f8d758c54983 100644
--- a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
@@ -101,7 +101,8 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
// At this point module should have the proper mix of globals and locals.
// As we attempt to partition this module, we must not change any
// locals to globals.
- DEBUG(dbgs() << "Partition module with (" << M->size() << ")functions\n");
+ LLVM_DEBUG(dbgs() << "Partition module with (" << M->size()
+ << ")functions\n");
ClusterMapType GVtoClusterMap;
ComdatMembersType ComdatMembers;
@@ -180,28 +181,31 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
std::make_pair(std::distance(GVtoClusterMap.member_begin(I),
GVtoClusterMap.member_end()), I));
- std::sort(Sets.begin(), Sets.end(), [](const SortType &a, const SortType &b) {
- if (a.first == b.first)
- return a.second->getData()->getName() > b.second->getData()->getName();
- else
- return a.first > b.first;
- });
+ llvm::sort(Sets.begin(), Sets.end(),
+ [](const SortType &a, const SortType &b) {
+ if (a.first == b.first)
+ return a.second->getData()->getName() >
+ b.second->getData()->getName();
+ else
+ return a.first > b.first;
+ });
for (auto &I : Sets) {
unsigned CurrentClusterID = BalancinQueue.top().first;
unsigned CurrentClusterSize = BalancinQueue.top().second;
BalancinQueue.pop();
- DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size(" << I.first
- << ") ----> " << I.second->getData()->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size("
+ << I.first << ") ----> " << I.second->getData()->getName()
+ << "\n");
for (ClusterMapType::member_iterator MI =
GVtoClusterMap.findLeader(I.second);
MI != GVtoClusterMap.member_end(); ++MI) {
if (!Visited.insert(*MI).second)
continue;
- DEBUG(dbgs() << "----> " << (*MI)->getName()
- << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n");
+ LLVM_DEBUG(dbgs() << "----> " << (*MI)->getName()
+ << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n");
Visited.insert(*MI);
ClusterIDMap[*MI] = CurrentClusterID;
CurrentClusterSize++;
@@ -270,7 +274,7 @@ void llvm::SplitModule(
for (unsigned I = 0; I < N; ++I) {
ValueToValueMapTy VMap;
std::unique_ptr<Module> MPart(
- CloneModule(M.get(), VMap, [&](const GlobalValue *GV) {
+ CloneModule(*M, VMap, [&](const GlobalValue *GV) {
if (ClusterIDMap.count(GV))
return (ClusterIDMap[GV] == I);
else
diff --git a/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
index 49dc15cf5e7c..ac0b519f4a77 100644
--- a/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
@@ -21,7 +21,6 @@
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -75,6 +74,3 @@ bool StripGCRelocates::runOnFunction(Function &F) {
INITIALIZE_PASS(StripGCRelocates, "strip-gc-relocates",
"Strip gc.relocates inserted through RewriteStatepointsForGC",
true, false)
-FunctionPass *llvm::createStripGCRelocatesPass() {
- return new StripGCRelocates();
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
index cd0378e0140c..8956a089a99c 100644
--- a/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
@@ -9,7 +9,7 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/Pass.h"
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils.h"
using namespace llvm;
namespace {
diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index ed444e4cf43c..e633ac0c874d 100644
--- a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -19,7 +19,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
using namespace llvm;
char UnifyFunctionExitNodes::ID = 0;
diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
index f6c7d1c4989e..afd842f59911 100644
--- a/contrib/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
@@ -12,7 +12,10 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils.h"
#include "llvm-c/Initialization.h"
+#include "llvm-c/Transforms/Utils.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"
@@ -33,7 +36,6 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {
initializePromoteLegacyPassPass(Registry);
initializeStripNonLineTableDebugInfoPass(Registry);
initializeUnifyFunctionExitNodesPass(Registry);
- initializeInstSimplifierPass(Registry);
initializeMetaRenamerPass(Registry);
initializeStripGCRelocatesPass(Registry);
initializePredicateInfoPrinterLegacyPassPass(Registry);
@@ -43,3 +45,12 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {
void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) {
initializeTransformUtils(*unwrap(R));
}
+
+void LLVMAddLowerSwitchPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLowerSwitchPass());
+}
+
+void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createPromoteMemoryToRegisterPass());
+}
+
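A usage sketch of the two newly exposed llvm-c entry points, driven through the legacy pass manager's C API (the wrapper name is illustrative):

#include "llvm-c/Core.h"
#include "llvm-c/Transforms/Utils.h"

// Runs the two newly exposed utility passes over a module.
void runUtilPasses(LLVMModuleRef M) {
  LLVMPassManagerRef PM = LLVMCreatePassManager();
  LLVMAddPromoteMemoryToRegisterPass(PM); // mem2reg
  LLVMAddLowerSwitchPass(PM);             // switch -> branch chains
  LLVMRunPassManager(PM, M);
  LLVMDisposePassManager(PM);
}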
diff --git a/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp b/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp
index c3feea6a0a41..948d9bd5baad 100644
--- a/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -20,8 +20,14 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
return false;
+ uint64_t StoreSize = DL.getTypeSizeInBits(StoredVal->getType());
+
+ // The store size must be byte-aligned to support future type casts.
+ if (llvm::alignTo(StoreSize, 8) != StoreSize)
+ return false;
+
// The store has to be at least as big as the load.
- if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy))
+ if (StoreSize < DL.getTypeSizeInBits(LoadTy))
return false;
// Don't coerce non-integral pointers to integers or vice versa.
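The new guard rejects stores whose bit width is not a whole number of bytes (an i1 store, for instance); a small compile-time check of the same arithmetic, with alignTo restated locally for illustration:

#include <cstdint>

// Local restatement of llvm::alignTo, for illustration only.
constexpr uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

static_assert(alignTo(1, 8) != 1, "an i1 store (1 bit) is rejected");
static_assert(alignTo(32, 8) == 32, "an i32 store (32 bits) passes");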
@@ -389,8 +395,8 @@ Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
NewLoad->takeName(SrcVal);
NewLoad->setAlignment(SrcVal->getAlignment());
- DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
- DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
+ LLVM_DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
+ LLVM_DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
// Replace uses of the original load with the wider load. On a big endian
// system, we need to shift down to get the relevant bits.